From d9147f18b33a3559c24c6bcfc778ece8b4fa0c85 Mon Sep 17 00:00:00 2001 From: M00nF1sh Date: Tue, 17 Sep 2019 15:24:39 -0700 Subject: [PATCH 001/621] sync nodegroup template to latest available (#335) * sync eks node group template to be latest available 1. add support to use ssm parameter for amiID 2. add support for all instance types supported by cni 3. formatted with rain(https://github.com/aws-cloudformation/rain) * add new CFN version 2019-09-17 --- README.md | 1 + amazon-eks-nodegroup.yaml | 331 ++++++++++++++++++++------------------ 2 files changed, 178 insertions(+), 154 deletions(-) diff --git a/README.md b/README.md index a29a40fa9..a6270111c 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,7 @@ versions by running `aws s3 ls s3://amazon-eks/cloudformation/`. | CloudFormation Version | EKS AMI versions | [amazon-vpc-cni-k8s](https://github.com/aws/amazon-vpc-cni-k8s/releases) | | ---------------------- | ------------------------------------------ | -------------------- | +| 2019-09-17 | amazon-eks-node-(1.14,1.13,1.12,1.11)-v20190906 | v1.5.3 | 2019-02-11 | amazon-eks-node-(1.12,1.11,1.10)-v20190327 | v1.3.2 (for p3dn.24xlarge instances) | | 2019-02-11 | amazon-eks-node-(1.11,1.10)-v20190220 | v1.3.2 (for p3dn.24xlarge instances) | | 2019-02-11 | amazon-eks-node-(1.11,1.10)-v20190211 | v1.3.2 (for p3dn.24xlarge instances) | diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index 140869504..467f0c28c 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -1,22 +1,84 @@ ---- -AWSTemplateFormatVersion: 2010-09-09 +AWSTemplateFormatVersion: "2010-09-09" + Description: Amazon EKS - Node Group +Metadata: + "AWS::CloudFormation::Interface": + ParameterGroups: + - Label: + default: EKS Cluster + Parameters: + - ClusterName + - ClusterControlPlaneSecurityGroup + - Label: + default: Worker Node Configuration + Parameters: + - NodeGroupName + - NodeAutoScalingGroupMinSize + - NodeAutoScalingGroupDesiredCapacity + - 
NodeAutoScalingGroupMaxSize + - NodeInstanceType + - NodeImageIdSSMParam + - NodeImageId + - NodeVolumeSize + - KeyName + - BootstrapArguments + - Label: + default: Worker Network Configuration + Parameters: + - VpcId + - Subnets + Parameters: + BootstrapArguments: + Type: String + Default: "" + Description: "Arguments to pass to the bootstrap script. See files/bootstrap.sh in https://github.com/awslabs/amazon-eks-ami" + + ClusterControlPlaneSecurityGroup: + Type: "AWS::EC2::SecurityGroup::Id" + Description: The security group of the cluster control plane. + + ClusterName: + Type: String + Description: The cluster name provided when the cluster was created. If it is incorrect, nodes will not be able to join the cluster. KeyName: + Type: "AWS::EC2::KeyPair::KeyName" Description: The EC2 Key Pair to allow SSH access to the instances - Type: AWS::EC2::KeyPair::KeyName + + NodeAutoScalingGroupDesiredCapacity: + Type: Number + Default: 3 + Description: Desired capacity of Node Group ASG. + + NodeAutoScalingGroupMaxSize: + Type: Number + Default: 4 + Description: Maximum size of Node Group ASG. Set to at least 1 greater than NodeAutoScalingGroupDesiredCapacity. + + NodeAutoScalingGroupMinSize: + Type: Number + Default: 1 + Description: Minimum size of Node Group ASG. + + NodeGroupName: + Type: String + Description: Unique identifier for the Node Group. NodeImageId: - Description: AMI id for the node instances. - Type: AWS::EC2::Image::Id + Type: String + Default: "" + Description: (Optional) Specify your own custom image ID. This value overrides any AWS Systems Manager Parameter Store value specified above. + + NodeImageIdSSMParam: + Type: "AWS::SSM::Parameter::Value" + Default: /aws/service/eks/optimized-ami/1.14/amazon-linux-2/recommended/image_id + Description: AWS Systems Manager Parameter Store parameter of the AMI ID for the worker node instances. 
NodeInstanceType: - Description: EC2 instance type for the node instances Type: String Default: t3.medium - ConstraintDescription: Must be a valid EC2 instance type AllowedValues: - a1.medium - a1.large @@ -43,6 +105,7 @@ Parameters: - c5.12xlarge - c5.18xlarge - c5.24xlarge + - c5.metal - c5d.large - c5d.xlarge - c5d.2xlarge @@ -118,6 +181,7 @@ Parameters: - m5.12xlarge - m5.16xlarge - m5.24xlarge + - m5.metal - m5a.large - m5a.xlarge - m5a.2xlarge @@ -140,6 +204,7 @@ Parameters: - m5d.12xlarge - m5d.16xlarge - m5d.24xlarge + - m5d.metal - p2.xlarge - p2.8xlarge - p2.16xlarge @@ -166,6 +231,7 @@ Parameters: - r5.12xlarge - r5.16xlarge - r5.24xlarge + - r5.metal - r5a.large - r5a.xlarge - r5a.2xlarge @@ -188,6 +254,7 @@ Parameters: - r5d.12xlarge - r5d.16xlarge - r5d.24xlarge + - r5d.metal - t1.micro - t2.nano - t2.micro @@ -210,6 +277,9 @@ Parameters: - t3a.large - t3a.xlarge - t3a.2xlarge + - u-6tb1.metal + - u-9tb1.metal + - u-12tb1.metal - x1.16xlarge - x1.32xlarge - x1e.xlarge @@ -224,229 +294,181 @@ Parameters: - z1d.3xlarge - z1d.6xlarge - z1d.12xlarge - - NodeAutoScalingGroupMinSize: - Description: Minimum size of Node Group ASG. - Type: Number - Default: 1 - - NodeAutoScalingGroupMaxSize: - Description: Maximum size of Node Group ASG. Set to at least 1 greater than NodeAutoScalingGroupDesiredCapacity. - Type: Number - Default: 4 - - NodeAutoScalingGroupDesiredCapacity: - Description: Desired capacity of Node Group ASG. - Type: Number - Default: 3 + - z1d.metal + ConstraintDescription: Must be a valid EC2 instance type + Description: EC2 instance type for the node instances NodeVolumeSize: - Description: Node volume size Type: Number Default: 20 - - ClusterName: - Description: The cluster name provided when the cluster was created. If it is incorrect, nodes will not be able to join the cluster. - Type: String - - BootstrapArguments: - Description: Arguments to pass to the bootstrap script. 
See files/bootstrap.sh in https://github.com/awslabs/amazon-eks-ami - Type: String - Default: "" - - NodeGroupName: - Description: Unique identifier for the Node Group. - Type: String - - ClusterControlPlaneSecurityGroup: - Description: The security group of the cluster control plane. - Type: AWS::EC2::SecurityGroup::Id - - VpcId: - Description: The VPC of the worker instances - Type: AWS::EC2::VPC::Id + Description: Node volume size Subnets: + Type: "List" Description: The subnets where workers can be created. - Type: List -Metadata: + VpcId: + Type: "AWS::EC2::VPC::Id" + Description: The VPC of the worker instances - AWS::CloudFormation::Interface: - ParameterGroups: - - Label: - default: EKS Cluster - Parameters: - - ClusterName - - ClusterControlPlaneSecurityGroup - - Label: - default: Worker Node Configuration - Parameters: - - NodeGroupName - - NodeAutoScalingGroupMinSize - - NodeAutoScalingGroupDesiredCapacity - - NodeAutoScalingGroupMaxSize - - NodeInstanceType - - NodeImageId - - NodeVolumeSize - - KeyName - - BootstrapArguments - - Label: - default: Worker Network Configuration - Parameters: - - VpcId - - Subnets +Conditions: + HasNodeImageId: !Not + - "Fn::Equals": + - Ref: NodeImageId + - "" Resources: - - NodeInstanceProfile: - Type: AWS::IAM::InstanceProfile - Properties: - Path: "/" - Roles: - - !Ref NodeInstanceRole - NodeInstanceRole: - Type: AWS::IAM::Role + Type: "AWS::IAM::Role" Properties: AssumeRolePolicyDocument: - Version: 2012-10-17 + Version: "2012-10-17" Statement: - Effect: Allow Principal: - Service: ec2.amazonaws.com - Action: sts:AssumeRole - Path: "/" + Service: + - ec2.amazonaws.com + Action: + - "sts:AssumeRole" ManagedPolicyArns: - - arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy - - arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy - - arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly + - "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" + - "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" + - 
"arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" + Path: / + + NodeInstanceProfile: + Type: "AWS::IAM::InstanceProfile" + Properties: + Path: / + Roles: + - Ref: NodeInstanceRole NodeSecurityGroup: - Type: AWS::EC2::SecurityGroup + Type: "AWS::EC2::SecurityGroup" Properties: GroupDescription: Security group for all nodes in the cluster - VpcId: !Ref VpcId Tags: - Key: !Sub kubernetes.io/cluster/${ClusterName} Value: owned + VpcId: !Ref VpcId NodeSecurityGroupIngress: - Type: AWS::EC2::SecurityGroupIngress + Type: "AWS::EC2::SecurityGroupIngress" DependsOn: NodeSecurityGroup Properties: Description: Allow node to communicate with each other + FromPort: 0 GroupId: !Ref NodeSecurityGroup + IpProtocol: "-1" SourceSecurityGroupId: !Ref NodeSecurityGroup - IpProtocol: -1 - FromPort: 0 ToPort: 65535 - NodeSecurityGroupFromControlPlaneIngress: - Type: AWS::EC2::SecurityGroupIngress + ClusterControlPlaneSecurityGroupIngress: + Type: "AWS::EC2::SecurityGroupIngress" DependsOn: NodeSecurityGroup Properties: - Description: Allow worker Kubelets and pods to receive communication from the cluster control plane - GroupId: !Ref NodeSecurityGroup - SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup + Description: Allow pods to communicate with the cluster API Server + FromPort: 443 + GroupId: !Ref ClusterControlPlaneSecurityGroup IpProtocol: tcp - FromPort: 1025 - ToPort: 65535 + SourceSecurityGroupId: !Ref NodeSecurityGroup + ToPort: 443 ControlPlaneEgressToNodeSecurityGroup: - Type: AWS::EC2::SecurityGroupEgress + Type: "AWS::EC2::SecurityGroupEgress" DependsOn: NodeSecurityGroup Properties: Description: Allow the cluster control plane to communicate with worker Kubelet and pods - GroupId: !Ref ClusterControlPlaneSecurityGroup DestinationSecurityGroupId: !Ref NodeSecurityGroup - IpProtocol: tcp FromPort: 1025 + GroupId: !Ref ClusterControlPlaneSecurityGroup + IpProtocol: tcp ToPort: 65535 - NodeSecurityGroupFromControlPlaneOn443Ingress: - Type: 
AWS::EC2::SecurityGroupIngress + ControlPlaneEgressToNodeSecurityGroupOn443: + Type: "AWS::EC2::SecurityGroupEgress" DependsOn: NodeSecurityGroup Properties: - Description: Allow pods running extension API servers on port 443 to receive communication from cluster control plane - GroupId: !Ref NodeSecurityGroup - SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup - IpProtocol: tcp + Description: Allow the cluster control plane to communicate with pods running extension API servers on port 443 + DestinationSecurityGroupId: !Ref NodeSecurityGroup FromPort: 443 + GroupId: !Ref ClusterControlPlaneSecurityGroup + IpProtocol: tcp ToPort: 443 - ControlPlaneEgressToNodeSecurityGroupOn443: - Type: AWS::EC2::SecurityGroupEgress + NodeSecurityGroupFromControlPlaneIngress: + Type: "AWS::EC2::SecurityGroupIngress" DependsOn: NodeSecurityGroup Properties: - Description: Allow the cluster control plane to communicate with pods running extension API servers on port 443 - GroupId: !Ref ClusterControlPlaneSecurityGroup - DestinationSecurityGroupId: !Ref NodeSecurityGroup + Description: Allow worker Kubelets and pods to receive communication from the cluster control plane + FromPort: 1025 + GroupId: !Ref NodeSecurityGroup IpProtocol: tcp - FromPort: 443 - ToPort: 443 + SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup + ToPort: 65535 - ClusterControlPlaneSecurityGroupIngress: - Type: AWS::EC2::SecurityGroupIngress + NodeSecurityGroupFromControlPlaneOn443Ingress: + Type: "AWS::EC2::SecurityGroupIngress" DependsOn: NodeSecurityGroup Properties: - Description: Allow pods to communicate with the cluster API Server - GroupId: !Ref ClusterControlPlaneSecurityGroup - SourceSecurityGroupId: !Ref NodeSecurityGroup + Description: Allow pods running extension API servers on port 443 to receive communication from cluster control plane + FromPort: 443 + GroupId: !Ref NodeSecurityGroup IpProtocol: tcp + SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup ToPort: 443 - 
FromPort: 443 + + NodeLaunchConfig: + Type: "AWS::AutoScaling::LaunchConfiguration" + Properties: + AssociatePublicIpAddress: "true" + BlockDeviceMappings: + - DeviceName: /dev/xvda + Ebs: + DeleteOnTermination: true + VolumeSize: !Ref NodeVolumeSize + VolumeType: gp2 + IamInstanceProfile: !Ref NodeInstanceProfile + ImageId: !If + - HasNodeImageId + - Ref: NodeImageId + - Ref: NodeImageIdSSMParam + InstanceType: !Ref NodeInstanceType + KeyName: !Ref KeyName + SecurityGroups: + - Ref: NodeSecurityGroup + UserData: !Base64 + "Fn::Sub": | + #!/bin/bash + set -o xtrace + /etc/eks/bootstrap.sh ${ClusterName} ${BootstrapArguments} + /opt/aws/bin/cfn-signal --exit-code $? \ + --stack ${AWS::StackName} \ + --resource NodeGroup \ + --region ${AWS::Region} NodeGroup: - Type: AWS::AutoScaling::AutoScalingGroup + Type: "AWS::AutoScaling::AutoScalingGroup" Properties: DesiredCapacity: !Ref NodeAutoScalingGroupDesiredCapacity LaunchConfigurationName: !Ref NodeLaunchConfig - MinSize: !Ref NodeAutoScalingGroupMinSize MaxSize: !Ref NodeAutoScalingGroupMaxSize - VPCZoneIdentifier: !Ref Subnets + MinSize: !Ref NodeAutoScalingGroupMinSize Tags: - Key: Name + PropagateAtLaunch: "true" Value: !Sub ${ClusterName}-${NodeGroupName}-Node - PropagateAtLaunch: true - Key: !Sub kubernetes.io/cluster/${ClusterName} + PropagateAtLaunch: "true" Value: owned - PropagateAtLaunch: true + VPCZoneIdentifier: !Ref Subnets UpdatePolicy: AutoScalingRollingUpdate: - MaxBatchSize: 1 + MaxBatchSize: "1" MinInstancesInService: !Ref NodeAutoScalingGroupDesiredCapacity PauseTime: PT5M - NodeLaunchConfig: - Type: AWS::AutoScaling::LaunchConfiguration - Properties: - AssociatePublicIpAddress: true - IamInstanceProfile: !Ref NodeInstanceProfile - ImageId: !Ref NodeImageId - InstanceType: !Ref NodeInstanceType - KeyName: !Ref KeyName - SecurityGroups: - - !Ref NodeSecurityGroup - BlockDeviceMappings: - - DeviceName: /dev/xvda - Ebs: - VolumeSize: !Ref NodeVolumeSize - VolumeType: gp2 - DeleteOnTermination: true - 
UserData: - Fn::Base64: - !Sub | - #!/bin/bash - set -o xtrace - /etc/eks/bootstrap.sh ${ClusterName} ${BootstrapArguments} - /opt/aws/bin/cfn-signal --exit-code $? \ - --stack ${AWS::StackName} \ - --resource NodeGroup \ - --region ${AWS::Region} - Outputs: - NodeInstanceRole: Description: The node instance role Value: !GetAtt NodeInstanceRole.Arn @@ -454,3 +476,4 @@ Outputs: NodeSecurityGroup: Description: The security group for the node group Value: !Ref NodeSecurityGroup + From ade31b047f0d0318ca471712da87ec61e66f6d9a Mon Sep 17 00:00:00 2001 From: Claes Mogren Date: Fri, 20 Sep 2019 10:34:45 -0700 Subject: [PATCH 002/621] Add support for g4 instance family --- files/eni-max-pods.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 7ad015812..019f0d811 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -64,6 +64,13 @@ g3s.xlarge 58 g3.4xlarge 234 g3.8xlarge 234 g3.16xlarge 452 +g4dn.xlarge 29 +g4dn.2xlarge 29 +g4dn.4xlarge 29 +g4dn.8xlarge 58 +g4dn.16xlarge 58 +g4dn.12xlarge 234 +g4dn.metal 737 h1.2xlarge 58 h1.4xlarge 234 h1.8xlarge 234 From 0f11f6c241191612b64176aada2b7cdd78c33245 Mon Sep 17 00:00:00 2001 From: Jiaxin Shan Date: Thu, 26 Sep 2019 16:07:04 -0700 Subject: [PATCH 003/621] Add G4DN instance family to node group template --- amazon-eks-nodegroup.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index 467f0c28c..d45579c13 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -212,6 +212,13 @@ Parameters: - p3.8xlarge - p3.16xlarge - p3dn.24xlarge + - g4dn.xlarge + - g4dn.2xlarge + - g4dn.4xlarge + - g4dn.8xlarge + - g4dn.12xlarge + - g4dn.16xlarge + - g4dn.metal - r3.large - r3.xlarge - r3.2xlarge From 8fece4f422873e5f79fb8f9eb3e9d36d9b0d3489 Mon Sep 17 00:00:00 2001 From: Jiaxin Shan Date: Mon, 30 Sep 2019 14:01:15 -0700 Subject: [PATCH 004/621] Add change log for AMI Release v20190927 
(#345) --- CHANGELOG.md | 18 +++++++++++++++++- README.md | 1 + 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 359a4c48e..9f5aa887b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,20 @@ # Changelog + +### AMI Release v20190927 +* amazon-eks-node-1.14-v20190927 +* amazon-eks-gpu-node-1.14-v20190927 +* amazon-eks-node-1.13-v20190927 +* amazon-eks-gpu-node-1.13-v20190927 +* amazon-eks-node-1.12-v20190927 +* amazon-eks-gpu-node-1.12-v20190927 +* amazon-eks-node-1.11-v20190927 +* amazon-eks-gpu-node-1.11-v20190927 + +Changes: +* 0f11f6c Add G4DN instance family to node group template +* ade31b0 Add support for g4 instance family +* d9147f1 sync nodegroup template to latest available + ### AMI Release v20190906 * amazon-eks-node-1.14-v20190906 * amazon-eks-gpu-node-1.14-v20190906 @@ -10,7 +26,7 @@ * amazon-eks-gpu-node-1.11-v20190906 Changes: -* c1ae2f3 Adding new directory and file for 1.14 and above by removing --allow-privileged=true flag (#327) +* c1ae2f3 Adding new directory and file for 1.14 and above by removing --allow-privileged=true flag (#327) * 5335ea8 add support for me-south-1 region (#322) * c4e03c1 Update list of instance types (#320) * 389f4ba update S3_URL_BASE environment variable in install-worker.sh diff --git a/README.md b/README.md index a6270111c..dbdec62ab 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,7 @@ versions by running `aws s3 ls s3://amazon-eks/cloudformation/`. 
| CloudFormation Version | EKS AMI versions | [amazon-vpc-cni-k8s](https://github.com/aws/amazon-vpc-cni-k8s/releases) | | ---------------------- | ------------------------------------------ | -------------------- | +| 2019-09-27 | amazon-eks-node-(1.14,1.13,1.12,1.11)-v20190927 | v1.5.4 | 2019-09-17 | amazon-eks-node-(1.14,1.13,1.12,1.11)-v20190906 | v1.5.3 | 2019-02-11 | amazon-eks-node-(1.12,1.11,1.10)-v20190327 | v1.3.2 (for p3dn.24xlarge instances) | | 2019-02-11 | amazon-eks-node-(1.11,1.10)-v20190220 | v1.3.2 (for p3dn.24xlarge instances) | From c47e0c078c76a563b04c462a21da3492b5f138ac Mon Sep 17 00:00:00 2001 From: Will Thames Date: Wed, 9 Oct 2019 04:11:21 +1000 Subject: [PATCH 005/621] Add 1.14 to the EKS Makefile and update older versions (#336) Add 1.14 to the list of Makefile targets. Remove 1.10 as it's no longer a supported version Update versions and build dates for older EKS versions --- Makefile | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 38a0e299b..e2f5f75b2 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.10 1.11 1.12 1.13 +all: 1.11 1.12 1.13 1.14 .PHONY: validate validate: @@ -30,18 +30,20 @@ k8s: validate @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" $(PACKER_BINARY) build $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)=$($(packerVar)),)) eks-worker-al2.json -.PHONY: 1.10 -1.10: - $(MAKE) k8s kubernetes_version=1.10.13 kubernetes_build_date=2019-03-27 +# Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html .PHONY: 1.11 1.11: - $(MAKE) k8s kubernetes_version=1.11.9 kubernetes_build_date=2019-03-27 + $(MAKE) k8s kubernetes_version=1.11.10 kubernetes_build_date=2019-08-14 .PHONY: 1.12 1.12: - $(MAKE) k8s kubernetes_version=1.12.7 kubernetes_build_date=2019-03-27 + 
$(MAKE) k8s kubernetes_version=1.12.10 kubernetes_build_date=2019-08-14 .PHONY: 1.13 1.13: - $(MAKE) k8s kubernetes_version=1.13.7 kubernetes_build_date=2019-06-11 \ No newline at end of file + $(MAKE) k8s kubernetes_version=1.13.8 kubernetes_build_date=2019-08-14 + +.PHONY: 1.14 +1.14: + $(MAKE) k8s kubernetes_version=1.14.6 kubernetes_build_date=2019-08-22 From 39eaa20f2e192aef9accea1c3125d6a88201c95e Mon Sep 17 00:00:00 2001 From: Jiaxin Shan Date: Mon, 14 Oct 2019 13:25:21 -0700 Subject: [PATCH 006/621] Add support for m5n/m5dn/r5n/r5dn instances --- amazon-eks-nodegroup.yaml | 32 ++++++++++++++++++++++++++++++++ files/eni-max-pods.txt | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index d45579c13..074f26b58 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -205,6 +205,22 @@ Parameters: - m5d.16xlarge - m5d.24xlarge - m5d.metal + - m5dn.large + - m5dn.xlarge + - m5dn.2xlarge + - m5dn.4xlarge + - m5dn.8xlarge + - m5dn.12xlarge + - m5dn.16xlarge + - m5dn.24xlarge + - m5n.large + - m5n.xlarge + - m5n.2xlarge + - m5n.4xlarge + - m5n.8xlarge + - m5n.12xlarge + - m5n.16xlarge + - m5n.24xlarge - p2.xlarge - p2.8xlarge - p2.16xlarge @@ -262,6 +278,22 @@ Parameters: - r5d.16xlarge - r5d.24xlarge - r5d.metal + - r5dn.large + - r5dn.xlarge + - r5dn.2xlarge + - r5dn.4xlarge + - r5dn.8xlarge + - r5dn.12xlarge + - r5dn.16xlarge + - r5dn.24xlarge + - r5n.large + - r5n.xlarge + - r5n.2xlarge + - r5n.4xlarge + - r5n.8xlarge + - r5n.12xlarge + - r5n.16xlarge + - r5n.24xlarge - t1.micro - t2.nano - t2.micro diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 019f0d811..4871bc46e 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -143,6 +143,22 @@ m5d.12xlarge 234 m5d.16xlarge 737 m5d.24xlarge 737 m5d.metal 737 +m5dn.large 29 +m5dn.xlarge 58 +m5dn.2xlarge 58 +m5dn.4xlarge 234 +m5dn.8xlarge 234 +m5dn.12xlarge 234 +m5dn.16xlarge 737 
+m5dn.24xlarge 737 +m5n.large 29 +m5n.xlarge 58 +m5n.2xlarge 58 +m5n.4xlarge 234 +m5n.8xlarge 234 +m5n.12xlarge 234 +m5n.16xlarge 737 +m5n.24xlarge 737 p2.xlarge 58 p2.8xlarge 234 p2.16xlarge 234 @@ -193,6 +209,22 @@ r5d.12xlarge 234 r5d.16xlarge 737 r5d.24xlarge 737 r5d.metal 737 +r5dn.large 29 +r5dn.xlarge 58 +r5dn.2xlarge 58 +r5dn.4xlarge 234 +r5dn.8xlarge 234 +r5dn.12xlarge 234 +r5dn.16xlarge 737 +r5dn.24xlarge 737 +r5n.large 29 +r5n.xlarge 58 +r5n.2xlarge 58 +r5n.4xlarge 234 +r5n.8xlarge 234 +r5n.12xlarge 234 +r5n.16xlarge 737 +r5n.24xlarge 737 t1.micro 4 t2.nano 4 t2.micro 4 From b1d4bc50403a6d1ee3a83037284697d8e84c1a69 Mon Sep 17 00:00:00 2001 From: Shyam JVS Date: Thu, 17 Oct 2019 13:45:25 -0700 Subject: [PATCH 007/621] Remove snowflake for kubelet secret-polling config (#352) --- files/kubelet-config-with-secret-polling.json | 35 ------------------- install-worker.sh | 9 +---- 2 files changed, 1 insertion(+), 43 deletions(-) delete mode 100644 files/kubelet-config-with-secret-polling.json diff --git a/files/kubelet-config-with-secret-polling.json b/files/kubelet-config-with-secret-polling.json deleted file mode 100644 index 08f7127f5..000000000 --- a/files/kubelet-config-with-secret-polling.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "kind": "KubeletConfiguration", - "apiVersion": "kubelet.config.k8s.io/v1beta1", - "address": "0.0.0.0", - "authentication": { - "anonymous": { - "enabled": false - }, - "webhook": { - "cacheTTL": "2m0s", - "enabled": true - }, - "x509": { - "clientCAFile": "/etc/kubernetes/pki/ca.crt" - } - }, - "authorization": { - "mode": "Webhook", - "webhook": { - "cacheAuthorizedTTL": "5m0s", - "cacheUnauthorizedTTL": "30s" - } - }, - "clusterDomain": "cluster.local", - "hairpinMode": "hairpin-veth", - "cgroupDriver": "cgroupfs", - "cgroupRoot": "/", - "featureGates": { - "RotateKubeletServerCertificate": true - }, - "serializeImagePulls": false, - "serverTLSBootstrap": true, - "configMapAndSecretChangeDetectionStrategy": "Cache", - 
"tlsCipherSuites": ["TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256"] -} diff --git a/install-worker.sh b/install-worker.sh index 8b4bbe9df..8f58c4c26 100644 --- a/install-worker.sh +++ b/install-worker.sh @@ -181,14 +181,7 @@ for binary in ${BINARIES[*]} ; do done sudo rm *.sha256 -KUBELET_CONFIG="" KUBERNETES_MINOR_VERSION=${KUBERNETES_VERSION%.*} -if [ "$KUBERNETES_MINOR_VERSION" = "1.10" ] || [ "$KUBERNETES_MINOR_VERSION" = "1.11" ]; then - KUBELET_CONFIG=kubelet-config.json -else - # For newer versions use this config to fix https://github.com/kubernetes/kubernetes/issues/74412. - KUBELET_CONFIG=kubelet-config-with-secret-polling.json -fi sudo mkdir -p /etc/kubernetes/kubelet sudo mkdir -p /etc/systemd/system/kubelet.service.d @@ -200,7 +193,7 @@ else sudo mv $TEMPLATE_DIR/kubelet.service /etc/systemd/system/kubelet.service fi sudo chown root:root /etc/systemd/system/kubelet.service -sudo mv $TEMPLATE_DIR/$KUBELET_CONFIG /etc/kubernetes/kubelet/kubelet-config.json +sudo mv $TEMPLATE_DIR/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json sudo chown root:root /etc/kubernetes/kubelet/kubelet-config.json From 9d5ed53308384a34e6d4537ea07dad3250d6fc26 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Thu, 10 Oct 2019 13:50:48 -0700 Subject: [PATCH 008/621] Set a minimum evictionHard and kubeReserved --- files/kubelet-config.json | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/files/kubelet-config.json b/files/kubelet-config.json index 3a41c2a95..97e179317 100644 --- a/files/kubelet-config.json +++ b/files/kubelet-config.json @@ -28,6 +28,16 @@ "featureGates": { "RotateKubeletServerCertificate": true }, + "evictionHard": { + "memory.available": "100Mi", + "nodefs.available": "10%", + "nodefs.inodesFree": "5%" + }, + "kubeReserved": { + "cpu": "60m", + "ephemeral-storage": "1Gi", + "memory": "0.24Gi" + }, "serializeImagePulls": false, "serverTLSBootstrap": true } From 1b59c53014708284031e0c3016e796619c7ed10b Mon Sep 17 00:00:00 2001 From: Dwayne 
Bailey Date: Mon, 21 Oct 2019 15:47:14 +0100 Subject: [PATCH 009/621] Output the autoscaling group name This name of the AutoScaling Group is useful for things like the Cluster Autoscaler so that it can manage automatic cluster scaling. --- amazon-eks-nodegroup.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index 074f26b58..f0536aa3a 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -516,3 +516,6 @@ Outputs: Description: The security group for the node group Value: !Ref NodeSecurityGroup + NodeAutoScalingGroup: + Description: The autoscaling group + Value: !Ref NodeGroup From 17706d5e72a845d239e6647bdc7b906981d954be Mon Sep 17 00:00:00 2001 From: Andrew Johnstone Date: Tue, 22 Oct 2019 23:23:35 +0100 Subject: [PATCH 010/621] #361 - custom pause container image support (#362) * #361 - custom pause container image support --- files/bootstrap.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 10b9b403f..a02644dad 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -130,6 +130,10 @@ else exit 1 fi +PAUSE_CONTAINER_ACCOUNT=$(get_pause_container_account_for_region "${AWS_DEFAULT_REGION}") +PAUSE_CONTAINER_IMAGE=${PAUSE_CONTAINER_IMAGE:-$PAUSE_CONTAINER_ACCOUNT.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/eks/pause-${ARCH}} +PAUSE_CONTAINER="$PAUSE_CONTAINER_IMAGE:$PAUSE_CONTAINER_VERSION" + ### kubelet kubeconfig CA_CERTIFICATE_DIRECTORY=/etc/kubernetes/pki @@ -200,7 +204,7 @@ fi cat < /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf [Service] -Environment='KUBELET_ARGS=--node-ip=$INTERNAL_IP --pod-infra-container-image=$(get_pause_container_account_for_region "${AWS_DEFAULT_REGION}").dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/eks/pause-${ARCH}:$PAUSE_CONTAINER_VERSION' +Environment='KUBELET_ARGS=--node-ip=$INTERNAL_IP --pod-infra-container-image=$PAUSE_CONTAINER' EOF if [[ -n "$KUBELET_EXTRA_ARGS" ]]; 
then From d4eca99551498ca14769fbbdf31cba8988591de9 Mon Sep 17 00:00:00 2001 From: natherz97 <55205932+natherz97@users.noreply.github.com> Date: Tue, 12 Nov 2019 12:03:25 -0800 Subject: [PATCH 011/621] Set kubeReserved dynamically and evictionHard statically (#367) --- files/bootstrap.sh | 101 ++++++++++++++++++++++++++++++++++++++ files/kubelet-config.json | 10 ---- 2 files changed, 101 insertions(+), 10 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index a02644dad..ee8c8a211 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -112,6 +112,91 @@ function get_pause_container_account_for_region () { esac } +# Helper function which calculates the amount of the given resource (either CPU or memory) +# to reserve in a given resource range, specified by a start and end of the range and a percentage +# of the resource to reserve. Note that we return zero if the start of the resource range is +# greater than the total resource capacity on the node. Additionally, if the end range exceeds the total +# resource capacity of the node, we use the total resource capacity as the end of the range. +# Args: +# $1 total available resource on the worker node in input unit (either millicores for CPU or Mi for memory) +# $2 start of the resource range in input unit +# $3 end of the resource range in input unit +# $4 percentage of range to reserve in percent*100 (to allow for two decimal digits) +# Return: +# amount of resource to reserve in input unit +get_resource_to_reserve_in_range() { + local total_resource_on_instance=$1 + local start_range=$2 + local end_range=$3 + local percentage=$4 + resources_to_reserve="0" + if (( $total_resource_on_instance > $start_range )); then + resources_to_reserve=$(((($total_resource_on_instance < $end_range ? 
\ + $total_resource_on_instance : $end_range) - $start_range) * $percentage / 100 / 100)) + fi + echo $resources_to_reserve +} + +# Calculates the amount of memory to reserve for the kubelet in mebibytes from the total memory available on the instance. +# From the total memory capacity of this worker node, we calculate the memory resources to reserve +# by reserving a percentage of the memory in each range up to the total memory available on the instance. +# We are using these memory ranges from GKE (https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-architecture#node_allocatable): +# 255 Mi of memory for machines with less than 1024Mi of memory +# 25% of the first 4096Mi of memory +# 20% of the next 4096Mi of memory (up to 8192Mi) +# 10% of the next 8192Mi of memory (up to 16384Mi) +# 6% of the next 114688Mi of memory (up to 131072Mi) +# 2% of any memory above 131072Mi +# Args: +# $1 total available memory on the machine in Mi +# Return: +# memory to reserve in Mi for the kubelet +get_memory_mebibytes_to_reserve() { + local total_memory_on_instance=$1 + local memory_ranges=(0 4096 8192 16384 131072 $total_memory_on_instance) + local memory_percentage_reserved_for_ranges=(2500 2000 1000 600 200) + if (( $total_memory_on_instance <= 1024 )); then + memory_to_reserve="255" + else + memory_to_reserve="0" + for i in ${!memory_percentage_reserved_for_ranges[@]}; do + local start_range=${memory_ranges[$i]} + local end_range=${memory_ranges[(($i+1))]} + local percentage_to_reserve_for_range=${memory_percentage_reserved_for_ranges[$i]} + memory_to_reserve=$(($memory_to_reserve + \ + $(get_resource_to_reserve_in_range $total_memory_on_instance $start_range $end_range $percentage_to_reserve_for_range))) + done + fi + echo $memory_to_reserve +} + +# Calculates the amount of CPU to reserve for the kubelet in millicores from the total number of vCPUs available on the instance. 
+# From the total core capacity of this worker node, we calculate the CPU resources to reserve by reserving a percentage +# of the available cores in each range up to the total number of cores available on the instance. +# We are using these CPU ranges from GKE (https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-architecture#node_allocatable): +# 6% of the first core +# 1% of the next core (up to 2 cores) +# 0.5% of the next 2 cores (up to 4 cores) +# 0.25% of any cores above 4 cores +# Args: +# $1 total number of millicores on the instance (number of vCPUs * 1000) +# Return: +# CPU resources to reserve in millicores (m) +get_cpu_millicores_to_reserve() { + local total_cpu_on_instance=$1 + local cpu_ranges=(0 1000 2000 4000 $total_cpu_on_instance) + local cpu_percentage_reserved_for_ranges=(600 100 50 25) + cpu_to_reserve="0" + for i in ${!cpu_percentage_reserved_for_ranges[@]}; do + local start_range=${cpu_ranges[$i]} + local end_range=${cpu_ranges[(($i+1))]} + local percentage_to_reserve_for_range=${cpu_percentage_reserved_for_ranges[$i]} + cpu_to_reserve=$(($cpu_to_reserve + \ + $(get_resource_to_reserve_in_range $total_cpu_on_instance $start_range $end_range $percentage_to_reserve_for_range))) + done + echo $cpu_to_reserve +} + if [ -z "$CLUSTER_NAME" ]; then echo "CLUSTER_NAME is not defined" exit 1 @@ -187,6 +272,22 @@ fi KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json echo "$(jq ".clusterDNS=[\"$DNS_CLUSTER_IP\"]" $KUBELET_CONFIG)" > $KUBELET_CONFIG +# Sets kubeReserved and evictionHard in /etc/kubernetes/kubelet/kubelet-config.json for worker nodes. The following two function +# calls calculate the CPU and memory resources to reserve for the kubelet based on instance type of the worker node. +# Note that allocatable memory and CPU resources on worker nodes is calculated by the Kubernetes scheduler +# with this formula when scheduling pods: Allocatable = Capacity - Reserved - Eviction Threshold. 
+ +# gets the memory and CPU capacity of the worker node +MEMORY_MI=$(free -m | grep Mem | awk '{print $2}') +CPU_MILLICORES=$(($(nproc) * 1000)) +# calculates the amount of each resource to reserve +mebibytes_to_reserve=$(get_memory_mebibytes_to_reserve $MEMORY_MI) +cpu_millicores_to_reserve=$(get_cpu_millicores_to_reserve $CPU_MILLICORES) +# writes kubeReserved and evictionHard to the kubelet-config using the amount of CPU and memory to be reserved +echo "$(jq '. += {"evictionHard": {"memory.available": "100Mi", "nodefs.available": "10%", "nodefs.inodesFree": "5%"}}' $KUBELET_CONFIG)" > $KUBELET_CONFIG +echo "$(jq --arg mebibytes_to_reserve "${mebibytes_to_reserve}Mi" --arg cpu_millicores_to_reserve "${cpu_millicores_to_reserve}m" \ + '. += {kubeReserved: {"cpu": $cpu_millicores_to_reserve, "ephemeral-storage": "1Gi", "memory": $mebibytes_to_reserve}}' $KUBELET_CONFIG)" > $KUBELET_CONFIG + INTERNAL_IP=$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4) INSTANCE_TYPE=$(curl -s http://169.254.169.254/latest/meta-data/instance-type) diff --git a/files/kubelet-config.json b/files/kubelet-config.json index 97e179317..3a41c2a95 100644 --- a/files/kubelet-config.json +++ b/files/kubelet-config.json @@ -28,16 +28,6 @@ "featureGates": { "RotateKubeletServerCertificate": true }, - "evictionHard": { - "memory.available": "100Mi", - "nodefs.available": "10%", - "nodefs.inodesFree": "5%" - }, - "kubeReserved": { - "cpu": "60m", - "ephemeral-storage": "1Gi", - "memory": "0.24Gi" - }, "serializeImagePulls": false, "serverTLSBootstrap": true } From da2d05a60929f9d258355b8a597f2917c35896f4 Mon Sep 17 00:00:00 2001 From: natherz97 <55205932+natherz97@users.noreply.github.com> Date: Tue, 12 Nov 2019 15:37:10 -0800 Subject: [PATCH 012/621] Updating Docker version (#373) --- eks-worker-al2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index a9c490236..86d0cb400 100644 --- a/eks-worker-al2.json +++ 
b/eks-worker-al2.json @@ -14,7 +14,7 @@ "binary_bucket_region": "us-west-2", "kubernetes_version": null, "kubernetes_build_date": null, - "docker_version": "18.06", + "docker_version": "18.09.9ce-2.amzn2", "cni_version": "v0.6.0", "cni_plugin_version": "v0.7.5", From c7528b5ba58be5ad9af140a0d81c555913eef1d1 Mon Sep 17 00:00:00 2001 From: Kausheel Kumar Date: Thu, 14 Nov 2019 03:36:55 +1100 Subject: [PATCH 013/621] Remove the ec2-net-utils package (#368) * Remove the ec2-net-utils package * Add code comment to describe the ec2-net-utils change --- install-worker.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/install-worker.sh b/install-worker.sh index 8f58c4c26..25acb7330 100644 --- a/install-worker.sh +++ b/install-worker.sh @@ -64,6 +64,9 @@ sudo yum install -y \ unzip \ wget +# Remove the ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. +if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi + ################################################################################ ### Time ####################################################################### ################################################################################ From 593691ee46b2df9e7d3fa17818fe63724a78ed59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20Miguel=20Cust=C3=B3dio?= Date: Thu, 14 Nov 2019 08:52:11 +0000 Subject: [PATCH 014/621] Make 'kube-bench' happy. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bruno Miguel Custódio --- files/bootstrap.sh | 10 ++++++++++ files/kubelet-config.json | 3 +++ install-worker.sh | 10 ++++++++++ 3 files changed, 23 insertions(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index ee8c8a211..7c9998b3f 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -331,3 +331,13 @@ fi systemctl daemon-reload systemctl enable kubelet systemctl start kubelet + +# Wait for the TLS certificate and private key to be present and add these to the config file so they are picked up the next time the kubelet starts. +# These cannot be added upfront because they are not present when the kubelet first starts, and adding them would cause the kubelet to crash. +TLS_CERT_FILE="/var/lib/kubelet/pki/kubelet-server-current.pem" +while [[ ! -f $TLS_CERT_FILE ]] +do + sleep 1 +done +echo "$(jq ".tlsCertFile=\"$TLS_CERT_FILE\"" $KUBELET_CONFIG)" > $KUBELET_CONFIG +echo "$(jq ".tlsPrivateKeyFile=\"$TLS_CERT_FILE\"" $KUBELET_CONFIG)" > $KUBELET_CONFIG diff --git a/files/kubelet-config.json b/files/kubelet-config.json index 3a41c2a95..f03c60bde 100644 --- a/files/kubelet-config.json +++ b/files/kubelet-config.json @@ -25,9 +25,12 @@ "hairpinMode": "hairpin-veth", "cgroupDriver": "cgroupfs", "cgroupRoot": "/", + "eventRecordQPS": 0, "featureGates": { "RotateKubeletServerCertificate": true }, + "protectKernelDefaults": true, + "readOnlyPort": 0, "serializeImagePulls": false, "serverTLSBootstrap": true } diff --git a/install-worker.sh b/install-worker.sh index 25acb7330..c370462ad 100644 --- a/install-worker.sh +++ b/install-worker.sh @@ -227,6 +227,16 @@ EOF sudo mv /tmp/release /etc/eks/release sudo chown root:root /etc/eks/* +################################################################################ +### Stuff required by "protectKernelDefaults=true" ############################# 
+################################################################################ + +cat < Date: Wed, 6 Nov 2019 17:09:33 -0800 Subject: [PATCH 015/621] add support for c5d.12x/c5d.24x/c5d.metal --- amazon-eks-nodegroup.yaml | 3 +++ files/eni-max-pods.txt | 3 +++ 2 files changed, 6 insertions(+) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index f0536aa3a..3016f40d2 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -111,7 +111,10 @@ Parameters: - c5d.2xlarge - c5d.4xlarge - c5d.9xlarge + - c5d.12xlarge - c5d.18xlarge + - c5d.24xlarge + - c5d.metal - c5n.large - c5n.xlarge - c5n.2xlarge diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 4871bc46e..843c6dc7c 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -42,7 +42,10 @@ c5d.xlarge 58 c5d.2xlarge 58 c5d.4xlarge 234 c5d.9xlarge 234 +c5d.12xlarge 234 c5d.18xlarge 737 +c5d.24xlarge 737 +c5d.metal 737 c5n.large 29 c5n.xlarge 58 c5n.2xlarge 58 From c61b6e7dab14ee63ab92de41d0469680bec02597 Mon Sep 17 00:00:00 2001 From: sramabad1 <53882229+sramabad1@users.noreply.github.com> Date: Tue, 10 Dec 2019 16:34:51 -0800 Subject: [PATCH 016/621] Adding new instance types (m6g) (#378) --- files/eni-max-pods.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 843c6dc7c..6cae64e8a 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -162,6 +162,14 @@ m5n.8xlarge 234 m5n.12xlarge 234 m5n.16xlarge 737 m5n.24xlarge 737 +m6g.medium 8 +m6g.large 29 +m6g.xlarge 58 +m6g.2xlarge 58 +m6g.4xlarge 234 +m6g.8xlarge 234 +m6g.12xlarge 234 +m6g.16xlarge 737 p2.xlarge 58 p2.8xlarge 234 p2.16xlarge 234 From 388317ad6460ba3b23131f2e5f27edf3744ed508 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Wed, 11 Dec 2019 14:24:37 -0800 Subject: [PATCH 017/621] Revert "Make 'kube-bench' happy." since there are changes being concerned (#381) This reverts commit 593691ee46b2df9e7d3fa17818fe63724a78ed59. 
--- files/bootstrap.sh | 10 ---------- files/kubelet-config.json | 3 --- install-worker.sh | 10 ---------- 3 files changed, 23 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 7c9998b3f..ee8c8a211 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -331,13 +331,3 @@ fi systemctl daemon-reload systemctl enable kubelet systemctl start kubelet - -# Wait for the TLS certificate and private key to be present and add these to the config file so they are picked up the next time the kubelet starts. -# These cannot be added upfront because they are not present when the kubelet first starts, and adding them would cause the kubelet to crash. -TLS_CERT_FILE="/var/lib/kubelet/pki/kubelet-server-current.pem" -while [[ ! -f $TLS_CERT_FILE ]] -do - sleep 1 -done -echo "$(jq ".tlsCertFile=\"$TLS_CERT_FILE\"" $KUBELET_CONFIG)" > $KUBELET_CONFIG -echo "$(jq ".tlsPrivateKeyFile=\"$TLS_CERT_FILE\"" $KUBELET_CONFIG)" > $KUBELET_CONFIG diff --git a/files/kubelet-config.json b/files/kubelet-config.json index f03c60bde..3a41c2a95 100644 --- a/files/kubelet-config.json +++ b/files/kubelet-config.json @@ -25,12 +25,9 @@ "hairpinMode": "hairpin-veth", "cgroupDriver": "cgroupfs", "cgroupRoot": "/", - "eventRecordQPS": 0, "featureGates": { "RotateKubeletServerCertificate": true }, - "protectKernelDefaults": true, - "readOnlyPort": 0, "serializeImagePulls": false, "serverTLSBootstrap": true } diff --git a/install-worker.sh b/install-worker.sh index c370462ad..25acb7330 100644 --- a/install-worker.sh +++ b/install-worker.sh @@ -227,16 +227,6 @@ EOF sudo mv /tmp/release /etc/eks/release sudo chown root:root /etc/eks/* -################################################################################ -### Stuff required by "protectKernelDefaults=true" ############################# -################################################################################ - -cat < Date: Wed, 8 Jan 2020 08:58:12 +0900 Subject: [PATCH 018/621] Fixed setting of DNS_CLUSTER_IP in 
bootstrap.sh (#226) * Replaced API calls for deciding DNS_CLUSTER_IP with arg * Bypass the metadata calls to avoid 404 errors * Fall back to MAC logic if --dns-cluster-ip is absent * Updated comment for --dns-cluster-ip --- files/bootstrap.sh | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index ee8c8a211..a86076a04 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -23,6 +23,7 @@ function print_help { echo "--enable-docker-bridge Restores the docker default bridge network. (default: false)" echo "--aws-api-retry-attempts Number of retry attempts for AWS API call (DescribeCluster) (default: 3)" echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" + echo "--dns-cluster-ip Overrides the IP address to use for DNS queries within the cluster. Defaults to 10.100.0.10 or 172.20.0.10 based on the IP address of the primary interface" } POSITIONAL=() @@ -79,6 +80,11 @@ while [[ $# -gt 0 ]]; do shift shift ;; + --dns-cluster-ip) + DNS_CLUSTER_IP=$2 + shift + shift + ;; *) # unknown option POSITIONAL+=("$1") # save it in an array for later shift # past argument @@ -262,11 +268,15 @@ sed -i s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig ### kubelet.service configuration -MAC=$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/ -s | head -n 1 | sed 's/\/$//') -TEN_RANGE=$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks | grep -c '^10\..*' || true ) -DNS_CLUSTER_IP=10.100.0.10 -if [[ "$TEN_RANGE" != "0" ]] ; then - DNS_CLUSTER_IP=172.20.0.10; +if [ -z ${DNS_CLUSTER_IP+x} ]; then + MAC=$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/ -s | head -n 1 | sed 's/\/$//') + TEN_RANGE=$(curl -s 
http://169.254.169.254/latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks | grep -c '^10\..*' || true ) + DNS_CLUSTER_IP=10.100.0.10 + if [[ "$TEN_RANGE" != "0" ]]; then + DNS_CLUSTER_IP=172.20.0.10 + fi +else + DNS_CLUSTER_IP="${DNS_CLUSTER_IP}" fi KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json From 7b3366458aeea4eb3dad14d3eb1e56b09bc3306d Mon Sep 17 00:00:00 2001 From: Eric Webster Date: Wed, 8 Jan 2020 12:12:09 +0100 Subject: [PATCH 019/621] Support docker-in-docker by only returning the oldest dockerd process --- log-collector-script/eks-log-collector.sh | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/log-collector-script/eks-log-collector.sh b/log-collector-script/eks-log-collector.sh index 44ae0d7d5..e542464df 100644 --- a/log-collector-script/eks-log-collector.sh +++ b/log-collector-script/eks-log-collector.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash +#!/usr/bin/env bash # Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. # @@ -171,7 +171,7 @@ version_output() { log_parameters() { echo mode: "${mode}" >> "${COLLECT_DIR}"/system/script-params.txt echo ignore_introspection: "${ignore_introspection}" >> "${COLLECT_DIR}"/system/script-params.txt - echo ignore_metrics: "${ignore_metrics}" >> "${COLLECT_DIR}"/system/script-params.txt + echo ignore_metrics: "${ignore_metrics}" >> "${COLLECT_DIR}"/system/script-params.txt } systemd_check() { @@ -186,8 +186,8 @@ create_directories() { # Make sure the directory the script lives in is there. Not an issue if # the EKS AMI is used, as it will have it. 
mkdir --parents "${PROGRAM_DIR}" - - # Common directors creation + + # Common directors creation for directory in ${COMMON_DIRECTORIES[*]}; do mkdir --parents "${COLLECT_DIR}"/"${directory}" done @@ -294,7 +294,7 @@ get_selinux_info() { get_iptables_info() { try "collect iptables information" - + iptables --wait 1 --numeric --verbose --list --table mangle > "${COLLECT_DIR}"/networking/iptables-mangle.txt iptables --wait 1 --numeric --verbose --list --table filter > "${COLLECT_DIR}"/networking/iptables-filter.txt iptables --wait 1 --numeric --verbose --list --table nat > "${COLLECT_DIR}"/networking/iptables-nat.txt @@ -401,7 +401,7 @@ get_ipamd_info() { done else echo "Ignoring IPAM introspection stats as mentioned"| tee -a "${COLLECT_DIR}"/ipamd/ipam_introspection_ignore.txt - + fi if [[ "${ignore_metrics}" == "false" ]]; then @@ -418,7 +418,7 @@ get_sysctls_info() { try "collect sysctls information" # dump all sysctls sysctl --all >> "${COLLECT_DIR}"/sysctls/sysctl_all.txt 2>/dev/null - + ok } @@ -440,7 +440,7 @@ get_cni_config() { if [[ -e "/etc/cni/net.d/" ]]; then cp --force --recursive --dereference /etc/cni/net.d/* "${COLLECT_DIR}"/cni/ - fi + fi ok } @@ -500,7 +500,7 @@ get_system_services() { get_docker_info() { try "collect Docker daemon information" - if [[ "$(pgrep dockerd)" -ne 0 ]]; then + if [[ "$(pgrep -o dockerd)" -ne 0 ]]; then timeout 75 docker info > "${COLLECT_DIR}"/docker/docker-info.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker info output \" " timeout 75 docker ps --all --no-trunc > "${COLLECT_DIR}"/docker/docker-ps.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker ps --all --no-truc output \" " timeout 75 docker images > "${COLLECT_DIR}"/docker/docker-images.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker images output \" " @@ -543,7 +543,7 @@ enable_docker_debug() { confirm_enable_docker_debug() { read -r -p "${1:-Enabled Docker Debug will restart the Docker Daemon and restart all running container. Are you sure? 
[y/N]} " USER_INPUT case "$USER_INPUT" in - [yY][eE][sS]|[yY]) + [yY][eE][sS]|[yY]) enable_docker_debug ;; *) From 6c5c5a1018a3954ad3acf5bf7ced08f1f65be255 Mon Sep 17 00:00:00 2001 From: Florent Delannoy Date: Thu, 28 Nov 2019 13:57:34 +0000 Subject: [PATCH 020/621] TLS Ciphersuite: restrict to TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 See section 2.1.14 of the CIS benchmark: > [2.1.14] Ensure that the Kubelet only makes use of Strong Cryptographic Ciphers > If using a Kubelet config file, edit the file to set TLSCipherSuites: to TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_GCM_SHA256 > If using executable arguments, edit the kubelet service file /etc/systemd/system/kubelet.service on each worker node and set the below parameter. > --tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_GCM_SHA256 Note that this is a regression, this had been set previously in PR #276 but got lost in #352. 
--- files/kubelet-config.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/files/kubelet-config.json b/files/kubelet-config.json index 3a41c2a95..e1f051d93 100644 --- a/files/kubelet-config.json +++ b/files/kubelet-config.json @@ -29,5 +29,6 @@ "RotateKubeletServerCertificate": true }, "serializeImagePulls": false, - "serverTLSBootstrap": true + "serverTLSBootstrap": true, + "tlsCipherSuites": ["TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256"] } From b425e435b4b90963b73c6b6964329253a659511f Mon Sep 17 00:00:00 2001 From: Arun Bhagyanath <45223433+arun-amzn@users.noreply.github.com> Date: Fri, 10 Jan 2020 20:36:45 +0200 Subject: [PATCH 021/621] Script for collecting window and ubuntu worker logs (#354) * Script for collecting window worker logs * Ubuntu support and directory re-org * Collect files from EKS logs folder * Updates to kubelet svc and kubeconfig * Updated Readme for Windows --- log-collector-script/README.md | 112 +---- log-collector-script/eks-ssm-content.json | 44 -- log-collector-script/linux/README.md | 110 +++++ .../{ => linux}/eks-log-collector.sh | 37 +- .../linux/eks-ssm-content.json | 81 ++++ log-collector-script/windows/README.md | 114 ++++++ .../windows/eks-log-collector.ps1 | 382 ++++++++++++++++++ .../windows/eks-ssm-content.json | 81 ++++ 8 files changed, 795 insertions(+), 166 deletions(-) delete mode 100644 log-collector-script/eks-ssm-content.json create mode 100644 log-collector-script/linux/README.md rename log-collector-script/{ => linux}/eks-log-collector.sh (90%) create mode 100644 log-collector-script/linux/eks-ssm-content.json create mode 100644 log-collector-script/windows/README.md create mode 100644 log-collector-script/windows/eks-log-collector.ps1 create mode 100644 log-collector-script/windows/eks-ssm-content.json diff --git a/log-collector-script/README.md b/log-collector-script/README.md index 7f0d529f8..79951fa11 100644 --- a/log-collector-script/README.md +++ b/log-collector-script/README.md @@ -1,110 +1,2 
@@ -### EKS Logs Collector - -This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. - -#### Usage -* Collect EKS logs using SSM agent, jump to below [section](#collect-eks-logs-using-ssm-agent) _(or)_ - -* Run this project as the root user: -``` -curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/eks-log-collector.sh -sudo bash eks-log-collector.sh -``` - -Confirm if the tarball file was successfully created (it can be .tgz or .tar.gz) - -#### Retrieving the logs -Download the tarball using your favourite Secure Copy tool. - -#### Example output -The project can be used in normal or enable_debug(**Caution: enable_debug will prompt to confirm if we can restart Docker daemon which would kill running containers**). - -``` -# sudo bash eks-log-collector.sh --help -USAGE: eks-log-collector --help [ --mode=collect|enable_debug --ignore_introspection=true|false --ignore_metrics=true|false ] - -OPTIONS: - --mode Has two parameters 1) collect or 2) enable_debug,: - collect Gathers basic operating system, Docker daemon, and - Amazon EKS related config files and logs. This is the default mode. - enable_debug Enables debug mode for the Docker daemon(Not for production use) - - --ignore_introspection To ignore introspection of IPAMD; Pass this flag if DISABLE_INTROSPECTION is enabled on CNI - - --ignore_metrics To ignore prometheus metrics collection; Pass this flag if DISABLE_METRICS enabled on CNI - - --help Show this help message. 
- -Example to Ignore IPAMD introspection: -sudo bash eks-log-collector.sh --ignore_introspection=true - -Example to Ignore IPAMD Prometheus metrics collection: -sudo bash eks-log-collector.sh --ignore_metrics=true - -Example to Ignore IPAMD introspection and Prometheus metrics collection: -sudo bash eks-log-collector.sh --ignore_introspection=true --ignore_metrics=true -``` -#### Example output in normal mode -The following output shows this project running in normal mode. - -``` -sudo bash eks-log-collector.sh - - This is version 0.5.0. New versions can be found at https://github.com/awslabs/amazon-eks-ami - -Trying to collect common operating system logs... -Trying to collect kernel logs... -Trying to collect mount points and volume information... -Trying to collect SELinux status... -Trying to collect iptables information... -Trying to collect installed packages... -Trying to collect active system services... -Trying to collect Docker daemon information... -Trying to collect kubelet information... -Trying to collect L-IPAMD information... -Trying to collect sysctls information... -Trying to collect networking infomation... -Trying to collect CNI configuration information... -Trying to collect running Docker containers and gather container data... -Trying to collect Docker daemon logs... -Trying to archive gathered information... - - Done... your bundled logs are located in /opt/log-collector/eks_i-0717c9d54b6cfaa19_2019-02-02_0103-UTC_0.0.4.tar.gz -``` - - -### Collect EKS logs using SSM agent -#### To run EKS log collector script on Worker Node(s) and upload the bundle(tar) to a S3 Bucket using SSM agent, please follow below steps - -##### *Prerequisites*: - -* Configure AWS CLI on the system where you will run the below commands. The IAM entity (User/Role) should have permissions to run/invoke `aws ssm send-command` and `get-command-invocation` commands. - -* SSM agent should be installed and running on Worker Node(s). 
[How to Install SSM Agent link](https://docs.aws.amazon.com/systems-manager/latest/userguide/sysman-manual-agent-install.html) - -* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. - -        *Note:* For more granular control of the IAM permission check [AWS Systems Manager Permissions link ](https://docs.aws.amazon.com/systems-manager/latest/userguide/auth-and-access-control-permissions-reference.html) - -* A S3 bucket location is required which is taken as an input parameter to `aws ssm send-command` command, to which the logs should be pushed. - - -#### *To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s):* - -1. Create the SSM document named "EKSLogCollector" using the following command:
-``` -aws ssm create-document --name "EKSLogCollector" --document-type "Command" --content https://raw.githubusercontent.com/nithu0115/eks-logs-collector/master/eks-ssm-content.json -``` -2. To execute the bash script in the SSM document and to collect the logs from worker, run the following command:
-``` -aws ssm send-command --instance-ids <EC2 Instance ID> --document-name "EKSLogCollector" --parameters "bucketName=<S3 bucket name to push the logs>" --output json -``` -3. To check the status of SSM command submitted in previous step use the command
-``` -aws ssm get-command-invocation --command-id "<SSM command ID>" --instance-id "<EC2 Instance ID>" --output text -``` -    `SSM command ID`One of the response parameters after running `aws ssm send-command` in step2
-    `EC2 Instance ID`The EC2 Instance ID provided in the `aws ssm send-command` in step2 - -4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. - +### EKS Logs Collector +This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. \ No newline at end of file diff --git a/log-collector-script/eks-ssm-content.json b/log-collector-script/eks-ssm-content.json deleted file mode 100644 index 8c237a30f..000000000 --- a/log-collector-script/eks-ssm-content.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "schemaVersion":"2.2", - "description":"EKS Log Collector", - "parameters":{ - "bucketName":{ - "type": "String", - "default": "Enabled" - } - }, - "mainSteps":[ - { - "action":"aws:runShellScript", - "name":"PatchLinux", - "precondition":{ - "StringEquals":[ - "platformType", - "Linux" - ] - }, - "inputs":{ - "runCommand":[ - "curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/eks-log-collector.sh", - "bash ./eks-log-collector.sh >/dev/null 2>&1", - "echo \"EKS logs collected\"", - "if [ -f /usr/local/bin/aws ]; then", - "echo \"AWS_already_installed\"", - "else", - "echo \"Installing AWSCLI\"", - "curl \"https://s3.amazonaws.com/aws-cli/awscli-bundle.zip\" -o \"awscli-bundle.zip\" >/dev/null 2>&1", - "yum install unzip -y >/dev/null 2>&1", - "unzip awscli-bundle.zip >/dev/null 2>&1", - "./awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws", - "echo \"AWSCLI version is\"", - "/usr/local/bin/aws --version", - "fi", - "echo \"Pushing to S3\"", - "/usr/local/bin/aws s3 cp --recursive /opt/log-collector/ s3://{{bucketName}}", - "echo \"Logs uploaded to S3\"" - ] - } - } - ] -} - diff --git a/log-collector-script/linux/README.md b/log-collector-script/linux/README.md new file mode 100644 index 000000000..3811bc814 --- /dev/null +++ b/log-collector-script/linux/README.md @@ -0,0 +1,110 @@ +### 
EKS Logs Collector + +This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. + +#### Usage +* Collect EKS logs using SSM agent, jump to below [section](#collect-eks-logs-using-ssm-agent) _(or)_ + +* Run this project as the root user: +``` +curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-log-collector.sh +sudo bash eks-log-collector.sh +``` + +Confirm if the tarball file was successfully created (it can be .tgz or .tar.gz) + +#### Retrieving the logs +Download the tarball using your favourite Secure Copy tool. + +#### Example output +The project can be used in normal or enable_debug(**Caution: enable_debug will prompt to confirm if we can restart Docker daemon which would kill running containers**). + +``` +# sudo bash eks-log-collector.sh --help +USAGE: eks-log-collector --help [ --mode=collect|enable_debug --ignore_introspection=true|false --ignore_metrics=true|false ] + +OPTIONS: + --mode Has two parameters 1) collect or 2) enable_debug,: + collect Gathers basic operating system, Docker daemon, and + Amazon EKS related config files and logs. This is the default mode. + enable_debug Enables debug mode for the Docker daemon(Not for production use) + + --ignore_introspection To ignore introspection of IPAMD; Pass this flag if DISABLE_INTROSPECTION is enabled on CNI + + --ignore_metrics To ignore prometheus metrics collection; Pass this flag if DISABLE_METRICS enabled on CNI + + --help Show this help message. 
+ +Example to Ignore IPAMD introspection: +sudo bash eks-log-collector.sh --ignore_introspection=true + +Example to Ignore IPAMD Prometheus metrics collection: +sudo bash eks-log-collector.sh --ignore_metrics=true + +Example to Ignore IPAMD introspection and Prometheus metrics collection: +sudo bash eks-log-collector.sh --ignore_introspection=true --ignore_metrics=true +``` +#### Example output in normal mode +The following output shows this project running in normal mode. + +``` +sudo bash eks-log-collector.sh + + This is version 0.5.0. New versions can be found at https://github.com/awslabs/amazon-eks-ami + +Trying to collect common operating system logs... +Trying to collect kernel logs... +Trying to collect mount points and volume information... +Trying to collect SELinux status... +Trying to collect iptables information... +Trying to collect installed packages... +Trying to collect active system services... +Trying to collect Docker daemon information... +Trying to collect kubelet information... +Trying to collect L-IPAMD information... +Trying to collect sysctls information... +Trying to collect networking infomation... +Trying to collect CNI configuration information... +Trying to collect running Docker containers and gather container data... +Trying to collect Docker daemon logs... +Trying to archive gathered information... + + Done... your bundled logs are located in /opt/log-collector/eks_i-0717c9d54b6cfaa19_2019-02-02_0103-UTC_0.0.4.tar.gz +``` + + +### Collect EKS logs using SSM agent +#### To run EKS log collector script on Worker Node(s) and upload the bundle(tar) to a S3 Bucket using SSM agent, please follow below steps + +##### *Prerequisites*: + +* Configure AWS CLI on the system where you will run the below commands. The IAM entity (User/Role) should have permissions to run/invoke `aws ssm send-command` and `get-command-invocation` commands. + +* SSM agent should be installed and running on Worker Node(s). 
[How to Install SSM Agent link](https://docs.aws.amazon.com/systems-manager/latest/userguide/sysman-manual-agent-install.html) + +* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. + +        *Note:* For more granular control of the IAM permission check [AWS Systems Manager Permissions link ](https://docs.aws.amazon.com/systems-manager/latest/userguide/auth-and-access-control-permissions-reference.html) + +* A S3 bucket location is required which is taken as an input parameter to `aws ssm send-command` command, to which the logs should be pushed. + + +#### *To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s):* + +1. Create the SSM document named "EKSLogCollector" using the following command:
+``` +aws ssm create-document --name "EKSLogCollector" --document-type "Command" --content https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-ssm-content.json +``` +2. To execute the bash script in the SSM document and to collect the logs from worker, run the following command:
+``` +aws ssm send-command --instance-ids <EC2 Instance ID> --document-name "EKSLogCollector" --parameters "bucketName=<S3 bucket name to push the logs>" --output json +``` +3. To check the status of SSM command submitted in previous step use the command
+``` +aws ssm get-command-invocation --command-id "<SSM command ID>" --instance-id "<EC2 Instance ID>" --output text +``` +    `SSM command ID`One of the response parameters after running `aws ssm send-command` in step2
+    `EC2 Instance ID`The EC2 Instance ID provided in the `aws ssm send-command` in step2 + +4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. + diff --git a/log-collector-script/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh similarity index 90% rename from log-collector-script/eks-log-collector.sh rename to log-collector-script/linux/eks-log-collector.sh index e542464df..f22907f9e 100644 --- a/log-collector-script/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -21,7 +21,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.5.1" +readonly PROGRAM_VERSION="0.5.2" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -177,7 +177,10 @@ log_parameters() { systemd_check() { if command -v systemctl >/dev/null 2>&1; then INIT_TYPE="systemd" - else + if command -v snap >/dev/null 2>&1; then + INIT_TYPE="snap" + fi + else INIT_TYPE="other" fi } @@ -337,7 +340,7 @@ get_docker_logs() { try "collect Docker daemon logs" case "${INIT_TYPE}" in - systemd) + systemd|snap) journalctl --unit=docker --since "${DAYS_10}" > "${COLLECT_DIR}"/docker/docker.log ;; other) @@ -360,30 +363,40 @@ get_k8s_info() { if [[ -n "${KUBECONFIG:-}" ]]; then command -v kubectl > /dev/null && kubectl get --kubeconfig=${KUBECONFIG} svc > "${COLLECT_DIR}"/kubelet/svc.log - kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + command -v kubectl > /dev/null && kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml elif [[ -f /etc/eksctl/kubeconfig.yaml ]]; then KUBECONFIG="/etc/eksctl/kubeconfig.yaml" command -v kubectl > /dev/null && kubectl get --kubeconfig=${KUBECONFIG} svc > 
"${COLLECT_DIR}"/kubelet/svc.log - kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + command -v kubectl > /dev/null && kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml elif [[ -f /etc/systemd/system/kubelet.service ]]; then KUBECONFIG=`grep kubeconfig /etc/systemd/system/kubelet.service | awk '{print $2}'` command -v kubectl > /dev/null && kubectl get --kubeconfig=${KUBECONFIG} svc > "${COLLECT_DIR}"/kubelet/svc.log - kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + command -v kubectl > /dev/null && kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + elif [[ -f /var/lib/kubelet/kubeconfig ]]; then + KUBECONFIG="/var/lib/kubelet/kubeconfig" + command -v kubectl > /dev/null && kubectl get --kubeconfig=${KUBECONFIG} svc > "${COLLECT_DIR}"/kubelet/svc.log + command -v kubectl > /dev/null && kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + else echo "======== Unable to find KUBECONFIG, IGNORING POD DATA =========" >> "${COLLECT_DIR}"/kubelet/svc.log fi + # Try to copy the kubeconfig file if kubectl command doesn't exist + [[ (! 
-f "${COLLECT_DIR}/kubelet/kubeconfig.yaml") && ( -n ${KUBECONFIG}) ]] && cp ${KUBECONFIG} "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + case "${INIT_TYPE}" in systemd) timeout 75 journalctl --unit=kubelet --since "${DAYS_10}" > "${COLLECT_DIR}"/kubelet/kubelet.log - timeout 75 journalctl --unit=kubeproxy --since "${DAYS_10}" > "${COLLECT_DIR}"/kubelet/kubeproxy.log - for entry in kubelet kube-proxy; do - systemctl cat "${entry}" > "${COLLECT_DIR}"/kubelet/"${entry}"_service.txt 2>&1 - done + systemctl cat kubelet > "${COLLECT_DIR}"/kubelet/kubelet_service.txt 2>&1 + ;; + snap) + timeout 75 snap logs kubelet-eks -n all > "${COLLECT_DIR}"/kubelet/kubelet.log + + timeout 75 snap get kubelet-eks > "${COLLECT_DIR}"/kubelet/kubelet-eks_service.txt 2>&1 ;; *) warning "The current operating system is not supported." @@ -448,7 +461,7 @@ get_cni_config() { get_pkgtype() { if [[ "$(command -v rpm )" ]]; then PACKAGE_TYPE=rpm - elif [[ "$(command -v deb )" ]]; then + elif [[ "$(command -v dpkg )" ]]; then PACKAGE_TYPE=deb else PACKAGE_TYPE='unknown' @@ -477,7 +490,7 @@ get_system_services() { try "collect active system services" case "${INIT_TYPE}" in - systemd) + systemd|snap) systemctl list-units > "${COLLECT_DIR}"/system/services.txt 2>&1 ;; other) diff --git a/log-collector-script/linux/eks-ssm-content.json b/log-collector-script/linux/eks-ssm-content.json new file mode 100644 index 000000000..e0a0f7ced --- /dev/null +++ b/log-collector-script/linux/eks-ssm-content.json @@ -0,0 +1,81 @@ +{ + "schemaVersion": "2.2", + "description": "EKS Log Collector", + "parameters": { + "bucketName": { + "type": "String", + "default": "Enabled" + } + }, + "mainSteps": [ + { + "action": "aws:runShellScript", + "name": "PatchLinux", + "precondition": { + "StringEquals": [ + "platformType", + "Linux" + ] + }, + "inputs": { + "runCommand": [ + "curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-log-collector.sh", + "bash 
./eks-log-collector.sh >/dev/null 2>&1", + "echo \"EKS logs collected\"", + "if [ -f /usr/local/bin/aws ]; then", + "echo \"AWS_already_installed\"", + "else", + "echo \"Installing AWSCLI\"", + "curl \"https://s3.amazonaws.com/aws-cli/awscli-bundle.zip\" -o \"awscli-bundle.zip\" >/dev/null 2>&1", + "yum install unzip -y >/dev/null 2>&1", + "unzip awscli-bundle.zip >/dev/null 2>&1", + "./awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws", + "echo \"AWSCLI version is\"", + "/usr/local/bin/aws --version", + "fi", + "echo \"Pushing to S3\"", + "/usr/local/bin/aws s3 cp --recursive /opt/log-collector/ s3://{{bucketName}}", + "echo \"Logs uploaded to S3\"" + ] + } + }, + { + "precondition": { + "StringEquals": [ + "platformType", + "Windows" + ] + }, + "action": "aws:runPowerShellScript", + "name": "PatchWindows", + "inputs": { + "runCommand": [ + "if (!(Get-Module 'AWSPowerShell')) { ", + " Write-Host 'AWSPowerShell does not exist' ", + " Install-Module -Name AWSPowerShell -Force ", + "} ", + "try { ", + " Write-Host 'Downloading EKS Log collector script' ", + " Invoke-WebRequest -UseBasicParsing 'https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-log-collector.ps1' -OutFile eks-log-collector.ps1 ", + "} ", + "catch { ", + " Write-Host 'Uploaded to S3 failed' ", + " break ", + "} ", + "powershell .\\eks-log-collector.ps1", + "try { ", + " Write-Host 'Pushing to S3' ", + " Write-S3Object -BucketName {{bucketName}} -Folder C:\\log-collector -KeyPrefix eks-log-collector\\ -SearchPattern *.zip ", + " Write-Host 'Logs uploaded to S3' ", + "} ", + "catch { ", + " Write-Host 'Uploaded to S3 failed' ", + " break ", + "} ", + "", + "Remove-Item -Force .\\eks-log-collector.ps1 -ErrorAction Ignore " + ] + } + } + ] +} \ No newline at end of file diff --git a/log-collector-script/windows/README.md b/log-collector-script/windows/README.md new file mode 100644 index 000000000..e7d12cd07 --- /dev/null +++ 
b/log-collector-script/windows/README.md @@ -0,0 +1,114 @@ +### EKS Logs Collector + +This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. + +#### Usage +* Collect EKS logs using SSM agent, jump to below [section](#collect-eks-logs-using-ssm-agent) _(or)_ + +* Run this project as the Administrator user: +``` +Invoke-WebRequest -OutFile eks-log-collector.ps1 https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-log-collector.ps1 +.\eks-log-collector.ps1 +``` + +#### Example output +The project can be used in normal or Enable/Disable Debug(**Caution: Enable/Disable Debug will restart Docker daemon which would kill running containers**). + +``` +# eks-log-collector.ps1 +USAGE: eks-log-collector [ -RunMode =Collect|EnableDebug|DisableDebug ] + +OPTIONS: + -RunMode Has three parameters 1) Collect, 2) EnableDebug 3) DisableDebug: + Collect Gathers basic operating system, Docker daemon, and + Amazon EKS related config files and logs. This is the default mode. + EnableDebug Enables debug mode for Docker daemon (Not for production use) + DisableDebug Disable debug mode for Docker daemon + +Enables debug mode for the Docker daemon: +eks-log-collector.ps1 -RunMode EnableDebug +``` +#### Example output in normal mode +The following output shows this project running in normal mode. 
+ +``` +.\eks-log-collector.ps1 +Running Default(Collect) Mode +Cleaning up directory +OK +Creating temporary directory +OK +Collecting System information +OK +Checking free disk space +C: drive has 58% free space +OK +Collecting System Logs +OK +Collecting Application Logs +OK +Collecting Volume info +OK +Collecting Windows Firewall info +Collecting Rules for Domain profile +Collecting Rules for Private profile +Collecting Rules for Public profile +OK +Collecting installed applications list +OK +Collecting Services list +OK +Collecting Docker daemon information +OK +Collecting Kubelet logs +OK +Collecting Kube-proxy logs +OK +Collecting kubelet information +OK +Collecting Docker daemon logs +OK +Collecting EKS logs +OK +Collecting network Information +OK +Archiving gathered data +Done... your bundled logs are located in C:\log-collector\eks_i-0b318f704c74b6ab2_20200101T0620179658Z.zip +``` + + +### Collect EKS logs using SSM agent +#### To run EKS log collector script on Worker Node(s) and upload the bundle(tar) to a S3 Bucket using SSM agent, please follow below steps + +##### *Prerequisites*: + +* Configure AWS CLI on the system where you will run the below commands. The IAM entity (User/Role) should have permissions to run/invoke `aws ssm send-command` and `get-command-invocation` commands. + +* SSM agent should be installed and running on Worker Node(s). [How to Install SSM Agent link](https://docs.aws.amazon.com/systems-manager/latest/userguide/sysman-manual-agent-install.html) + +* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. 
+        *Note:* For more granular control of the IAM permissions, see the [AWS Systems Manager Permissions link](https://docs.aws.amazon.com/systems-manager/latest/userguide/auth-and-access-control-permissions-reference.html) + +* An S3 bucket location is required; it is passed as an input parameter to the `aws ssm send-command` command and is where the logs will be pushed. + + +#### *To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s):* + +1. Create the SSM document named "EKSLogCollector" using the following command:
+``` +aws ssm create-document --name "EKSLogCollector" --document-type "Command" --content https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-ssm-content.json +``` +2. To execute the log collector script in the SSM document and collect the logs from the worker node, run the following command:
+``` +aws ssm send-command --instance-ids <EC2 Instance ID> --document-name "EKSLogCollector" --parameters "bucketName=<S3 bucket name>" --output json +``` +3. To check the status of the SSM command submitted in the previous step, use the following command:
+``` +aws ssm get-command-invocation --command-id "<SSM command ID>" --instance-id "<EC2 Instance ID>" --output text +``` +    `SSM command ID`: One of the response parameters after running `aws ssm send-command` in step 2
+    `EC2 Instance ID`The EC2 Instance ID provided in the `aws ssm send-command` in step2 + +4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. + diff --git a/log-collector-script/windows/eks-log-collector.ps1 b/log-collector-script/windows/eks-log-collector.ps1 new file mode 100644 index 000000000..feb173efb --- /dev/null +++ b/log-collector-script/windows/eks-log-collector.ps1 @@ -0,0 +1,382 @@ +<# + Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. + Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at + + http://aws.amazon.com/apache2.0/ + + or in the "license" file accompanying this file. + This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + +.SYNOPSIS + Collects EKS Logs +.DESCRIPTION + Run the script to gather basic operating system, Docker daemon, and kubelet logs. + +.NOTES + You need to run this script with Elevated permissions to allow for the collection of the installed applications list +.EXAMPLE + eks-log-collector.ps1 + Gather basic operating system, Docker daemon, and kubelet logs. +.EXAMPLE + eks-log-collector.ps1 -RunMode EnableDebug + Enables debug mode for the Docker daemon. 
+ +#> + +param( + [Parameter(Mandatory=$False)][string]$RunMode = "Collect" + ) + +# Common options +$basedir="C:\log-collector" +$instanceid = Invoke-RestMethod -uri http://169.254.169.254/latest/meta-data/instance-id +$curtime = Get-Date -Format FileDateTimeUniversal +$outfilename = "eks_" + $instanceid + "_" + $curtime + ".zip" +$infodir="$basedir\collect" +$info_system="$infodir\system" + + +# Common functions +# --------------------------------------------------------------------------------------- + +Function is_elevated{ + If (-NOT ([Security.Principal.WindowsPrincipal] [Security.Principal.WindowsIdentity]::GetCurrent()).IsInRole(` + [Security.Principal.WindowsBuiltInRole] "Administrator")) { + Write-warning "This script requires elevated privileges to copy registry keys to the EKS logs collector folder." + Write-Host "Please re-launch as Administrator." -foreground "red" -background "black" + break + } +} + + +Function create_working_dir{ + try { + Write-Host "Creating temporary directory" + New-Item -type directory -path $info_system -Force >$null + New-Item -type directory -path $info_system\eks -Force >$null + New-Item -type directory -path $info_system\docker -Force >$null + New-Item -type directory -path $info_system\firewall -Force >$null + New-Item -type directory -path $info_system\kubelet -Force >$null + New-Item -type directory -path $info_system\kube-proxy -Force >$null + New-Item -type directory -path $info_system\cni -Force >$null + New-Item -type directory -path $info_system\docker_log -Force >$null + New-Item -type directory -path $info_system\network -Force >$null + New-Item -type directory -path $info_system\network\hns -Force >$null + Write-Host "OK" -ForegroundColor "green" + } + catch { + Write-Host "Unable to create temporary directory" + Write-Host "Please ensure you have enough permissions to create directories" + Write-Error "Failed to create temporary directory" + Break + } +} + +Function get_sysinfo{ + try { + Write-Host 
"Collecting System information" + systeminfo.exe > $info_system\sysinfo + Write-Host "OK" -ForegroundColor "green" + } + catch { + Write-Error "Unable to collect system information" + Break + } + +} + +Function is_diskfull{ + $threshold = 30 + try { + Write-Host "Checking free disk space" + $drive = Get-WmiObject Win32_LogicalDisk -Filter "DeviceID='C:'" + $percent = ([math]::round($drive.FreeSpace/1GB, 0) / ([math]::round($drive.Size/1GB, 0)) * 100) + Write-Host "C: drive has $percent% free space" + Write-Host "OK" -ForegroundColor "green" + } + catch { + Write-Error "Unable to Determine Free Disk Space" + Break + } + if ($percent -lt $threshold){ + Write-Error "C: drive only has $percent% free space, please ensure there is at least $threshold% free disk space to collect and store the log files" + Break + } +} + +Function get_system_logs{ + try { + Write-Host "Collecting System Logs" + Get-WinEvent -LogName System | Select-Object timecreated,leveldisplayname,machinename,message | export-csv -Path $info_system\system-eventlogs.csv + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to Collect System Logs" + break + } +} + +Function get_application_logs{ + try { + Write-Host "Collecting Application Logs" + Get-WinEvent -LogName Application | Select-Object timecreated,leveldisplayname,machinename,message | export-csv -Path $info_system\application-eventlogs.csv + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to Collect Application Logs" + break + } +} + +Function get_volumes_info{ + try { + Write-Host "Collecting Volume info" + Get-psdrive -PSProvider 'FileSystem' | Out-file $info_system\volumes + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to Collect Volume information" + break + } +} + +Function get_firewall_info{ + try { + Write-Host "Collecting Windows Firewall info" + $fw = Get-NetFirewallProfile + foreach ($f in $fw){ + if ($f.Enabled -eq "True"){ + $file = $f.name + 
Write-Host "Collecting Rules for" $f.name "profile" + Get-NetFirewallProfile -Name $f.name | Get-NetFirewallRule | Out-file $info_system\firewall\firewall-$file + } + } + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to Collect Windows Firewall information" + break + } +} + +Function get_softwarelist{ + try { + Write-Host "Collecting installed applications list" + gp HKLM:\Software\Microsoft\Windows\CurrentVersion\Uninstall\* |Select DisplayName, DisplayVersion, Publisher, InstallDate, HelpLink, UninstallString | out-file $info_system\installed-64bit-apps.txt + gp HKLM:\Software\Wow6432Node\Microsoft\Windows\CurrentVersion\Uninstall\* |Select DisplayName, DisplayVersion, Publisher, InstallDate, HelpLink, UninstallString | out-file $info_system\installed-32bit-apps.txt + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect installed applications list" + break + } +} + +Function get_system_services{ + try { + Write-Host "Collecting Services list" + get-service | fl | out-file $info_system\services + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect Services list" + break + } +} + +Function get_docker_info{ + try { + Write-Host "Collecting Docker daemon information" + docker info > $info_system\docker\docker-info.txt 2>&1 + docker ps --all --no-trunc > $info_system\docker\docker-ps.txt 2>&1 + docker images > $info_system\docker\docker-images.txt 2>&1 + docker version > $info_system\docker\docker-version.txt 2>&1 + Write-Host "OK" -foregroundcolor "green" + } + catch{ + Write-Error "Unable to collect Docker daemon information" + Break + } +} + +Function get_eks_logs{ + try { + Write-Host "Collecting EKS logs" + copy C:\ProgramData\Amazon\EKS\logs\* $info_system\eks\ + Write-Host "OK" -foregroundcolor "green" + } + catch{ + Write-Error "Unable to collect ECS Agent logs" + Break + } +} + +Function get_k8s_info{ + try { + Write-Host "Collecting Kubelet logs" + 
Get-EventLog -LogName EKS -Source kubelet | Sort-Object Time | Export-CSV $info_system/kubelet/kubelet-service.csv + Write-Host "OK" -foregroundcolor "green" + } + catch{ + Write-Error "Unable to collect Kubelet logs" + Break + } + + try { + Write-Host "Collecting Kube-proxy logs" + Get-EventLog -LogName EKS -Source kube-proxy | Sort-Object Time | Export-CSV $info_system/kube-proxy/kube-proxy-service.csv + Write-Host "OK" -foregroundcolor "green" + } + catch{ + Write-Error "Unable to collect Kube-proxy logs" + Break + } + + try { + Write-Host "Collecting kubelet information" + copy C:\ProgramData\kubernetes\kubeconfig $info_system\kubelet\ + copy C:\ProgramData\kubernetes\kubelet-config.json $info_system\kubelet\ + copy C:\ProgramData\Amazon\EKS\cni\config\vpc-shared-eni.conf $info_system\cni\ + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect kubelet information" + Break + } +} + +Function get_docker_logs{ + try { + Write-Host "Collecting Docker daemon logs" + Get-EventLog -LogName Application -Source Docker | Sort-Object Time | Export-CSV $info_system/docker_log/docker-daemon.csv + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect Docker daemon logs" + Break + } +} + +Function get_network_info{ + try { + Write-Host "Collecting network Information" + Get-HnsNetwork | Select Name, Type, Id, AddressPrefix > $info_system\network\hns\network.txt + Get-hnsnetwork | Convertto-json -Depth 20 >> $info_system\network\hns\network.txt + Get-hnsnetwork | % { Get-HnsNetwork -Id $_.ID -Detailed } | Convertto-json -Depth 20 >> $info_system\network\hns\networkdetailed.txt + + Get-HnsEndpoint | Select IpAddress, MacAddress, IsRemoteEndpoint, State > $info_system\network\hns\endpoint.txt + Get-hnsendpoint | Convertto-json -Depth 20 >> $info_system\network\hns\endpoint.txt + + Get-hnspolicylist | Convertto-json -Depth 20 > $info_system\network\hns\policy.txt + + vfpctrl.exe /list-vmswitch-port > 
$info_system\network\ports.txt + ipconfig /allcompartments /all > $info_system\network\ip.txt + route print > $info_system\network\routes.txt + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect network information" + Break + } +} +Function enable_docker_debug{ + try { + Write-Host "Enabling debug mode for the Docker Service" + if (sc.exe qc docker | where-object {$_ -like '*-D*'}){ + Write-Host "Debug mode already enabled" -foregroundcolor "yellow" + } + else { + sc.exe config docker binPath= "C:\Program Files\Docker\dockerd.exe --run-service -D" + Restart-service Docker + Write-Host "OK" -foregroundcolor "green" + } + } + catch { + Write-Error "Failed to enable debug mode" + Break + } +} + +Function disable_docker_debug{ + try { + Write-Host "Disabling debug mode for the Docker Service" + if (sc.exe qc docker | where-object {$_ -like '*-D*'}){ + sc.exe config docker binPath= "C:\Program Files\Docker\dockerd.exe --run-service" + Restart-service Docker + Write-Host "OK" -foregroundcolor "green" + } + else { + Write-Host "Debug mode already disabled" -foregroundcolor "yellow" + } + } + catch { + Write-Error "Failed to disable debug mode" + Break + } +} + +Function cleanup{ + Write-Host "Cleaning up directory" + Remove-Item -Recurse -Force $basedir -ErrorAction Ignore + Write-Host "OK" -foregroundcolor green +} + +Function pack{ + try { + Write-Host "Archiving gathered data" + Compress-Archive -Path $infodir\* -CompressionLevel Optimal -DestinationPath $basedir\$outfilename + Remove-Item -Recurse -Force $infodir -ErrorAction Ignore + Write-Host "Done... 
your bundled logs are located in " $basedir\$outfilename + } + catch { + Write-Error "Unable to archive data" + Break + } +} + +Function init{ + is_elevated + create_working_dir + get_sysinfo +} + +Function collect{ + init + is_diskfull + get_system_logs + get_application_logs + get_volumes_info + get_firewall_info + get_softwarelist + get_system_services + get_docker_info + get_k8s_info + get_docker_logs + get_eks_logs + get_network_info + +} + +Function enable_debug{ + enable_docker_debug +} + +Function disable_debug{ + disable_docker_debug +} + +if ($RunMode -eq "Collect"){ + Write-Host "Running Default(Collect) Mode" -foregroundcolor "blue" + cleanup + collect + pack +} elseif ($RunMode -eq "EnableDebug"){ + Write-Host "Enabling Debug for Docker" -foregroundcolor "blue" + enable_debug +} elseif ($RunMode -eq "DisableDebug"){ + Write-Host "Disabling Debug for Docker" -foregroundcolor "blue" + disable_debug +} else { + Write-Host "You need to specify either Collect, EnableDebug or DisableDebug RunMode" -ForegroundColor "red" + Break +} diff --git a/log-collector-script/windows/eks-ssm-content.json b/log-collector-script/windows/eks-ssm-content.json new file mode 100644 index 000000000..c2f4f4ed5 --- /dev/null +++ b/log-collector-script/windows/eks-ssm-content.json @@ -0,0 +1,81 @@ +{ + "schemaVersion":"2.2", + "description":"EKS Log Collector", + "parameters":{ + "bucketName":{ + "type": "String", + "default": "Enabled" + } + }, + "mainSteps":[ + { + "action": "aws:runShellScript", + "name": "PatchLinux", + "precondition": { + "StringEquals": [ + "platformType", + "Linux" + ] + }, + "inputs": { + "runCommand": [ + "curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-log-collector.sh", + "bash ./eks-log-collector.sh >/dev/null 2>&1", + "echo \"EKS logs collected\"", + "if [ -f /usr/local/bin/aws ]; then", + "echo \"AWS_already_installed\"", + "else", + "echo \"Installing AWSCLI\"", + "curl 
\"https://s3.amazonaws.com/aws-cli/awscli-bundle.zip\" -o \"awscli-bundle.zip\" >/dev/null 2>&1", + "yum install unzip -y >/dev/null 2>&1", + "unzip awscli-bundle.zip >/dev/null 2>&1", + "./awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws", + "echo \"AWSCLI version is\"", + "/usr/local/bin/aws --version", + "fi", + "echo \"Pushing to S3\"", + "/usr/local/bin/aws s3 cp --recursive /opt/log-collector/ s3://{{bucketName}}", + "echo \"Logs uploaded to S3\"" + ] + } + }, + { + "precondition": { + "StringEquals": [ + "platformType", + "Windows" + ] + }, + "action": "aws:runPowerShellScript", + "name": "PatchWindows", + "inputs": { + "runCommand": [ + "if (!(Get-Module 'AWSPowerShell')) { ", + " Write-Host 'AWSPowerShell does not exist' ", + " Install-Module -Name AWSPowerShell -Force ", + "} ", + "try { ", + " Write-Host 'Downloading EKS Log collector script' ", + " Invoke-WebRequest -UseBasicParsing 'https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-log-collector.ps1' -OutFile eks-log-collector.ps1 ", + "} ", + "catch { ", + " Write-Host 'Uploaded to S3 failed' ", + " break ", + "} ", + "powershell .\\eks-log-collector.ps1", + "try { ", + " Write-Host 'Pushing to S3' ", + " Write-S3Object -BucketName {{bucketName}} -Folder C:\\log-collector -KeyPrefix eks-log-collector\\ -SearchPattern *.zip ", + " Write-Host 'Logs uploaded to S3' ", + "} ", + "catch { ", + " Write-Host 'Uploaded to S3 failed' ", + " break ", + "} ", + "", + "Remove-Item -Force .\\eks-log-collector.ps1 -ErrorAction Ignore " + ] + } + } + ] +} \ No newline at end of file From b5c3f953b03433a4687ce9b58a10676c865c695e Mon Sep 17 00:00:00 2001 From: M00nF1sh Date: Thu, 16 Jan 2020 11:26:45 -0800 Subject: [PATCH 022/621] add ability to specify aws_region & binary_bucket_region & source_ami_owners (#396) --- Makefile | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index e2f5f75b2..edc36a766 100644 --- 
a/Makefile +++ b/Makefile @@ -1,10 +1,11 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := ami_name binary_bucket_name kubernetes_version kubernetes_build_date docker_version cni_version cni_plugin_version source_ami_id arch instance_type -AWS_DEFAULT_REGION ?= us-west-2 +PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date docker_version cni_version cni_plugin_version source_ami_id source_ami_owners arch instance_type K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) +aws_region ?= $(AWS_DEFAULT_REGION) +binary_bucket_region ?= $(AWS_DEFAULT_REGION) ami_name ?= amazon-eks-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') arch ?= x86_64 ifeq ($(arch), arm64) @@ -13,6 +14,10 @@ else instance_type ?= m4.large endif +ifeq ($(aws_region), cn-northwest-1) +source_ami_owners ?= 141808717104 +endif + T_RED := \e[0;31m T_GREEN := \e[0;32m T_YELLOW := \e[0;33m From 7695621fce7e2400eec946bafb3fca7ce521f4fd Mon Sep 17 00:00:00 2001 From: M00nF1sh Date: Tue, 21 Jan 2020 11:57:19 -0800 Subject: [PATCH 023/621] adding support for china regions (#398) --- amazon-eks-nodegroup.yaml | 15 ++++++++++----- files/bootstrap.sh | 8 +++++++- files/kubelet-kubeconfig | 2 ++ install-worker.sh | 8 ++++---- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index 3016f40d2..4f4c6cede 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -353,12 +353,17 @@ Parameters: Type: "AWS::EC2::VPC::Id" Description: The VPC of the worker instances +Mappings: + PartitionMap: + aws: + EC2ServicePrincipal: "ec2.amazonaws.com" + aws-cn: + EC2ServicePrincipal: "ec2.amazonaws.com.cn" Conditions: HasNodeImageId: !Not - "Fn::Equals": - Ref: NodeImageId - "" - Resources: NodeInstanceRole: Type: "AWS::IAM::Role" @@ -369,13 +374,13 @@ Resources: - Effect: Allow Principal: 
Service: - - ec2.amazonaws.com + - !FindInMap [PartitionMap, !Ref "AWS::Partition", EC2ServicePrincipal] Action: - "sts:AssumeRole" ManagedPolicyArns: - - "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" - - "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" - - "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" + - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonEKSWorkerNodePolicy" + - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonEKS_CNI_Policy" + - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" Path: / NodeInstanceProfile: diff --git a/files/bootstrap.sh b/files/bootstrap.sh index a86076a04..262faba9b 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -113,6 +113,10 @@ function get_pause_container_account_for_region () { echo "${PAUSE_CONTAINER_ACCOUNT:-800184023465}";; me-south-1) echo "${PAUSE_CONTAINER_ACCOUNT:-558608220178}";; + cn-north-1) + echo "${PAUSE_CONTAINER_ACCOUNT:-918309763551}";; + cn-northwest-1) + echo "${PAUSE_CONTAINER_ACCOUNT:-961992271922}";; *) echo "${PAUSE_CONTAINER_ACCOUNT:-602401143452}";; esac @@ -210,6 +214,7 @@ fi ZONE=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone) AWS_DEFAULT_REGION=$(echo $ZONE | awk '{print substr($0, 1, length($0)-1)}') +AWS_SERVICES_DOMAIN=$(curl -s http://169.254.169.254/2018-09-24/meta-data/services/domain) MACHINE=$(uname -m) if [ "$MACHINE" == "x86_64" ]; then @@ -222,7 +227,7 @@ else fi PAUSE_CONTAINER_ACCOUNT=$(get_pause_container_account_for_region "${AWS_DEFAULT_REGION}") -PAUSE_CONTAINER_IMAGE=${PAUSE_CONTAINER_IMAGE:-$PAUSE_CONTAINER_ACCOUNT.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/eks/pause-${ARCH}} +PAUSE_CONTAINER_IMAGE=${PAUSE_CONTAINER_IMAGE:-$PAUSE_CONTAINER_ACCOUNT.dkr.ecr.$AWS_DEFAULT_REGION.$AWS_SERVICES_DOMAIN/eks/pause-${ARCH}} PAUSE_CONTAINER="$PAUSE_CONTAINER_IMAGE:$PAUSE_CONTAINER_VERSION" ### kubelet kubeconfig @@ -266,6 +271,7 @@ echo $B64_CLUSTER_CA | base64 -d > $CA_CERTIFICATE_FILE_PATH sed -i 
s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig +sed -i s,AWS_REGION,$AWS_DEFAULT_REGION,g /var/lib/kubelet/kubeconfig ### kubelet.service configuration if [ -z ${DNS_CLUSTER_IP+x} ]; then diff --git a/files/kubelet-kubeconfig b/files/kubelet-kubeconfig index 8c1436883..91c25cf29 100644 --- a/files/kubelet-kubeconfig +++ b/files/kubelet-kubeconfig @@ -21,3 +21,5 @@ users: - "token" - "-i" - "CLUSTER_NAME" + - --region + - "AWS_REGION" \ No newline at end of file diff --git a/install-worker.sh b/install-worker.sh index 25acb7330..3ae8d78cf 100644 --- a/install-worker.sh +++ b/install-worker.sh @@ -157,11 +157,11 @@ sudo tar -xvf cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz -C /opt/cni/bin rm cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz.sha512 echo "Downloading binaries from: s3://$BINARY_BUCKET_NAME" -S3_DOMAIN="s3-$BINARY_BUCKET_REGION" -if [ "$BINARY_BUCKET_REGION" = "us-east-1" ]; then - S3_DOMAIN="s3" +S3_DOMAIN="amazonaws.com" +if [ "$BINARY_BUCKET_REGION" = "cn-north-1" ] || [ "$BINARY_BUCKET_REGION" = "cn-northwest-1" ]; then + S3_DOMAIN="amazonaws.com.cn" fi -S3_URL_BASE="https://$BINARY_BUCKET_NAME.$S3_DOMAIN.amazonaws.com/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/$ARCH" +S3_URL_BASE="https://$BINARY_BUCKET_NAME.s3.$BINARY_BUCKET_REGION.$S3_DOMAIN/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/$ARCH" S3_PATH="s3://$BINARY_BUCKET_NAME/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/$ARCH" BINARIES=( From 128a7149b6b50f0392d9a6951e24bd30f19f8e2a Mon Sep 17 00:00:00 2001 From: Justin Owen Date: Tue, 21 Jan 2020 21:36:55 -0800 Subject: [PATCH 024/621] kubelet.service should wait for iptables lock (#401) This commit makes kubelet.service wait up to 5 seconds for an iptables lock in the `ExecStartPre` step, instead of failing immediately if something else is holding the lock. 
--- files/kubelet.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/kubelet.service b/files/kubelet.service index 125e06798..ae33350f1 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -5,7 +5,7 @@ After=docker.service Requires=docker.service [Service] -ExecStartPre=/sbin/iptables -P FORWARD ACCEPT +ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 ExecStart=/usr/bin/kubelet --cloud-provider aws \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --allow-privileged=true \ From 2edc668badc2ef0cc6269373e359741e40ed0c27 Mon Sep 17 00:00:00 2001 From: M00nF1sh Date: Tue, 21 Jan 2020 21:38:51 -0800 Subject: [PATCH 025/621] fix tls suit to be recommended by cis bench (#403) --- files/kubelet-config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/kubelet-config.json b/files/kubelet-config.json index e1f051d93..af57dbf90 100644 --- a/files/kubelet-config.json +++ b/files/kubelet-config.json @@ -30,5 +30,5 @@ }, "serializeImagePulls": false, "serverTLSBootstrap": true, - "tlsCipherSuites": ["TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256"] + "tlsCipherSuites": ["TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256", "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305", "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305", "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", "TLS_RSA_WITH_AES_256_GCM_SHA384", "TLS_RSA_WITH_AES_128_GCM_SHA256"] } From f80c682680dfe2f2ffd65dde01e98da198e84c1f Mon Sep 17 00:00:00 2001 From: Aaron Ackerman Date: Thu, 23 Jan 2020 11:09:25 -0600 Subject: [PATCH 026/621] Fix retries in bootstrap.sh If `aws eks describe-cluster` fails the first time, the retries never work because the `rc` value is never able to be set back to zero --- files/bootstrap.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 262faba9b..9cbac1780 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh 
@@ -237,9 +237,10 @@ CA_CERTIFICATE_FILE_PATH=$CA_CERTIFICATE_DIRECTORY/ca.crt mkdir -p $CA_CERTIFICATE_DIRECTORY if [[ -z "${B64_CLUSTER_CA}" ]] && [[ -z "${APISERVER_ENDPOINT}" ]]; then DESCRIBE_CLUSTER_RESULT="/tmp/describe_cluster_result.txt" - rc=0 - # Retry the DescribleCluster API for API_RETRY_ATTEMPTS + + # Retry the DescribeCluster API for API_RETRY_ATTEMPTS for attempt in `seq 0 $API_RETRY_ATTEMPTS`; do + rc=0 if [[ $attempt -gt 0 ]]; then echo "Attempt $attempt of $API_RETRY_ATTEMPTS" fi From 9a8d80a65a813640b19ed9d1b6b44faf24b5a0da Mon Sep 17 00:00:00 2001 From: M00nF1sh Date: Mon, 27 Jan 2020 09:21:47 -0800 Subject: [PATCH 027/621] update binaries to use latest ones (#408) --- Makefile | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index edc36a766..90467fa18 100644 --- a/Makefile +++ b/Makefile @@ -37,18 +37,14 @@ k8s: validate # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html -.PHONY: 1.11 -1.11: - $(MAKE) k8s kubernetes_version=1.11.10 kubernetes_build_date=2019-08-14 - .PHONY: 1.12 1.12: - $(MAKE) k8s kubernetes_version=1.12.10 kubernetes_build_date=2019-08-14 + $(MAKE) k8s kubernetes_version=1.12.10 kubernetes_build_date=2020-01-22 .PHONY: 1.13 1.13: - $(MAKE) k8s kubernetes_version=1.13.8 kubernetes_build_date=2019-08-14 + $(MAKE) k8s kubernetes_version=1.13.12 kubernetes_build_date=2020-01-22 .PHONY: 1.14 1.14: - $(MAKE) k8s kubernetes_version=1.14.6 kubernetes_build_date=2019-08-22 + $(MAKE) k8s kubernetes_version=1.14.9 kubernetes_build_date=2020-01-22 \ No newline at end of file From 3bb08b6b574a4a0cf7d4c766d080511d71182623 Mon Sep 17 00:00:00 2001 From: M00nF1sh Date: Wed, 29 Jan 2020 00:14:14 -0800 Subject: [PATCH 028/621] validate_yum (#411) --- Makefile | 6 +-- eks-worker-al2.json | 23 +++++++++++- scripts/cleanup_additional_repos.sh | 27 ++++++++++++++ .../install-worker.sh | 0 scripts/install_additional_repos.sh | 37 
+++++++++++++++++++ scripts/validate.sh | 36 ++++++++++++++++++ 6 files changed, 124 insertions(+), 5 deletions(-) create mode 100644 scripts/cleanup_additional_repos.sh rename install-worker.sh => scripts/install-worker.sh (100%) create mode 100644 scripts/install_additional_repos.sh create mode 100644 scripts/validate.sh diff --git a/Makefile b/Makefile index 90467fa18..2a2fa3a8e 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date docker_version cni_version cni_plugin_version source_ami_id source_ami_owners arch instance_type +PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date docker_version cni_version cni_plugin_version source_ami_id source_ami_owners arch instance_type additional_yum_repos K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) @@ -28,12 +28,12 @@ all: 1.11 1.12 1.13 1.14 .PHONY: validate validate: - $(PACKER_BINARY) validate $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)=$($(packerVar)),)) eks-worker-al2.json + $(PACKER_BINARY) validate $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json .PHONY: k8s k8s: validate @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" - $(PACKER_BINARY) build $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)=$($(packerVar)),)) eks-worker-al2.json + $(PACKER_BINARY) build $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html diff --git 
a/eks-worker-al2.json b/eks-worker-al2.json index 86d0cb400..bf360d91e 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -29,8 +29,9 @@ "ssh_username": "ec2-user", "temporary_security_group_source_cidrs": "", "associate_public_ip_address": "", - "subnet_id": "" + "subnet_id": "", + "additional_yum_repos": "" }, "builders": [ @@ -101,9 +102,16 @@ "source": "{{template_dir}}/files/", "destination": "/tmp/worker/" }, + { + "type": "shell", + "script": "{{template_dir}}/scripts/install_additional_repos.sh", + "environment_vars": [ + "ADDITIONAL_YUM_REPOS={{user `additional_yum_repos`}}" + ] + }, { "type": "shell", - "script": "{{template_dir}}/install-worker.sh", + "script": "{{template_dir}}/scripts/install-worker.sh", "environment_vars": [ "KUBERNETES_VERSION={{user `kubernetes_version`}}", "KUBERNETES_BUILD_DATE={{user `kubernetes_build_date`}}", @@ -116,6 +124,17 @@ "AWS_SECRET_ACCESS_KEY={{user `aws_secret_access_key`}}", "AWS_SESSION_TOKEN={{user `aws_session_token`}}" ] + }, + { + "type": "shell", + "script": "{{template_dir}}/scripts/cleanup_additional_repos.sh", + "environment_vars": [ + "ADDITIONAL_YUM_REPOS={{user `additional_yum_repos`}}" + ] + }, + { + "type": "shell", + "script": "{{template_dir}}/scripts/validate.sh" } ], "post-processors": [ diff --git a/scripts/cleanup_additional_repos.sh b/scripts/cleanup_additional_repos.sh new file mode 100644 index 000000000..e2665b484 --- /dev/null +++ b/scripts/cleanup_additional_repos.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# +# Clean up additional YUM repositories, typically used for security patches. 
+# The format of ADDITIONAL_YUM_REPOS is: "repo=patches-repo,name=Install patches,baseurl=http://amazonlinux.$awsregion.$awsdomain/xxxx,priority=10" +# Multiple yum repos can be specified, separated by ';' + +if [ -z "${ADDITIONAL_YUM_REPOS}" ]; then + echo "no additional yum repo, skipping" + exit 0 +fi + + +AWK_CMD=' +BEGIN {RS=";";FS=","} +{ + delete vars; + for(i = 1; i <= NF; ++i) { + n = index($i, "="); + if(n) { + vars[substr($i, 1, n-1)] = substr($i, n + 1) + } + } + Repo = "/etc/yum.repos.d/"vars["repo"]".repo" +} +{cmd="rm -f " Repo; system(cmd)} +' +sudo awk "$AWK_CMD" <<< "${ADDITIONAL_YUM_REPOS}" \ No newline at end of file diff --git a/install-worker.sh b/scripts/install-worker.sh similarity index 100% rename from install-worker.sh rename to scripts/install-worker.sh diff --git a/scripts/install_additional_repos.sh b/scripts/install_additional_repos.sh new file mode 100644 index 000000000..afe7efed2 --- /dev/null +++ b/scripts/install_additional_repos.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# +# Install additional YUM repositories, typically used for security patches. +# The format of ADDITIONAL_YUM_REPOS is: "repo=patches-repo,name=Install patches,baseurl=http://amazonlinux.$awsregion.$awsdomain/xxxx,priority=10" +# which will create the file '/etc/yum.repos.d/patches-repo.repo' having the following content: +# ``` +# [patches-repo] +# name=Install patches +# baseurl=http://amazonlinux.$awsregion.$awsdomain/xxxx +# priority=10 +# ``` +# Note that priority is optional, but the other parameters are required. 
Multiple yum repos can be specified, each one separated by ';' + +if [ -z "${ADDITIONAL_YUM_REPOS}" ]; then + echo "no additional yum repo, skipping" + exit 0 +fi + + +AWK_CMD=' +BEGIN {RS=";";FS=","} +{ + delete vars; + for(i = 1; i <= NF; ++i) { + n = index($i, "="); + if(n) { + vars[substr($i, 1, n-1)] = substr($i, n + 1) + } + } + Repo = "/etc/yum.repos.d/"vars["repo"]".repo" +} +{print "["vars["repo"]"]" > Repo} +{print "name="vars["name"] > Repo} +{print "baseurl="vars["baseurl"] > Repo} +{if (length(vars["priority"]) != 0) print "priority="vars["priority"] > Repo} +' +sudo awk "$AWK_CMD" <<< "${ADDITIONAL_YUM_REPOS}" \ No newline at end of file diff --git a/scripts/validate.sh b/scripts/validate.sh new file mode 100644 index 000000000..cc6e22e8e --- /dev/null +++ b/scripts/validate.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# +# Do basic validation of the generated AMI + +# Validates that a file or blob doesn't exist +# +# Arguments: +# a file name or blob +# Returns: +# 1 if a file exists, after printing an error +validate_file_nonexists() { + local file_blob=$1 + for f in $file_blob; do + if [ -e "$f" ]; then + echo "$f shouldn't exists" + exit 1 + fi + done +} + +validate_file_nonexists '/etc/hostname' +validate_file_nonexists '/etc/resolv.conf' +validate_file_nonexists '/etc/ssh/ssh_host*' +validate_file_nonexists '/home/ec2-user/.ssh/authorized_keys' +validate_file_nonexists '/root/.ssh/authorized_keys' +validate_file_nonexists '/var/lib/cloud/data' +validate_file_nonexists '/var/lib/cloud/instance' +validate_file_nonexists '/var/lib/cloud/instances' +validate_file_nonexists '/var/lib/cloud/sem' +validate_file_nonexists '/var/lib/dhclient/*' +validate_file_nonexists '/var/lib/dhcp/dhclient.*' +validate_file_nonexists '/var/lib/yum/history' +validate_file_nonexists '/var/log/cloud-init-output.log' +validate_file_nonexists '/var/log/cloud-init.log' +validate_file_nonexists '/var/log/secure' +validate_file_nonexists '/var/log/wtmp' From 
e8f2a2eeb7f917e135f43e9f24d651ea460129b5 Mon Sep 17 00:00:00 2001 From: M00nF1sh Date: Wed, 29 Jan 2020 13:04:08 -0800 Subject: [PATCH 029/621] add ability to use precreated security group (#412) --- Makefile | 2 +- eks-worker-al2.json | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2a2fa3a8e..15f0d9d6e 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date docker_version cni_version cni_plugin_version source_ami_id source_ami_owners arch instance_type additional_yum_repos +PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date docker_version cni_version cni_plugin_version source_ami_id source_ami_owners arch instance_type security_group_id additional_yum_repos K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index bf360d91e..53c30f636 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -28,6 +28,7 @@ "ssh_interface": "", "ssh_username": "ec2-user", "temporary_security_group_source_cidrs": "", + "security_group_id": "", "associate_public_ip_address": "", "subnet_id": "", @@ -70,6 +71,7 @@ "ssh_username": "{{user `ssh_username`}}", "ssh_interface": "{{user `ssh_interface`}}", "temporary_security_group_source_cidrs": "{{user `temporary_security_group_source_cidrs`}}", + "security_group_id": "{{user `security_group_id`}}", "associate_public_ip_address": "{{user `associate_public_ip_address`}}", "ssh_pty": true, "encrypt_boot": "{{user `encrypted`}}", From 126dad842607cb1c8600402a99dc483eb140985b Mon Sep 17 00:00:00 2001 From: M00nF1sh Date: Fri, 31 Jan 2020 09:23:36 -0800 Subject: [PATCH 030/621] add scripts folder (#413) --- ArchiveBuildConfig.yaml | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/ArchiveBuildConfig.yaml b/ArchiveBuildConfig.yaml index f3631b356..e6663a0fa 100644 --- a/ArchiveBuildConfig.yaml +++ b/ArchiveBuildConfig.yaml @@ -8,10 +8,10 @@ dependencies: source: dirs: - src: files/ + - src: scripts/ files: - src: Makefile - src: eks-worker-al2.json - - src: install-worker.sh - src: amazon-eks-nodegroup.yaml archive: name: amazon-eks-ami.tar.gz From 2c0d75154226ddce8b9aa3cebce51e65bb19d9d3 Mon Sep 17 00:00:00 2001 From: Tam Mach Date: Thu, 20 Feb 2020 14:47:34 +1100 Subject: [PATCH 031/621] Remove invalid target 1.11 (#421) Currently, AWS EKS is no longer support Kubernetes 1.11 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 15f0d9d6e..4f9bba762 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.11 1.12 1.13 1.14 +all: 1.12 1.13 1.14 .PHONY: validate validate: From 1b3642c730cb72f291e2f055254cd443a98fa9f5 Mon Sep 17 00:00:00 2001 From: zadowsmash Date: Mon, 24 Feb 2020 09:59:24 +1100 Subject: [PATCH 032/621] Update install-worker.sh and eks-worker-al2.json (#402) * Update install-worker.sh and eks-worker-al2.json * Update kubelet.service * added ability to share amis in builder * added ability to share amis in builder * Rebasing from master * Added remote_folder to cleanup_additional_repos.sh provisioner * Added remote_folder to install_additional_repos.sh provisioner * Added remote_folder to validate.sh provisioner --- eks-worker-al2.json | 13 +++++++++++-- files/1.14/kubelet.service | 2 +- scripts/install-worker.sh | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 53c30f636..06862b4fc 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -31,7 +31,9 @@ "security_group_id": "", "associate_public_ip_address": "", "subnet_id": "", - + "remote_folder": "", + "launch_block_device_mappings_volume_size": 
"4", + "ami_users": "", "additional_yum_repos": "" }, @@ -40,6 +42,8 @@ "type": "amazon-ebs", "region": "{{user `aws_region`}}", "source_ami": "{{user `source_ami_id`}}", + "ami_users": "{{user `ami_users`}}", + "snapshot_users": "{{user `ami_users`}}", "source_ami_filter": { "filters": { "name": "{{user `source_ami_filter_name`}}", @@ -56,7 +60,7 @@ { "device_name": "/dev/xvda", "volume_type": "gp2", - "volume_size": 4, + "volume_size": "{{user `launch_block_device_mappings_volume_size`}}", "delete_on_termination": true } ], @@ -97,6 +101,7 @@ "provisioners": [ { "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", "inline": ["mkdir -p /tmp/worker/"] }, { @@ -106,6 +111,7 @@ }, { "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", "script": "{{template_dir}}/scripts/install_additional_repos.sh", "environment_vars": [ "ADDITIONAL_YUM_REPOS={{user `additional_yum_repos`}}" @@ -113,6 +119,7 @@ }, { "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", "script": "{{template_dir}}/scripts/install-worker.sh", "environment_vars": [ "KUBERNETES_VERSION={{user `kubernetes_version`}}", @@ -129,6 +136,7 @@ }, { "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", "script": "{{template_dir}}/scripts/cleanup_additional_repos.sh", "environment_vars": [ "ADDITIONAL_YUM_REPOS={{user `additional_yum_repos`}}" @@ -136,6 +144,7 @@ }, { "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", "script": "{{template_dir}}/scripts/validate.sh" } ], diff --git a/files/1.14/kubelet.service b/files/1.14/kubelet.service index 28a3b062a..358b659ae 100644 --- a/files/1.14/kubelet.service +++ b/files/1.14/kubelet.service @@ -5,7 +5,7 @@ After=docker.service Requires=docker.service [Service] -ExecStartPre=/sbin/iptables -P FORWARD ACCEPT +ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 ExecStart=/usr/bin/kubelet --cloud-provider aws \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ diff 
--git a/scripts/install-worker.sh b/scripts/install-worker.sh index 3ae8d78cf..db66f39b5 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -225,7 +225,7 @@ BUILD_KERNEL="$(uname -r)" ARCH="$(uname -m)" EOF sudo mv /tmp/release /etc/eks/release -sudo chown root:root /etc/eks/* +sudo chown -R root:root /etc/eks ################################################################################ ### Cleanup #################################################################### From 7a53fbb2f2fc001a659363862a05f16e94fb6d2f Mon Sep 17 00:00:00 2001 From: Murcherla Date: Fri, 6 Mar 2020 16:07:38 -0600 Subject: [PATCH 033/621] Remove mutating calls and ignore collection of unknown logs --- log-collector-script/linux/README.md | 7 +- .../linux/eks-log-collector.sh | 96 +++++-------------- log-collector-script/windows/README.md | 13 +-- .../windows/eks-log-collector.ps1 | 62 ++---------- 4 files changed, 30 insertions(+), 148 deletions(-) diff --git a/log-collector-script/linux/README.md b/log-collector-script/linux/README.md index 3811bc814..f9032aaca 100644 --- a/log-collector-script/linux/README.md +++ b/log-collector-script/linux/README.md @@ -21,14 +21,9 @@ The project can be used in normal or enable_debug(**Caution: enable_debug will p ``` # sudo bash eks-log-collector.sh --help -USAGE: eks-log-collector --help [ --mode=collect|enable_debug --ignore_introspection=true|false --ignore_metrics=true|false ] +USAGE: eks-log-collector --help [ --ignore_introspection=true|false --ignore_metrics=true|false ] OPTIONS: - --mode Has two parameters 1) collect or 2) enable_debug,: - collect Gathers basic operating system, Docker daemon, and - Amazon EKS related config files and logs. This is the default mode. 
- enable_debug Enables debug mode for the Docker daemon(Not for production use) - --ignore_introspection To ignore introspection of IPAMD; Pass this flag if DISABLE_INTROSPECTION is enabled on CNI --ignore_metrics To ignore prometheus metrics collection; Pass this flag if DISABLE_METRICS enabled on CNI diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index f22907f9e..9a1813293 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -21,7 +21,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.5.2" +readonly PROGRAM_VERSION="0.6.0" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -32,7 +32,6 @@ INIT_TYPE="" PACKAGE_TYPE="" # Script run defaults -mode='collect' ignore_introspection='false' ignore_metrics='false' @@ -85,13 +84,9 @@ IPAMD_DATA=( help() { echo "" - echo "USAGE: ${PROGRAM_NAME} --help [ --mode=collect|enable_debug --ignore_introspection=true|false --ignore_metrics=true|false ]" + echo "USAGE: ${PROGRAM_NAME} --help [ --ignore_introspection=true|false --ignore_metrics=true|false ]" echo "" echo "OPTIONS:" - echo " --mode Has two parameters 1) collect or 2) enable_debug,:" - echo " collect Gathers basic operating system, Docker daemon, and" - echo " Amazon EKS related config files and logs. This is the default mode." 
- echo " enable_debug Enables debug mode for the Docker daemon(Not for production use)" echo "" echo " --ignore_introspection To ignore introspection of IPAMD; Pass this flag if DISABLE_INTROSPECTION is enabled on CNI" echo "" @@ -110,9 +105,6 @@ parse_options() { val="$(echo "${arg}" | awk -F '=' '{print $2}')" case "${param}" in - mode) - eval "${param}"="${val}" - ;; ignore_introspection) eval "${param}"="${val}" ;; @@ -169,7 +161,6 @@ version_output() { } log_parameters() { - echo mode: "${mode}" >> "${COLLECT_DIR}"/system/script-params.txt echo ignore_introspection: "${ignore_introspection}" >> "${COLLECT_DIR}"/system/script-params.txt echo ignore_metrics: "${ignore_metrics}" >> "${COLLECT_DIR}"/system/script-params.txt } @@ -250,11 +241,6 @@ collect() { get_docker_logs } -enable_debug() { - init - enable_docker_debug -} - pack() { try "archive gathered information" @@ -264,10 +250,8 @@ pack() { } finished() { - if [[ "${mode}" == "collect" ]]; then - cleanup - echo -e "\n\tDone... your bundled logs are located in ${PROGRAM_DIR}/eks_${INSTANCE_ID}_$(date --utc +%Y-%m-%d_%H%M-%Z)_${PROGRAM_VERSION}.tar.gz\n" - fi + cleanup + echo -e "\n\tDone... 
your bundled logs are located in ${PROGRAM_DIR}/eks_${INSTANCE_ID}_$(date --utc +%Y-%m-%d_%H%M-%Z)_${PROGRAM_VERSION}.tar.gz\n" } get_mounts_info() { @@ -316,6 +300,18 @@ get_common_logs() { tail -c 10M /var/log/messages > "${COLLECT_DIR}"/var_log/messages continue fi + if [[ "${entry}" == "containers" ]]; then + cp --force --recursive /var/log/containers/aws-node* "${COLLECT_DIR}"/var_log/ + cp --force --recursive /var/log/containers/coredns-* "${COLLECT_DIR}"/var_log/ + cp --force --recursive /var/log/containers/kube-proxy* "${COLLECT_DIR}"/var_log/ + continue + fi + if [[ "${entry}" == "pods" ]]; then + cp --force --recursive /var/log/pods/kube-system_aws-node* "${COLLECT_DIR}"/var_log/ + cp --force --recursive /var/log/pods//kube-system_coredns* "${COLLECT_DIR}"/var_log/ + cp --force --recursive /var/log/pods/kube-system_kube-proxy* "${COLLECT_DIR}"/var_log/ + continue + fi cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/var_log/ fi done @@ -525,59 +521,11 @@ get_docker_info() { ok } -enable_docker_debug() { - try "enable debug mode for the Docker daemon" - - case "${PACKAGE_TYPE}" in - rpm) - - if [[ -e /etc/sysconfig/docker ]] && grep -q "^\s*OPTIONS=\"-D" /etc/sysconfig/docker - then - echo "Debug mode is already enabled." - ok - else - if [[ -e /etc/sysconfig/docker ]]; then - echo "OPTIONS=\"-D \$OPTIONS\"" >> /etc/sysconfig/docker - - try "restart Docker daemon to enable debug mode" - service docker restart - ok - fi - fi - ;; - *) - warning "The current operating system is not supported." - - ok - ;; - esac -} - -confirm_enable_docker_debug() { - read -r -p "${1:-Enabled Docker Debug will restart the Docker Daemon and restart all running container. Are you sure? [y/N]} " USER_INPUT - case "$USER_INPUT" in - [yY][eE][sS]|[yY]) - enable_docker_debug - ;; - *) - die "\"No\" was selected." 
- ;; - esac -} - +# ----------------------------------------------------------------------------- +# Entrypoint parse_options "$@" -case "${mode}" in - collect) - collect - pack - finished - ;; - enable_debug) - confirm_enable_docker_debug - finished - ;; - *) - help && exit 1 - ;; -esac +collect +pack +finished + diff --git a/log-collector-script/windows/README.md b/log-collector-script/windows/README.md index e7d12cd07..b9a1612b8 100644 --- a/log-collector-script/windows/README.md +++ b/log-collector-script/windows/README.md @@ -15,18 +15,7 @@ Invoke-WebRequest -OutFile eks-log-collector.ps1 https://raw.githubusercontent.c The project can be used in normal or Enable/Disable Debug(**Caution: Enable/Disable Debug will restart Docker daemon which would kill running containers**). ``` -# eks-log-collector.ps1 -USAGE: eks-log-collector [ -RunMode =Collect|EnableDebug|DisableDebug ] - -OPTIONS: - -RunMode Has three parameters 1) Collect, 2) EnableDebug 3) DisableDebug: - Collect Gathers basic operating system, Docker daemon, and - Amazon EKS related config files and logs. This is the default mode. - EnableDebug Enables debug mode for Docker daemon (Not for production use) - DisableDebug Disable debug mode for Docker daemon - -Enables debug mode for the Docker daemon: -eks-log-collector.ps1 -RunMode EnableDebug +USAGE: .\eks-log-collector.ps1 ``` #### Example output in normal mode The following output shows this project running in normal mode. diff --git a/log-collector-script/windows/eks-log-collector.ps1 b/log-collector-script/windows/eks-log-collector.ps1 index feb173efb..2d538376d 100644 --- a/log-collector-script/windows/eks-log-collector.ps1 +++ b/log-collector-script/windows/eks-log-collector.ps1 @@ -17,9 +17,6 @@ .EXAMPLE eks-log-collector.ps1 Gather basic operating system, Docker daemon, and kubelet logs. -.EXAMPLE - eks-log-collector.ps1 -RunMode EnableDebug - Enables debug mode for the Docker daemon. 
#> @@ -279,41 +276,6 @@ Function get_network_info{ Break } } -Function enable_docker_debug{ - try { - Write-Host "Enabling debug mode for the Docker Service" - if (sc.exe qc docker | where-object {$_ -like '*-D*'}){ - Write-Host "Debug mode already enabled" -foregroundcolor "yellow" - } - else { - sc.exe config docker binPath= "C:\Program Files\Docker\dockerd.exe --run-service -D" - Restart-service Docker - Write-Host "OK" -foregroundcolor "green" - } - } - catch { - Write-Error "Failed to enable debug mode" - Break - } -} - -Function disable_docker_debug{ - try { - Write-Host "Disabling debug mode for the Docker Service" - if (sc.exe qc docker | where-object {$_ -like '*-D*'}){ - sc.exe config docker binPath= "C:\Program Files\Docker\dockerd.exe --run-service" - Restart-service Docker - Write-Host "OK" -foregroundcolor "green" - } - else { - Write-Host "Debug mode already disabled" -foregroundcolor "yellow" - } - } - catch { - Write-Error "Failed to disable debug mode" - Break - } -} Function cleanup{ Write-Host "Cleaning up directory" @@ -357,26 +319,14 @@ Function collect{ } -Function enable_debug{ - enable_docker_debug -} - -Function disable_debug{ - disable_docker_debug -} - -if ($RunMode -eq "Collect"){ +#-------------------------- +#Main-function +Function main { Write-Host "Running Default(Collect) Mode" -foregroundcolor "blue" cleanup collect pack -} elseif ($RunMode -eq "EnableDebug"){ - Write-Host "Enabling Debug for Docker" -foregroundcolor "blue" - enable_debug -} elseif ($RunMode -eq "DisableDebug"){ - Write-Host "Disabling Debug for Docker" -foregroundcolor "blue" - disable_debug -} else { - Write-Host "You need to specify either Collect, EnableDebug or DisableDebug RunMode" -ForegroundColor "red" - Break } + +#Entry point +main From b98cec20b1768208bef8870c924811ac57badc2b Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Tue, 10 Mar 2020 16:13:55 -0700 Subject: [PATCH 034/621] Added 1.15 support and 
removed --allow-privileged flag from all EKS supported versions (1.12+). (#428) --- Makefile | 8 ++++++-- files/1.14/kubelet.service | 21 --------------------- files/kubelet.service | 1 - scripts/install-worker.sh | 6 +----- 4 files changed, 7 insertions(+), 29 deletions(-) delete mode 100644 files/1.14/kubelet.service diff --git a/Makefile b/Makefile index 4f9bba762..1556a552c 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.12 1.13 1.14 +all: 1.12 1.13 1.14 1.15 .PHONY: validate validate: @@ -47,4 +47,8 @@ k8s: validate .PHONY: 1.14 1.14: - $(MAKE) k8s kubernetes_version=1.14.9 kubernetes_build_date=2020-01-22 \ No newline at end of file + $(MAKE) k8s kubernetes_version=1.14.9 kubernetes_build_date=2020-01-22 + +.PHONY: 1.15 +1.15: + $(MAKE) k8s kubernetes_version=1.15.10 kubernetes_build_date=2020-02-14 \ No newline at end of file diff --git a/files/1.14/kubelet.service b/files/1.14/kubelet.service deleted file mode 100644 index 358b659ae..000000000 --- a/files/1.14/kubelet.service +++ /dev/null @@ -1,21 +0,0 @@ -[Unit] -Description=Kubernetes Kubelet -Documentation=https://github.com/kubernetes/kubernetes -After=docker.service -Requires=docker.service - -[Service] -ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 -ExecStart=/usr/bin/kubelet --cloud-provider aws \ - --config /etc/kubernetes/kubelet/kubelet-config.json \ - --kubeconfig /var/lib/kubelet/kubeconfig \ - --container-runtime docker \ - --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS - -Restart=on-failure -RestartForceExitStatus=SIGPIPE -RestartSec=5 -KillMode=process - -[Install] -WantedBy=multi-user.target diff --git a/files/kubelet.service b/files/kubelet.service index ae33350f1..358b659ae 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -8,7 +8,6 @@ Requires=docker.service ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 ExecStart=/usr/bin/kubelet --cloud-provider aws \ --config 
/etc/kubernetes/kubelet/kubelet-config.json \ - --allow-privileged=true \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index db66f39b5..edea0beab 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -190,11 +190,7 @@ sudo mkdir -p /etc/kubernetes/kubelet sudo mkdir -p /etc/systemd/system/kubelet.service.d sudo mv $TEMPLATE_DIR/kubelet-kubeconfig /var/lib/kubelet/kubeconfig sudo chown root:root /var/lib/kubelet/kubeconfig -if [ "$KUBERNETES_MINOR_VERSION" = "1.14" ]; then - sudo mv $TEMPLATE_DIR/1.14/kubelet.service /etc/systemd/system/kubelet.service -else - sudo mv $TEMPLATE_DIR/kubelet.service /etc/systemd/system/kubelet.service -fi +sudo mv $TEMPLATE_DIR/kubelet.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service sudo mv $TEMPLATE_DIR/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json sudo chown root:root /etc/kubernetes/kubelet/kubelet-config.json From 729dcb5f6dee3428e17c10b46c573c11802e4002 Mon Sep 17 00:00:00 2001 From: Will Thames Date: Thu, 12 Mar 2020 06:10:17 +1000 Subject: [PATCH 035/621] Fix URL for 1.15 binaries (#429) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1556a552c..d229d4bc9 100644 --- a/Makefile +++ b/Makefile @@ -51,4 +51,4 @@ k8s: validate .PHONY: 1.15 1.15: - $(MAKE) k8s kubernetes_version=1.15.10 kubernetes_build_date=2020-02-14 \ No newline at end of file + $(MAKE) k8s kubernetes_version=1.15.10 kubernetes_build_date=2020-02-22 From 6585196df54d2d388fd54a77feb30d6472d6e395 Mon Sep 17 00:00:00 2001 From: Octavio Martin Date: Sat, 21 Mar 2020 21:41:22 +0000 Subject: [PATCH 036/621] Fixed amazon-eks-nodegroup.yaml lint issues --- amazon-eks-nodegroup.yaml | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git 
a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index 4f4c6cede..e44c4df94 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -401,7 +401,6 @@ Resources: NodeSecurityGroupIngress: Type: "AWS::EC2::SecurityGroupIngress" - DependsOn: NodeSecurityGroup Properties: Description: Allow node to communicate with each other FromPort: 0 @@ -412,7 +411,6 @@ Resources: ClusterControlPlaneSecurityGroupIngress: Type: "AWS::EC2::SecurityGroupIngress" - DependsOn: NodeSecurityGroup Properties: Description: Allow pods to communicate with the cluster API Server FromPort: 443 @@ -423,7 +421,6 @@ Resources: ControlPlaneEgressToNodeSecurityGroup: Type: "AWS::EC2::SecurityGroupEgress" - DependsOn: NodeSecurityGroup Properties: Description: Allow the cluster control plane to communicate with worker Kubelet and pods DestinationSecurityGroupId: !Ref NodeSecurityGroup @@ -434,7 +431,6 @@ Resources: ControlPlaneEgressToNodeSecurityGroupOn443: Type: "AWS::EC2::SecurityGroupEgress" - DependsOn: NodeSecurityGroup Properties: Description: Allow the cluster control plane to communicate with pods running extension API servers on port 443 DestinationSecurityGroupId: !Ref NodeSecurityGroup @@ -445,7 +441,6 @@ Resources: NodeSecurityGroupFromControlPlaneIngress: Type: "AWS::EC2::SecurityGroupIngress" - DependsOn: NodeSecurityGroup Properties: Description: Allow worker Kubelets and pods to receive communication from the cluster control plane FromPort: 1025 @@ -456,7 +451,6 @@ Resources: NodeSecurityGroupFromControlPlaneOn443Ingress: Type: "AWS::EC2::SecurityGroupIngress" - DependsOn: NodeSecurityGroup Properties: Description: Allow pods running extension API servers on port 443 to receive communication from cluster control plane FromPort: 443 @@ -468,7 +462,7 @@ Resources: NodeLaunchConfig: Type: "AWS::AutoScaling::LaunchConfiguration" Properties: - AssociatePublicIpAddress: "true" + AssociatePublicIpAddress: true BlockDeviceMappings: - DeviceName: /dev/xvda Ebs: @@ 
-503,15 +497,15 @@ Resources: MinSize: !Ref NodeAutoScalingGroupMinSize Tags: - Key: Name - PropagateAtLaunch: "true" + PropagateAtLaunch: true Value: !Sub ${ClusterName}-${NodeGroupName}-Node - Key: !Sub kubernetes.io/cluster/${ClusterName} - PropagateAtLaunch: "true" + PropagateAtLaunch: true Value: owned VPCZoneIdentifier: !Ref Subnets UpdatePolicy: AutoScalingRollingUpdate: - MaxBatchSize: "1" + MaxBatchSize: 1 MinInstancesInService: !Ref NodeAutoScalingGroupDesiredCapacity PauseTime: PT5M From 14e85265e8b23be4d2706abe18144ca6de73edcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C4=81nis=20Orlovs?= Date: Tue, 24 Mar 2020 23:53:29 +0200 Subject: [PATCH 037/621] Consistent Docker GID version in Image (#430) * Docker install across versions change GID for docker, this causes problems for consistency. This commit solves it by adding same GID to docker install * Docker install across versions change GID for docker, this causes problems for consistency. This commit solves it by adding same GID to docker install Co-authored-by: Janis Orlovs --- .gitignore | 1 + scripts/install-worker.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 3a369e0c2..467547614 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ manifest.json *.swp +.idea \ No newline at end of file diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index edea0beab..c49364ba1 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -110,6 +110,7 @@ sudo yum install -y yum-utils device-mapper-persistent-data lvm2 INSTALL_DOCKER="${INSTALL_DOCKER:-true}" if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo amazon-linux-extras enable docker + sudo useradd --gid 1950 docker sudo yum install -y docker-${DOCKER_VERSION}* sudo usermod -aG docker $USER From b576e9e9084ab4253f87f8e435ee5cdca6e73cb1 Mon Sep 17 00:00:00 2001 From: Claes Mogren Date: Tue, 24 Mar 2020 16:13:02 -0700 Subject: [PATCH 038/621] Move compressed file to /var/log (#436) --- 
log-collector-script/linux/README.md | 4 +- .../linux/eks-log-collector.sh | 50 +++++++++---------- .../linux/eks-ssm-content.json | 4 +- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/log-collector-script/linux/README.md b/log-collector-script/linux/README.md index f9032aaca..e544db9f3 100644 --- a/log-collector-script/linux/README.md +++ b/log-collector-script/linux/README.md @@ -45,7 +45,7 @@ The following output shows this project running in normal mode. ``` sudo bash eks-log-collector.sh - This is version 0.5.0. New versions can be found at https://github.com/awslabs/amazon-eks-ami + This is version 0.6.1. New versions can be found at https://github.com/awslabs/amazon-eks-ami Trying to collect common operating system logs... Trying to collect kernel logs... @@ -64,7 +64,7 @@ Trying to collect running Docker containers and gather container data... Trying to collect Docker daemon logs... Trying to archive gathered information... - Done... your bundled logs are located in /opt/log-collector/eks_i-0717c9d54b6cfaa19_2019-02-02_0103-UTC_0.0.4.tar.gz + Done... your bundled logs are located in /var/log/eks_i-0717c9d54b6cfaa19_2020-03-24_0103-UTC_0.6.1.tar.gz ``` diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 9a1813293..f41728c04 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash - -# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You may # not use this file except in compliance with the License. 
A copy of the @@ -21,10 +20,11 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.6.0" +readonly PROGRAM_VERSION="0.6.1" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" +readonly LOG_DIR="/var/log" readonly COLLECT_DIR="/tmp/${PROGRAM_NAME}" readonly DAYS_10=$(date -d "-10 days" '+%Y-%m-%d %H:%M') INSTANCE_ID="" @@ -149,7 +149,7 @@ is_root() { check_required_utils() { for utils in ${REQUIRED_UTILS[*]}; do - # if exit code of "command -v" not equal to 0, fail + # If exit code of "command -v" not equal to 0, fail if ! command -v "${utils}" >/dev/null 2>&1; then die "Application \"${utils}\" is missing, please install \"${utils}\" as this script requires it, and will not function without it." fi @@ -179,11 +179,11 @@ systemd_check() { create_directories() { # Make sure the directory the script lives in is there. Not an issue if # the EKS AMI is used, as it will have it. - mkdir --parents "${PROGRAM_DIR}" + mkdir -p "${PROGRAM_DIR}" - # Common directors creation + # Common directories creation for directory in ${COMMON_DIRECTORIES[*]}; do - mkdir --parents "${COLLECT_DIR}"/"${directory}" + mkdir -p "${COLLECT_DIR}"/"${directory}" done } @@ -244,14 +244,14 @@ collect() { pack() { try "archive gathered information" - tar --create --verbose --gzip --file "${PROGRAM_DIR}"/eks_"${INSTANCE_ID}"_"$(date --utc +%Y-%m-%d_%H%M-%Z)"_"${PROGRAM_VERSION}".tar.gz --directory="${COLLECT_DIR}" . > /dev/null 2>&1 + tar --create --verbose --gzip --file "${LOG_DIR}"/eks_"${INSTANCE_ID}"_"$(date --utc +%Y-%m-%d_%H%M-%Z)"_"${PROGRAM_VERSION}".tar.gz --directory="${COLLECT_DIR}" . > /dev/null 2>&1 ok } finished() { cleanup - echo -e "\n\tDone... your bundled logs are located in ${PROGRAM_DIR}/eks_${INSTANCE_ID}_$(date --utc +%Y-%m-%d_%H%M-%Z)_${PROGRAM_VERSION}.tar.gz\n" + echo -e "\n\tDone... 
your bundled logs are located in ${LOG_DIR}/eks_${INSTANCE_ID}_$(date --utc +%Y-%m-%d_%H%M-%Z)_${PROGRAM_VERSION}.tar.gz\n" } get_mounts_info() { @@ -301,18 +301,20 @@ get_common_logs() { continue fi if [[ "${entry}" == "containers" ]]; then - cp --force --recursive /var/log/containers/aws-node* "${COLLECT_DIR}"/var_log/ - cp --force --recursive /var/log/containers/coredns-* "${COLLECT_DIR}"/var_log/ - cp --force --recursive /var/log/containers/kube-proxy* "${COLLECT_DIR}"/var_log/ + cp --force --recursive /var/log/containers/aws-node* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --recursive /var/log/containers/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --recursive /var/log/containers/coredns-* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --recursive /var/log/containers/kube-proxy* "${COLLECT_DIR}"/var_log/ 2>/dev/null continue fi if [[ "${entry}" == "pods" ]]; then - cp --force --recursive /var/log/pods/kube-system_aws-node* "${COLLECT_DIR}"/var_log/ - cp --force --recursive /var/log/pods//kube-system_coredns* "${COLLECT_DIR}"/var_log/ - cp --force --recursive /var/log/pods/kube-system_kube-proxy* "${COLLECT_DIR}"/var_log/ + cp --force --recursive /var/log/pods/kube-system_aws-node* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --recursive /var/log/pods/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --recursive /var/log/pods/kube-system_coredns* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --recursive /var/log/pods/kube-system_kube-proxy* "${COLLECT_DIR}"/var_log/ 2>/dev/null continue fi - cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/var_log/ + cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/var_log/ 2>/dev/null fi done @@ -358,18 +360,18 @@ get_k8s_info() { try "collect kubelet information" if [[ -n "${KUBECONFIG:-}" ]]; then - command -v kubectl > /dev/null && kubectl get --kubeconfig=${KUBECONFIG} svc > 
"${COLLECT_DIR}"/kubelet/svc.log - command -v kubectl > /dev/null && kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + command -v kubectl > /dev/null && kubectl get --kubeconfig="${KUBECONFIG}" svc > "${COLLECT_DIR}"/kubelet/svc.log + command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml elif [[ -f /etc/eksctl/kubeconfig.yaml ]]; then KUBECONFIG="/etc/eksctl/kubeconfig.yaml" - command -v kubectl > /dev/null && kubectl get --kubeconfig=${KUBECONFIG} svc > "${COLLECT_DIR}"/kubelet/svc.log - command -v kubectl > /dev/null && kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + command -v kubectl > /dev/null && kubectl get --kubeconfig="${KUBECONFIG}" svc > "${COLLECT_DIR}"/kubelet/svc.log + command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml elif [[ -f /etc/systemd/system/kubelet.service ]]; then - KUBECONFIG=`grep kubeconfig /etc/systemd/system/kubelet.service | awk '{print $2}'` - command -v kubectl > /dev/null && kubectl get --kubeconfig=${KUBECONFIG} svc > "${COLLECT_DIR}"/kubelet/svc.log - command -v kubectl > /dev/null && kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + KUBECONFIG=$(grep kubeconfig /etc/systemd/system/kubelet.service | awk '{print $2}') + command -v kubectl > /dev/null && kubectl get --kubeconfig="${KUBECONFIG}" svc > "${COLLECT_DIR}"/kubelet/svc.log + command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml elif [[ -f /var/lib/kubelet/kubeconfig ]]; then KUBECONFIG="/var/lib/kubelet/kubeconfig" @@ -410,7 +412,6 @@ get_ipamd_info() { done else echo "Ignoring IPAM introspection stats as mentioned"| tee -a 
"${COLLECT_DIR}"/ipamd/ipam_introspection_ignore.txt - fi if [[ "${ignore_metrics}" == "false" ]]; then @@ -528,4 +529,3 @@ parse_options "$@" collect pack finished - diff --git a/log-collector-script/linux/eks-ssm-content.json b/log-collector-script/linux/eks-ssm-content.json index e0a0f7ced..5484b224a 100644 --- a/log-collector-script/linux/eks-ssm-content.json +++ b/log-collector-script/linux/eks-ssm-content.json @@ -34,7 +34,7 @@ "/usr/local/bin/aws --version", "fi", "echo \"Pushing to S3\"", - "/usr/local/bin/aws s3 cp --recursive /opt/log-collector/ s3://{{bucketName}}", + "/usr/local/bin/aws s3 cp /var/log/eks_i* s3://{{bucketName}}", "echo \"Logs uploaded to S3\"" ] } @@ -78,4 +78,4 @@ } } ] -} \ No newline at end of file +} From 51105608a55f096724fafcca0012dfa4fe930ca7 Mon Sep 17 00:00:00 2001 From: Claes Mogren Date: Wed, 25 Mar 2020 13:14:51 -0700 Subject: [PATCH 039/621] Force create the group id (#437) "the -f is force, -o is overwrite, meaning if there is an existing group with number 1950, it will create a new one with the name docker" --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index c49364ba1..24aa9ced8 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -110,7 +110,7 @@ sudo yum install -y yum-utils device-mapper-persistent-data lvm2 INSTALL_DOCKER="${INSTALL_DOCKER:-true}" if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo amazon-linux-extras enable docker - sudo useradd --gid 1950 docker + sudo groupadd -fog 1950 docker && useradd --gid 1950 docker sudo yum install -y docker-${DOCKER_VERSION}* sudo usermod -aG docker $USER From ff690788dfaf399e6919eebb59371ee923617df4 Mon Sep 17 00:00:00 2001 From: Divyesh Khandeshi Date: Wed, 25 Mar 2020 20:45:31 -0400 Subject: [PATCH 040/621] Fix useradd to run with privileges --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/scripts/install-worker.sh b/scripts/install-worker.sh index 24aa9ced8..bc66a1948 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -110,7 +110,7 @@ sudo yum install -y yum-utils device-mapper-persistent-data lvm2 INSTALL_DOCKER="${INSTALL_DOCKER:-true}" if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo amazon-linux-extras enable docker - sudo groupadd -fog 1950 docker && useradd --gid 1950 docker + sudo groupadd -fog 1950 docker && sudo useradd --gid 1950 docker sudo yum install -y docker-${DOCKER_VERSION}* sudo usermod -aG docker $USER From 4e0e9164885e07851ed4a737461349ae6012765e Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Thu, 26 Mar 2020 18:09:28 -0700 Subject: [PATCH 041/621] Removing dependency on Authenticator binary (#440) --- files/kubelet-kubeconfig | 14 +++++++++----- scripts/install-worker.sh | 1 - 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/files/kubelet-kubeconfig b/files/kubelet-kubeconfig index 91c25cf29..61ef53a77 100644 --- a/files/kubelet-kubeconfig +++ b/files/kubelet-kubeconfig @@ -16,10 +16,14 @@ users: user: exec: apiVersion: client.authentication.k8s.io/v1alpha1 - command: /usr/bin/aws-iam-authenticator + command: aws + env: + - name: AWS_STS_REGIONAL_ENDPOINTS + value: regional args: - - "token" - - "-i" - - "CLUSTER_NAME" + - eks - --region - - "AWS_REGION" \ No newline at end of file + - "AWS_REGION" + - get-token + - --cluster-name + - "CLUSTER_NAME" diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index bc66a1948..eeb6cf8ba 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -167,7 +167,6 @@ S3_PATH="s3://$BINARY_BUCKET_NAME/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin BINARIES=( kubelet - aws-iam-authenticator ) for binary in ${BINARIES[*]} ; do if [[ ! 
-z "$AWS_ACCESS_KEY_ID" ]]; then From b56a25d659fb8f66cdad6377a164346c10642f61 Mon Sep 17 00:00:00 2001 From: natherz97 <55205932+natherz97@users.noreply.github.com> Date: Tue, 7 Apr 2020 17:25:39 -0700 Subject: [PATCH 042/621] Reducing memory allocated in kubeReserved (#419) --- files/bootstrap.sh | 55 ++++++++++++++-------------------------------- 1 file changed, 16 insertions(+), 39 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 9cbac1780..956800a5e 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -147,40 +147,21 @@ get_resource_to_reserve_in_range() { echo $resources_to_reserve } -# Calculates the amount of memory to reserve for the kubelet in mebibytes from the total memory available on the instance. -# From the total memory capacity of this worker node, we calculate the memory resources to reserve -# by reserving a percentage of the memory in each range up to the total memory available on the instance. -# We are using these memory ranges from GKE (https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-architecture#node_allocatable): -# 255 Mi of memory for machines with less than 1024Mi of memory -# 25% of the first 4096Mi of memory -# 20% of the next 4096Mi of memory (up to 8192Mi) -# 10% of the next 8192Mi of memory (up to 16384Mi) -# 6% of the next 114688Mi of memory (up to 131072Mi) -# 2% of any memory above 131072Mi +# Calculates the amount of memory to reserve for kubeReserved in mebibytes. KubeReserved is a function of pod +# density so we are calculating the amount of memory to reserve for Kubernetes systems daemons by +# considering the maximum number of pods this instance type supports. 
# Args: -# $1 total available memory on the machine in Mi +# $1 the instance type of the worker node # Return: # memory to reserve in Mi for the kubelet get_memory_mebibytes_to_reserve() { - local total_memory_on_instance=$1 - local memory_ranges=(0 4096 8192 16384 131072 $total_memory_on_instance) - local memory_percentage_reserved_for_ranges=(2500 2000 1000 600 200) - if (( $total_memory_on_instance <= 1024 )); then - memory_to_reserve="255" - else - memory_to_reserve="0" - for i in ${!memory_percentage_reserved_for_ranges[@]}; do - local start_range=${memory_ranges[$i]} - local end_range=${memory_ranges[(($i+1))]} - local percentage_to_reserve_for_range=${memory_percentage_reserved_for_ranges[$i]} - memory_to_reserve=$(($memory_to_reserve + \ - $(get_resource_to_reserve_in_range $total_memory_on_instance $start_range $end_range $percentage_to_reserve_for_range))) - done - fi + local instance_type=$1 + max_num_pods=$(cat /etc/eks/eni-max-pods.txt | grep $instance_type | awk '{print $2;}') + memory_to_reserve=$((11 * $max_num_pods + 255)) echo $memory_to_reserve } -# Calculates the amount of CPU to reserve for the kubelet in millicores from the total number of vCPUs available on the instance. +# Calculates the amount of CPU to reserve for kubeReserved in millicores from the total number of vCPUs available on the instance. # From the total core capacity of this worker node, we calculate the CPU resources to reserve by reserving a percentage # of the available cores in each range up to the total number of cores available on the instance. 
# We are using these CPU ranges from GKE (https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-architecture#node_allocatable): @@ -188,12 +169,10 @@ get_memory_mebibytes_to_reserve() { # 1% of the next core (up to 2 cores) # 0.5% of the next 2 cores (up to 4 cores) # 0.25% of any cores above 4 cores -# Args: -# $1 total number of millicores on the instance (number of vCPUs * 1000) # Return: # CPU resources to reserve in millicores (m) get_cpu_millicores_to_reserve() { - local total_cpu_on_instance=$1 + local total_cpu_on_instance=$(($(nproc) * 1000)) local cpu_ranges=(0 1000 2000 4000 $total_cpu_on_instance) local cpu_percentage_reserved_for_ranges=(600 100 50 25) cpu_to_reserve="0" @@ -289,25 +268,22 @@ fi KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json echo "$(jq ".clusterDNS=[\"$DNS_CLUSTER_IP\"]" $KUBELET_CONFIG)" > $KUBELET_CONFIG +INTERNAL_IP=$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4) +INSTANCE_TYPE=$(curl -s http://169.254.169.254/latest/meta-data/instance-type) + # Sets kubeReserved and evictionHard in /etc/kubernetes/kubelet/kubelet-config.json for worker nodes. The following two function -# calls calculate the CPU and memory resources to reserve for the kubelet based on instance type of the worker node. +# calls calculate the CPU and memory resources to reserve for kubeReserved based on the instance type of the worker node. # Note that allocatable memory and CPU resources on worker nodes is calculated by the Kubernetes scheduler # with this formula when scheduling pods: Allocatable = Capacity - Reserved - Eviction Threshold. 
-# gets the memory and CPU capacity of the worker node -MEMORY_MI=$(free -m | grep Mem | awk '{print $2}') -CPU_MILLICORES=$(($(nproc) * 1000)) # calculates the amount of each resource to reserve -mebibytes_to_reserve=$(get_memory_mebibytes_to_reserve $MEMORY_MI) -cpu_millicores_to_reserve=$(get_cpu_millicores_to_reserve $CPU_MILLICORES) +mebibytes_to_reserve=$(get_memory_mebibytes_to_reserve $INSTANCE_TYPE) +cpu_millicores_to_reserve=$(get_cpu_millicores_to_reserve) # writes kubeReserved and evictionHard to the kubelet-config using the amount of CPU and memory to be reserved echo "$(jq '. += {"evictionHard": {"memory.available": "100Mi", "nodefs.available": "10%", "nodefs.inodesFree": "5%"}}' $KUBELET_CONFIG)" > $KUBELET_CONFIG echo "$(jq --arg mebibytes_to_reserve "${mebibytes_to_reserve}Mi" --arg cpu_millicores_to_reserve "${cpu_millicores_to_reserve}m" \ '. += {kubeReserved: {"cpu": $cpu_millicores_to_reserve, "ephemeral-storage": "1Gi", "memory": $mebibytes_to_reserve}}' $KUBELET_CONFIG)" > $KUBELET_CONFIG -INTERNAL_IP=$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4) -INSTANCE_TYPE=$(curl -s http://169.254.169.254/latest/meta-data/instance-type) - if [[ "$USE_MAX_PODS" = "true" ]]; then MAX_PODS_FILE="/etc/eks/eni-max-pods.txt" set +o pipefail @@ -348,3 +324,4 @@ fi systemctl daemon-reload systemctl enable kubelet systemctl start kubelet + From d6e021b87edb9861fd478dece0e09cfe4bead8c4 Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Tue, 7 Apr 2020 17:41:50 -0700 Subject: [PATCH 043/621] Revert "Removing dependency on Authenticator binary (#440)" (#446) This reverts commit 4e0e9164885e07851ed4a737461349ae6012765e. 
--- files/kubelet-kubeconfig | 14 +++++--------- scripts/install-worker.sh | 1 + 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/files/kubelet-kubeconfig b/files/kubelet-kubeconfig index 61ef53a77..91c25cf29 100644 --- a/files/kubelet-kubeconfig +++ b/files/kubelet-kubeconfig @@ -16,14 +16,10 @@ users: user: exec: apiVersion: client.authentication.k8s.io/v1alpha1 - command: aws - env: - - name: AWS_STS_REGIONAL_ENDPOINTS - value: regional + command: /usr/bin/aws-iam-authenticator args: - - eks - - --region - - "AWS_REGION" - - get-token - - --cluster-name + - "token" + - "-i" - "CLUSTER_NAME" + - --region + - "AWS_REGION" \ No newline at end of file diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index eeb6cf8ba..bc66a1948 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -167,6 +167,7 @@ S3_PATH="s3://$BINARY_BUCKET_NAME/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin BINARIES=( kubelet + aws-iam-authenticator ) for binary in ${BINARIES[*]} ; do if [[ ! 
-z "$AWS_ACCESS_KEY_ID" ]]; then From fcf6ea5bec4640adf907dc4f9cbb93e921338840 Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Wed, 8 Apr 2020 10:17:56 -0700 Subject: [PATCH 044/621] Adding support to upgrade kernel while building AMI (#447) --- eks-worker-al2.json | 7 +++++++ scripts/upgrade_kernel.sh | 8 ++++++++ 2 files changed, 15 insertions(+) create mode 100644 scripts/upgrade_kernel.sh diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 06862b4fc..a91a548ec 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -117,10 +117,17 @@ "ADDITIONAL_YUM_REPOS={{user `additional_yum_repos`}}" ] }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "expect_disconnect": true, + "script": "{{template_dir}}/scripts/upgrade_kernel.sh" + }, { "type": "shell", "remote_folder": "{{ user `remote_folder`}}", "script": "{{template_dir}}/scripts/install-worker.sh", + "pause_before": "90s", "environment_vars": [ "KUBERNETES_VERSION={{user `kubernetes_version`}}", "KUBERNETES_BUILD_DATE={{user `kubernetes_build_date`}}", diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh new file mode 100644 index 000000000..150c290fc --- /dev/null +++ b/scripts/upgrade_kernel.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -o pipefail +set -o nounset +set -o errexit + +sudo yum update -y kernel +sudo reboot \ No newline at end of file From 5a5046dc1f4cbb373633aca864c3fd6746329497 Mon Sep 17 00:00:00 2001 From: cmdallas Date: Mon, 13 Apr 2020 18:51:40 -0700 Subject: [PATCH 045/621] fix(amazon-eks-nodegroup): add ec2 service principals for isolated regions --- amazon-eks-nodegroup.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index e44c4df94..0a0772534 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -359,6 +359,10 @@ Mappings: EC2ServicePrincipal: "ec2.amazonaws.com" aws-cn: EC2ServicePrincipal: 
"ec2.amazonaws.com.cn" + aws-iso: + EC2ServicePrincipal: "ec2.c2s.ic.gov" + aws-iso-b: + EC2ServicePrincipal: "ec2.sc2s.sgov.gov" Conditions: HasNodeImageId: !Not - "Fn::Equals": From 09a8fbe729c3694d000a309cda2a3873e43e77b8 Mon Sep 17 00:00:00 2001 From: gaogilb Date: Fri, 27 Mar 2020 16:07:23 -0700 Subject: [PATCH 046/621] Add inf1 instance family in EKS AMI packer configuration --- amazon-eks-nodegroup.yaml | 4 ++++ files/eni-max-pods.txt | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index 0a0772534..1eb393dea 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -159,6 +159,10 @@ Parameters: - i3en.6xlarge - i3en.12xlarge - i3en.24xlarge + - inf1.xlarge + - inf1.2xlarge + - inf1.6xlarge + - inf1.24xlarge - m1.small - m1.medium - m1.large diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 6cae64e8a..307ebef71 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -97,6 +97,10 @@ i3en.3xlarge 58 i3en.6xlarge 234 i3en.12xlarge 234 i3en.24xlarge 737 +inf1.xlarge 38 +inf1.2xlarge 38 +inf1.6xlarge 234 +inf1.24xlarge 437 m1.small 8 m1.medium 12 m1.large 29 From 8c61db5239205b27c4afa687eb4f05738f0b7312 Mon Sep 17 00:00:00 2001 From: Vishal Gupta Date: Wed, 15 Apr 2020 16:24:14 -0700 Subject: [PATCH 047/621] Removed AssociatePublicIpAddress setting from NodeLaunchCongig and added NodeSecurityGroup dependency to SG Ingress/Egress (#450) Co-authored-by: Vishal Gupta --- amazon-eks-nodegroup.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index 1eb393dea..88fa1397c 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -409,6 +409,7 @@ Resources: NodeSecurityGroupIngress: Type: "AWS::EC2::SecurityGroupIngress" + DependsOn: NodeSecurityGroup Properties: Description: Allow node to communicate with each other FromPort: 0 @@ -419,6 +420,7 @@ Resources: 
ClusterControlPlaneSecurityGroupIngress: Type: "AWS::EC2::SecurityGroupIngress" + DependsOn: NodeSecurityGroup Properties: Description: Allow pods to communicate with the cluster API Server FromPort: 443 @@ -429,6 +431,7 @@ Resources: ControlPlaneEgressToNodeSecurityGroup: Type: "AWS::EC2::SecurityGroupEgress" + DependsOn: NodeSecurityGroup Properties: Description: Allow the cluster control plane to communicate with worker Kubelet and pods DestinationSecurityGroupId: !Ref NodeSecurityGroup @@ -439,6 +442,7 @@ Resources: ControlPlaneEgressToNodeSecurityGroupOn443: Type: "AWS::EC2::SecurityGroupEgress" + DependsOn: NodeSecurityGroup Properties: Description: Allow the cluster control plane to communicate with pods running extension API servers on port 443 DestinationSecurityGroupId: !Ref NodeSecurityGroup @@ -449,6 +453,7 @@ Resources: NodeSecurityGroupFromControlPlaneIngress: Type: "AWS::EC2::SecurityGroupIngress" + DependsOn: NodeSecurityGroup Properties: Description: Allow worker Kubelets and pods to receive communication from the cluster control plane FromPort: 1025 @@ -459,6 +464,7 @@ Resources: NodeSecurityGroupFromControlPlaneOn443Ingress: Type: "AWS::EC2::SecurityGroupIngress" + DependsOn: NodeSecurityGroup Properties: Description: Allow pods running extension API servers on port 443 to receive communication from cluster control plane FromPort: 443 @@ -470,7 +476,6 @@ Resources: NodeLaunchConfig: Type: "AWS::AutoScaling::LaunchConfiguration" Properties: - AssociatePublicIpAddress: true BlockDeviceMappings: - DeviceName: /dev/xvda Ebs: From a27a0e4be9c93963b6837085a91e654d5adc9135 Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Wed, 29 Apr 2020 10:32:36 -0700 Subject: [PATCH 048/621] Add a flag that allows CNI packages to be pulled from S3 instead of Github. (#457) The default behavior is unchanged and will still pull assets from Github. 
--- Makefile | 10 ++++---- eks-worker-al2.json | 2 ++ scripts/install-worker.sh | 48 +++++++++++++++++++++++++++++---------- 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index d229d4bc9..d36c61077 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date docker_version cni_version cni_plugin_version source_ami_id source_ami_owners arch instance_type security_group_id additional_yum_repos +PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date docker_version cni_version cni_plugin_version source_ami_id source_ami_owners arch instance_type security_group_id additional_yum_repos pull_cni_from_github K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) @@ -39,16 +39,16 @@ k8s: validate .PHONY: 1.12 1.12: - $(MAKE) k8s kubernetes_version=1.12.10 kubernetes_build_date=2020-01-22 + $(MAKE) k8s kubernetes_version=1.12.10 kubernetes_build_date=2020-04-17 pull_cni_from_github=true .PHONY: 1.13 1.13: - $(MAKE) k8s kubernetes_version=1.13.12 kubernetes_build_date=2020-01-22 + $(MAKE) k8s kubernetes_version=1.13.12 kubernetes_build_date=2020-04-16 pull_cni_from_github=true .PHONY: 1.14 1.14: - $(MAKE) k8s kubernetes_version=1.14.9 kubernetes_build_date=2020-01-22 + $(MAKE) k8s kubernetes_version=1.14.9 kubernetes_build_date=2020-04-16 pull_cni_from_github=true .PHONY: 1.15 1.15: - $(MAKE) k8s kubernetes_version=1.15.10 kubernetes_build_date=2020-02-22 + $(MAKE) k8s kubernetes_version=1.15.11 kubernetes_build_date=2020-04-16 pull_cni_from_github=true diff --git a/eks-worker-al2.json b/eks-worker-al2.json index a91a548ec..221d2483f 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -17,6 +17,7 @@ "docker_version": "18.09.9ce-2.amzn2", 
"cni_version": "v0.6.0", "cni_plugin_version": "v0.7.5", + "pull_cni_from_github": "true", "source_ami_id": "", "source_ami_owners": "137112412989", @@ -136,6 +137,7 @@ "DOCKER_VERSION={{user `docker_version`}}", "CNI_VERSION={{user `cni_version`}}", "CNI_PLUGIN_VERSION={{user `cni_plugin_version`}}", + "PULL_CNI_FROM_GITHUB={{user `pull_cni_from_github`}}", "AWS_ACCESS_KEY_ID={{user `aws_access_key_id`}}", "AWS_SECRET_ACCESS_KEY={{user `aws_secret_access_key`}}", "AWS_SESSION_TOKEN={{user `aws_session_token`}}" diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index bc66a1948..823370ef7 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -28,6 +28,7 @@ validate_env_set CNI_VERSION validate_env_set CNI_PLUGIN_VERSION validate_env_set KUBERNETES_VERSION validate_env_set KUBERNETES_BUILD_DATE +validate_env_set PULL_CNI_FROM_GITHUB ################################################################################ ### Machine Architecture ####################################################### @@ -145,18 +146,6 @@ sudo mkdir -p /var/lib/kubernetes sudo mkdir -p /var/lib/kubelet sudo mkdir -p /opt/cni/bin -wget https://github.com/containernetworking/cni/releases/download/${CNI_VERSION}/cni-${ARCH}-${CNI_VERSION}.tgz -wget https://github.com/containernetworking/cni/releases/download/${CNI_VERSION}/cni-${ARCH}-${CNI_VERSION}.tgz.sha512 -sudo sha512sum -c cni-${ARCH}-${CNI_VERSION}.tgz.sha512 -sudo tar -xvf cni-${ARCH}-${CNI_VERSION}.tgz -C /opt/cni/bin -rm cni-${ARCH}-${CNI_VERSION}.tgz cni-${ARCH}-${CNI_VERSION}.tgz.sha512 - -wget https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGIN_VERSION}/cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz -wget https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGIN_VERSION}/cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz.sha512 -sudo sha512sum -c cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz.sha512 -sudo tar -xvf cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz 
-C /opt/cni/bin -rm cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz.sha512 - echo "Downloading binaries from: s3://$BINARY_BUCKET_NAME" S3_DOMAIN="amazonaws.com" if [ "$BINARY_BUCKET_REGION" = "cn-north-1" ] || [ "$BINARY_BUCKET_REGION" = "cn-northwest-1" ]; then @@ -183,6 +172,41 @@ for binary in ${BINARIES[*]} ; do sudo chmod +x $binary sudo mv $binary /usr/bin/ done + +if [ "$PULL_CNI_FROM_GITHUB" = "true" ]; then + echo "Downloading CNI assets from Github" + wget https://github.com/containernetworking/cni/releases/download/${CNI_VERSION}/cni-${ARCH}-${CNI_VERSION}.tgz + wget https://github.com/containernetworking/cni/releases/download/${CNI_VERSION}/cni-${ARCH}-${CNI_VERSION}.tgz.sha512 + + wget https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGIN_VERSION}/cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz + wget https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGIN_VERSION}/cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz.sha512 + sudo sha512sum -c cni-${ARCH}-${CNI_VERSION}.tgz.sha512 + sudo sha512sum -c cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz.sha512 + rm cni-${ARCH}-${CNI_VERSION}.tgz.sha512 + rm cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz.sha512 +else + CNI_BINARIES=( + cni-${ARCH}-${CNI_VERSION}.tgz + cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz + ) + for binary in ${CNI_BINARIES[*]} ; do + if [[ ! -z "$AWS_ACCESS_KEY_ID" ]]; then + echo "AWS cli present - using it to copy binaries from s3." + aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$binary . + aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$binary.sha256 . + sudo sha256sum -c $binary.sha256 + else + echo "AWS cli missing - using wget to fetch cni binaries from s3. Note: This won't work for private bucket." 
+ sudo wget $S3_URL_BASE/$binary + sudo wget $S3_URL_BASE/$binary.sha256 + fi + done +fi +sudo tar -xvf cni-${ARCH}-${CNI_VERSION}.tgz -C /opt/cni/bin +sudo tar -xvf cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz -C /opt/cni/bin +rm cni-${ARCH}-${CNI_VERSION}.tgz +rm cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz + sudo rm *.sha256 KUBERNETES_MINOR_VERSION=${KUBERNETES_VERSION%.*} From 7d4aae8b6787ecfe36ecc0bc45f8118de1dde370 Mon Sep 17 00:00:00 2001 From: Bronson Mirafuentes Date: Wed, 29 Apr 2020 11:19:25 -0700 Subject: [PATCH 049/621] update source AMI owner and ECR repo for govcloud (#458) --- Makefile | 6 +++++- files/bootstrap.sh | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index d36c61077..d6fd53a9d 100644 --- a/Makefile +++ b/Makefile @@ -18,6 +18,10 @@ ifeq ($(aws_region), cn-northwest-1) source_ami_owners ?= 141808717104 endif +ifeq ($(aws_region), us-gov-west-1) +source_ami_owners ?= 045324592363 +endif + T_RED := \e[0;31m T_GREEN := \e[0;32m T_YELLOW := \e[0;33m @@ -48,7 +52,7 @@ k8s: validate .PHONY: 1.14 1.14: $(MAKE) k8s kubernetes_version=1.14.9 kubernetes_build_date=2020-04-16 pull_cni_from_github=true - + .PHONY: 1.15 1.15: $(MAKE) k8s kubernetes_version=1.15.11 kubernetes_build_date=2020-04-16 pull_cni_from_github=true diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 956800a5e..8a7e8f5ca 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -117,6 +117,10 @@ function get_pause_container_account_for_region () { echo "${PAUSE_CONTAINER_ACCOUNT:-918309763551}";; cn-northwest-1) echo "${PAUSE_CONTAINER_ACCOUNT:-961992271922}";; + us-gov-west-1) + echo "${PAUSE_CONTAINER_ACCOUNT:-013241004608}";; + us-gov-east-1) + echo "${PAUSE_CONTAINER_ACCOUNT:-151742754352}";; *) echo "${PAUSE_CONTAINER_ACCOUNT:-602401143452}";; esac @@ -324,4 +328,3 @@ fi systemctl daemon-reload systemctl enable kubelet systemctl start kubelet - From 035e956b99556b435a9d0bfbc5c87b373f302d2f Mon Sep 17 00:00:00 2001 
From: Sai Teja Penugonda Date: Wed, 29 Apr 2020 14:32:45 -0400 Subject: [PATCH 050/621] updated ipamd information files extension to json (#451) * updated ipamd data file extension to json * updated ipamd metrics file extension --- log-collector-script/linux/eks-log-collector.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index f41728c04..dcea6da54 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -408,7 +408,7 @@ get_ipamd_info() { if [[ "${ignore_introspection}" == "false" ]]; then try "collect L-IPAMD introspectioon information" for entry in ${IPAMD_DATA[*]}; do - curl --max-time 3 --silent http://localhost:61679/v1/"${entry}" >> "${COLLECT_DIR}"/ipamd/"${entry}".txt + curl --max-time 3 --silent http://localhost:61679/v1/"${entry}" >> "${COLLECT_DIR}"/ipamd/"${entry}".json done else echo "Ignoring IPAM introspection stats as mentioned"| tee -a "${COLLECT_DIR}"/ipamd/ipam_introspection_ignore.txt @@ -416,7 +416,7 @@ get_ipamd_info() { if [[ "${ignore_metrics}" == "false" ]]; then try "collect L-IPAMD prometheus metrics" - curl --max-time 3 --silent http://localhost:61678/metrics > "${COLLECT_DIR}"/ipamd/metrics.txt 2>&1 + curl --max-time 3 --silent http://localhost:61678/metrics > "${COLLECT_DIR}"/ipamd/metrics.json 2>&1 else echo "Ignoring Prometheus Metrics collection as mentioned"| tee -a "${COLLECT_DIR}"/ipamd/ipam_metrics_ignore.txt fi From 032799738ace12510da02a14f64c2dd10a5de599 Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Thu, 30 Apr 2020 12:16:42 -0700 Subject: [PATCH 051/621] Adding 1.16 to Makefile (#459) --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index d6fd53a9d..a0e3d0980 100644 --- a/Makefile +++ b/Makefile @@ -56,3 +56,8 @@ k8s: validate .PHONY: 1.15 1.15: $(MAKE) k8s 
kubernetes_version=1.15.11 kubernetes_build_date=2020-04-16 pull_cni_from_github=true + +.PHONY: 1.16 +1.16: + $(MAKE) k8s kubernetes_version=1.16.8 kubernetes_build_date=2020-04-16 pull_cni_from_github=true + From 795ecd495594b83b2aa74689b011d99fb66ae349 Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Mon, 11 May 2020 23:58:02 -0700 Subject: [PATCH 052/621] Add a new manifest containing the AMI name (#471) This commit adds a new manifest which contains AMI name in the manifest filename so that parallel builds can be triggered. Even though the new manifest is now generated along with the current one for backwards compatibility, eventually the old manifest (manifest.json) will be deprecated. --- eks-worker-al2.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 221d2483f..4121439f3 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -162,6 +162,11 @@ "type": "manifest", "output": "manifest.json", "strip_path": true + }, + { + "type": "manifest", + "output": "{{user `ami_name`}}-manifest.json", + "strip_path": true } ] } From eaf6ade70ad423b197783389467493fad74934c2 Mon Sep 17 00:00:00 2001 From: Murcherla Date: Wed, 20 May 2020 14:21:04 -0500 Subject: [PATCH 053/621] Added --dereference flag to copy symlink logs --- log-collector-script/linux/eks-log-collector.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index dcea6da54..b57fce93e 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -301,17 +301,17 @@ get_common_logs() { continue fi if [[ "${entry}" == "containers" ]]; then - cp --force --recursive /var/log/containers/aws-node* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --recursive /var/log/containers/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force 
--recursive /var/log/containers/coredns-* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --recursive /var/log/containers/kube-proxy* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/containers/aws-node* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/containers/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/containers/coredns-* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/containers/kube-proxy* "${COLLECT_DIR}"/var_log/ 2>/dev/null continue fi if [[ "${entry}" == "pods" ]]; then - cp --force --recursive /var/log/pods/kube-system_aws-node* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --recursive /var/log/pods/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --recursive /var/log/pods/kube-system_coredns* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --recursive /var/log/pods/kube-system_kube-proxy* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_aws-node* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_coredns* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_kube-proxy* "${COLLECT_DIR}"/var_log/ 2>/dev/null continue fi cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/var_log/ 2>/dev/null From 3dd3650111ff0f8f33f3a3ff337bd8b14606ae3d Mon Sep 17 00:00:00 2001 From: Mike Stefaniak <3597741+mikestef9@users.noreply.github.com> Date: Mon, 1 Jun 2020 19:53:35 -0700 Subject: [PATCH 054/621] Update eni-max-pods.txt (#483) --- files/eni-max-pods.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git 
a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 307ebef71..39308be2f 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -37,6 +37,24 @@ c5.12xlarge 234 c5.18xlarge 737 c5.24xlarge 737 c5.metal 737 +c5a.large 29 +c5a.xlarge 58 +c5a.2xlarge 58 +c5a.4xlarge 234 +c5a.8xlarge 234 +c5a.12xlarge 234 +c5a.16xlarge 737 +c5a.24xlarge 737 +c5a.metal 737 +c5ad.large 29 +c5ad.xlarge 58 +c5ad.2xlarge 58 +c5ad.4xlarge 234 +c5ad.8xlarge 234 +c5ad.12xlarge 234 +c5ad.16xlarge 737 +c5ad.24xlarge 737 +c5ad.metal 737 c5d.large 29 c5d.xlarge 58 c5d.2xlarge 58 From 1b6f279d8364e16f054f1f96285c20a0848399da Mon Sep 17 00:00:00 2001 From: jitran Date: Tue, 19 May 2020 15:32:42 +1000 Subject: [PATCH 055/621] Moved the file copy after the execution of upgrade_kernel.sh. upgrade_kernel.sh performs a system reboot which will remove the contents of /tmp. --- eks-worker-al2.json | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 4121439f3..13da07be3 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -5,7 +5,7 @@ "creator": "{{env `USER`}}", "encrypted": "false", "kms_key_id": "", - + "aws_access_key_id": "{{env `AWS_ACCESS_KEY_ID`}}", "aws_secret_access_key": "{{env `AWS_SECRET_ACCESS_KEY`}}", "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}", @@ -65,7 +65,7 @@ "delete_on_termination": true } ], - "ami_block_device_mappings": [ + "ami_block_device_mappings": [ { "device_name": "/dev/xvda", "volume_type": "gp2", @@ -100,16 +100,6 @@ ], "provisioners": [ - { - "type": "shell", - "remote_folder": "{{ user `remote_folder`}}", - "inline": ["mkdir -p /tmp/worker/"] - }, - { - "type": "file", - "source": "{{template_dir}}/files/", - "destination": "/tmp/worker/" - }, { "type": "shell", "remote_folder": "{{ user `remote_folder`}}", @@ -124,6 +114,16 @@ "expect_disconnect": true, "script": "{{template_dir}}/scripts/upgrade_kernel.sh" }, + { + "type": "shell", + "remote_folder": 
"{{ user `remote_folder`}}", + "inline": ["mkdir -p /tmp/worker/"] + }, + { + "type": "file", + "source": "{{template_dir}}/files/", + "destination": "/tmp/worker/" + }, { "type": "shell", "remote_folder": "{{ user `remote_folder`}}", From 707fc575a64df66b449a43a1e09cc618d6221370 Mon Sep 17 00:00:00 2001 From: Bronson Mirafuentes Date: Wed, 3 Jun 2020 09:39:05 -0700 Subject: [PATCH 056/621] update logrotate.conf to compress rotated logs (#479) --- files/logrotate.conf | 31 +++++++++++++++++++++++++++++++ scripts/install-worker.sh | 1 + 2 files changed, 32 insertions(+) create mode 100644 files/logrotate.conf diff --git a/files/logrotate.conf b/files/logrotate.conf new file mode 100644 index 000000000..13af94c03 --- /dev/null +++ b/files/logrotate.conf @@ -0,0 +1,31 @@ +# see "man logrotate" for details +# rotate log files weekly +weekly + +# keep 4 weeks worth of backlogs +rotate 4 + +# create new (empty) log files after rotating old ones +create + +# use date as a suffix of the rotated file +dateext + +compress + +# RPM packages drop log rotation information into this directory +include /etc/logrotate.d + +/var/log/wtmp { + monthly + create 0664 root utmp + minsize 1M + rotate 1 +} + +/var/log/btmp { + missingok + monthly + create 0600 root utmp + rotate 1 +} diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 823370ef7..2679b6131 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -134,6 +134,7 @@ fi # kubelet uses journald which has built-in rotation and capped size. 
# See man 5 journald.conf sudo mv $TEMPLATE_DIR/logrotate-kube-proxy /etc/logrotate.d/kube-proxy +sudo mv $TEMPLATE_DIR/logrotate.conf /etc/logrotate.conf sudo chown root:root /etc/logrotate.d/kube-proxy sudo mkdir -p /var/log/journal From 4ebf57dfaa09e6a4a8d771f7ee40dcef61fe910f Mon Sep 17 00:00:00 2001 From: Murcherla Date: Wed, 3 Jun 2020 15:39:33 -0500 Subject: [PATCH 057/621] Collect conntrack info --- log-collector-script/linux/eks-log-collector.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index b57fce93e..a9f25078b 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -20,7 +20,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.6.1" +readonly PROGRAM_VERSION="0.6.2" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -435,6 +435,12 @@ get_sysctls_info() { get_networking_info() { try "collect networking infomation" + # conntrack info + echo "*** Output of conntrack -S *** " >> "${COLLECT_DIR}"/networking/conntrack.txt + timeout 75 conntrack -S >> "${COLLECT_DIR}"/networking/conntrack.txt + echo "*** Output of conntrack -L ***" >> "${COLLECT_DIR}"/networking/conntrack.txt + timeout 75 conntrack -L >> "${COLLECT_DIR}"/networking/conntrack.txt + # ifconfig timeout 75 ifconfig > "${COLLECT_DIR}"/networking/ifconfig.txt From df3b24612859ec5a3f358d30ab93ca34c56de355 Mon Sep 17 00:00:00 2001 From: Daniel Koo Date: Fri, 5 Jun 2020 10:11:43 -0700 Subject: [PATCH 058/621] update docker to 19.03.6ce-4.amzn2 (#488) Co-authored-by: Daniel Koo --- eks-worker-al2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 13da07be3..d6ca6b2b2 100644 --- 
a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -14,7 +14,7 @@ "binary_bucket_region": "us-west-2", "kubernetes_version": null, "kubernetes_build_date": null, - "docker_version": "18.09.9ce-2.amzn2", + "docker_version": "19.03.6ce-4.amzn2", "cni_version": "v0.6.0", "cni_plugin_version": "v0.7.5", "pull_cni_from_github": "true", From 32a58b6410e306d689a7af507532b67c47f0142b Mon Sep 17 00:00:00 2001 From: Angus Lees Date: Fri, 5 Jun 2020 20:50:18 +1000 Subject: [PATCH 059/621] Collect /var/run/aws-node/ipam.json L-IPAMD now checkpoints IPAM state to a file. Collect /var/run/aws-node/ipam.json file (if present) for debugging. See also aws/amazon-vpc-cni-k8s#972 --- log-collector-script/linux/eks-log-collector.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index a9f25078b..053b4c9cc 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -377,7 +377,7 @@ get_k8s_info() { KUBECONFIG="/var/lib/kubelet/kubeconfig" command -v kubectl > /dev/null && kubectl get --kubeconfig=${KUBECONFIG} svc > "${COLLECT_DIR}"/kubelet/svc.log command -v kubectl > /dev/null && kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml - + else echo "======== Unable to find KUBECONFIG, IGNORING POD DATA =========" >> "${COLLECT_DIR}"/kubelet/svc.log fi @@ -406,7 +406,7 @@ get_k8s_info() { get_ipamd_info() { if [[ "${ignore_introspection}" == "false" ]]; then - try "collect L-IPAMD introspectioon information" + try "collect L-IPAMD introspection information" for entry in ${IPAMD_DATA[*]}; do curl --max-time 3 --silent http://localhost:61679/v1/"${entry}" >> "${COLLECT_DIR}"/ipamd/"${entry}".json done @@ -421,6 +421,9 @@ get_ipamd_info() { echo "Ignoring Prometheus Metrics collection as mentioned"| tee -a "${COLLECT_DIR}"/ipamd/ipam_metrics_ignore.txt fi + try 
"collect L-IPAMD checkpoint" + cp /var/run/aws-node/ipam.json "${COLLECT_DIR}"/ipamd/ipam.json + ok } From ecad54588c6625c8e350b122b48b8a2abee2590f Mon Sep 17 00:00:00 2001 From: Kevin Frommelt Date: Fri, 5 Jun 2020 14:49:27 -0500 Subject: [PATCH 060/621] Report consistent output filename in linux log collector --- log-collector-script/linux/eks-log-collector.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 053b4c9cc..a0e65ad56 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -26,6 +26,7 @@ readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" readonly LOG_DIR="/var/log" readonly COLLECT_DIR="/tmp/${PROGRAM_NAME}" +readonly CURRENT_TIME=$(date --utc +%Y-%m-%d_%H%M-%Z) readonly DAYS_10=$(date -d "-10 days" '+%Y-%m-%d %H:%M') INSTANCE_ID="" INIT_TYPE="" @@ -244,14 +245,14 @@ collect() { pack() { try "archive gathered information" - tar --create --verbose --gzip --file "${LOG_DIR}"/eks_"${INSTANCE_ID}"_"$(date --utc +%Y-%m-%d_%H%M-%Z)"_"${PROGRAM_VERSION}".tar.gz --directory="${COLLECT_DIR}" . > /dev/null 2>&1 + tar --create --verbose --gzip --file "${LOG_DIR}"/eks_"${INSTANCE_ID}"_"${CURRENT_TIME}"_"${PROGRAM_VERSION}".tar.gz --directory="${COLLECT_DIR}" . > /dev/null 2>&1 ok } finished() { cleanup - echo -e "\n\tDone... your bundled logs are located in ${LOG_DIR}/eks_${INSTANCE_ID}_$(date --utc +%Y-%m-%d_%H%M-%Z)_${PROGRAM_VERSION}.tar.gz\n" + echo -e "\n\tDone... 
your bundled logs are located in ${LOG_DIR}/eks_${INSTANCE_ID}_${CURRENT_TIME}_${PROGRAM_VERSION}.tar.gz\n" } get_mounts_info() { From e988f28f4d56b98e0a7f7d66d71c55a498224e29 Mon Sep 17 00:00:00 2001 From: dianibar-work <46203030+dianibar-work@users.noreply.github.com> Date: Tue, 9 Jun 2020 03:49:46 +1000 Subject: [PATCH 061/621] Update bootstrap.sh to get the correct $max_num_pods for instance types f1.16xlarge, g3.16xlarge, h1.16xlarge, i3.16xlarge, and r4.16xlarge (#465) * Update bootstrap.sh * Update bootstrap.sh * Update bootstrap.sh --- files/bootstrap.sh | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 8a7e8f5ca..92ff073b5 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -159,8 +159,7 @@ get_resource_to_reserve_in_range() { # Return: # memory to reserve in Mi for the kubelet get_memory_mebibytes_to_reserve() { - local instance_type=$1 - max_num_pods=$(cat /etc/eks/eni-max-pods.txt | grep $instance_type | awk '{print $2;}') + local max_num_pods=$1 memory_to_reserve=$((11 * $max_num_pods + 255)) echo $memory_to_reserve } @@ -280,8 +279,18 @@ INSTANCE_TYPE=$(curl -s http://169.254.169.254/latest/meta-data/instance-type) # Note that allocatable memory and CPU resources on worker nodes is calculated by the Kubernetes scheduler # with this formula when scheduling pods: Allocatable = Capacity - Reserved - Eviction Threshold. 
+#calculate the max number of pods per instance type +MAX_PODS_FILE="/etc/eks/eni-max-pods.txt" +set +o pipefail +MAX_PODS=$(cat $MAX_PODS_FILE | awk "/^$INSTANCE_TYPE/"' { print $2 }') +set -o pipefail +if [ -z "$MAX_PODS" ]; then + echo 'No entry for $INSTANCE_TYPE in $MAX_PODS_FILE' + exit 1 +fi + # calculates the amount of each resource to reserve -mebibytes_to_reserve=$(get_memory_mebibytes_to_reserve $INSTANCE_TYPE) +mebibytes_to_reserve=$(get_memory_mebibytes_to_reserve $MAX_PODS) cpu_millicores_to_reserve=$(get_cpu_millicores_to_reserve) # writes kubeReserved and evictionHard to the kubelet-config using the amount of CPU and memory to be reserved echo "$(jq '. += {"evictionHard": {"memory.available": "100Mi", "nodefs.available": "10%", "nodefs.inodesFree": "5%"}}' $KUBELET_CONFIG)" > $KUBELET_CONFIG @@ -289,10 +298,6 @@ echo "$(jq --arg mebibytes_to_reserve "${mebibytes_to_reserve}Mi" --arg cpu_mill '. += {kubeReserved: {"cpu": $cpu_millicores_to_reserve, "ephemeral-storage": "1Gi", "memory": $mebibytes_to_reserve}}' $KUBELET_CONFIG)" > $KUBELET_CONFIG if [[ "$USE_MAX_PODS" = "true" ]]; then - MAX_PODS_FILE="/etc/eks/eni-max-pods.txt" - set +o pipefail - MAX_PODS=$(grep ^$INSTANCE_TYPE $MAX_PODS_FILE | awk '{print $2}') - set -o pipefail if [[ -n "$MAX_PODS" ]]; then echo "$(jq ".maxPods=$MAX_PODS" $KUBELET_CONFIG)" > $KUBELET_CONFIG else From 2e1f63f951c82a76fd20b19b811592535962c82d Mon Sep 17 00:00:00 2001 From: Florent Delannoy Date: Mon, 8 Jun 2020 18:55:12 +0100 Subject: [PATCH 062/621] Set readOnlyPort to 0 (#390) See CIS Benchmark 4.2.4 Co-authored-by: Florent Delannoy --- files/kubelet-config.json | 1 + 1 file changed, 1 insertion(+) diff --git a/files/kubelet-config.json b/files/kubelet-config.json index af57dbf90..ad7aa2edd 100644 --- a/files/kubelet-config.json +++ b/files/kubelet-config.json @@ -23,6 +23,7 @@ }, "clusterDomain": "cluster.local", "hairpinMode": "hairpin-veth", + "readOnlyPort": 0, "cgroupDriver": "cgroupfs", "cgroupRoot": "/", 
"featureGates": { From 06cb77298984d84fc0edd0a876fb571b576d6ae8 Mon Sep 17 00:00:00 2001 From: Florent Delannoy Date: Tue, 9 Jun 2020 17:37:57 +0100 Subject: [PATCH 063/621] Set protectKernelDefaults to true (#392) See CIS Benchmark 4.2.6 --- files/kubelet-config.json | 1 + scripts/install-worker.sh | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/files/kubelet-config.json b/files/kubelet-config.json index ad7aa2edd..b78510c6a 100644 --- a/files/kubelet-config.json +++ b/files/kubelet-config.json @@ -29,6 +29,7 @@ "featureGates": { "RotateKubeletServerCertificate": true }, + "protectKernelDefaults": true, "serializeImagePulls": false, "serverTLSBootstrap": true, "tlsCipherSuites": ["TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256", "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305", "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305", "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", "TLS_RSA_WITH_AES_256_GCM_SHA384", "TLS_RSA_WITH_AES_128_GCM_SHA256"] diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 2679b6131..03bc05709 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -249,6 +249,16 @@ EOF sudo mv /tmp/release /etc/eks/release sudo chown -R root:root /etc/eks +################################################################################ +### Stuff required by "protectKernelDefaults=true" ############################# +################################################################################ + +cat < Date: Thu, 7 May 2020 09:00:31 -0700 Subject: [PATCH 064/621] Change to use instance-identity/document to fetch region --- files/bootstrap.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 92ff073b5..99d74ca2f 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -194,9 +194,10 @@ if [ -z "$CLUSTER_NAME" ]; then exit 1 fi -ZONE=$(curl -s 
http://169.254.169.254/latest/meta-data/placement/availability-zone) -AWS_DEFAULT_REGION=$(echo $ZONE | awk '{print substr($0, 1, length($0)-1)}') -AWS_SERVICES_DOMAIN=$(curl -s http://169.254.169.254/2018-09-24/meta-data/services/domain) + +TOKEN=$(curl -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" "http://169.254.169.254/latest/api/token") +AWS_DEFAULT_REGION=$(curl -s --retry 5 -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/dynamic/instance-identity/document | jq .region -r) +AWS_SERVICES_DOMAIN=$(curl -s --retry 5 -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/2018-09-24/meta-data/services/domain) MACHINE=$(uname -m) if [ "$MACHINE" == "x86_64" ]; then From 9f7e4f7430fc2f87668a6033b2057e900aa9ed86 Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Thu, 11 Jun 2020 09:53:54 -0700 Subject: [PATCH 065/621] Adding GovCloud support and migrating from LaunchConfig to LaunchTemplate for the Nodegroup (#491) --- amazon-eks-nodegroup.yaml | 62 +++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index 88fa1397c..d1f1b159b 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -361,17 +361,21 @@ Mappings: PartitionMap: aws: EC2ServicePrincipal: "ec2.amazonaws.com" + aws-us-gov: + EC2ServicePrincipal: "ec2.amazonaws.com" aws-cn: EC2ServicePrincipal: "ec2.amazonaws.com.cn" aws-iso: EC2ServicePrincipal: "ec2.c2s.ic.gov" aws-iso-b: EC2ServicePrincipal: "ec2.sc2s.sgov.gov" + Conditions: HasNodeImageId: !Not - "Fn::Equals": - Ref: NodeImageId - "" + Resources: NodeInstanceRole: Type: "AWS::IAM::Role" @@ -473,39 +477,45 @@ Resources: SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup ToPort: 443 - NodeLaunchConfig: - Type: "AWS::AutoScaling::LaunchConfiguration" + NodeLaunchTemplate: + Type: "AWS::EC2::LaunchTemplate" Properties: - BlockDeviceMappings: - - 
DeviceName: /dev/xvda - Ebs: - DeleteOnTermination: true - VolumeSize: !Ref NodeVolumeSize - VolumeType: gp2 - IamInstanceProfile: !Ref NodeInstanceProfile - ImageId: !If - - HasNodeImageId - - Ref: NodeImageId - - Ref: NodeImageIdSSMParam - InstanceType: !Ref NodeInstanceType - KeyName: !Ref KeyName - SecurityGroups: + LaunchTemplateData: + BlockDeviceMappings: + - DeviceName: /dev/xvda + Ebs: + DeleteOnTermination: true + VolumeSize: !Ref NodeVolumeSize + VolumeType: gp2 + IamInstanceProfile: + Arn: !GetAtt NodeInstanceProfile.Arn + ImageId: !If + - HasNodeImageId + - Ref: NodeImageId + - Ref: NodeImageIdSSMParam + InstanceType: !Ref NodeInstanceType + KeyName: !Ref KeyName + SecurityGroupIds: - Ref: NodeSecurityGroup - UserData: !Base64 - "Fn::Sub": | - #!/bin/bash - set -o xtrace - /etc/eks/bootstrap.sh ${ClusterName} ${BootstrapArguments} - /opt/aws/bin/cfn-signal --exit-code $? \ - --stack ${AWS::StackName} \ - --resource NodeGroup \ - --region ${AWS::Region} + UserData: !Base64 + "Fn::Sub": | + #!/bin/bash + set -o xtrace + /etc/eks/bootstrap.sh ${ClusterName} ${BootstrapArguments} + /opt/aws/bin/cfn-signal --exit-code $? 
\ + --stack ${AWS::StackName} \ + --resource NodeGroup \ + --region ${AWS::Region} + MetadataOptions: + "HttpPutResponseHopLimit" : 2 NodeGroup: Type: "AWS::AutoScaling::AutoScalingGroup" Properties: DesiredCapacity: !Ref NodeAutoScalingGroupDesiredCapacity - LaunchConfigurationName: !Ref NodeLaunchConfig + LaunchTemplate: + LaunchTemplateId: !Ref NodeLaunchTemplate + Version: !GetAtt NodeLaunchTemplate.LatestVersionNumber MaxSize: !Ref NodeAutoScalingGroupMaxSize MinSize: !Ref NodeAutoScalingGroupMinSize Tags: From 5bac3ca1ca6ab0e00dfce3f10c4b2481694609dc Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Thu, 11 Jun 2020 10:26:36 -0700 Subject: [PATCH 066/621] Adding pause_before to right step after disconnect (#492) --- eks-worker-al2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index d6ca6b2b2..e5621e519 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -116,6 +116,7 @@ }, { "type": "shell", + "pause_before": "90s", "remote_folder": "{{ user `remote_folder`}}", "inline": ["mkdir -p /tmp/worker/"] }, @@ -128,7 +129,6 @@ "type": "shell", "remote_folder": "{{ user `remote_folder`}}", "script": "{{template_dir}}/scripts/install-worker.sh", - "pause_before": "90s", "environment_vars": [ "KUBERNETES_VERSION={{user `kubernetes_version`}}", "KUBERNETES_BUILD_DATE={{user `kubernetes_build_date`}}", From 822aa4ab3894706f2b309b88df0d8c886a6e782d Mon Sep 17 00:00:00 2001 From: Murcherla Date: Wed, 24 Jun 2020 12:31:04 -0500 Subject: [PATCH 067/621] Bump CNI plugin version to 0.8.6 --- eks-worker-al2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index e5621e519..c86abc176 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -16,7 +16,7 @@ "kubernetes_build_date": null, "docker_version": "19.03.6ce-4.amzn2", "cni_version": "v0.6.0", - "cni_plugin_version": "v0.7.5", + 
"cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", "source_ami_id": "", From ed9c8be88dc0f6c29626c84ae8239e22aceac7d1 Mon Sep 17 00:00:00 2001 From: Dan Quackenbush Date: Tue, 30 Jun 2020 12:06:20 -0400 Subject: [PATCH 068/621] :lock: Use token based auth for accessing metadata --- files/bootstrap.sh | 8 ++++---- log-collector-script/linux/eks-log-collector.sh | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 99d74ca2f..7db5a476a 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -259,8 +259,8 @@ sed -i s,AWS_REGION,$AWS_DEFAULT_REGION,g /var/lib/kubelet/kubeconfig ### kubelet.service configuration if [ -z ${DNS_CLUSTER_IP+x} ]; then - MAC=$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/ -s | head -n 1 | sed 's/\/$//') - TEN_RANGE=$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks | grep -c '^10\..*' || true ) + MAC=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/ -s | head -n 1 | sed 's/\/$//') + TEN_RANGE=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks | grep -c '^10\..*' || true ) DNS_CLUSTER_IP=10.100.0.10 if [[ "$TEN_RANGE" != "0" ]]; then DNS_CLUSTER_IP=172.20.0.10 @@ -272,8 +272,8 @@ fi KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json echo "$(jq ".clusterDNS=[\"$DNS_CLUSTER_IP\"]" $KUBELET_CONFIG)" > $KUBELET_CONFIG -INTERNAL_IP=$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4) -INSTANCE_TYPE=$(curl -s http://169.254.169.254/latest/meta-data/instance-type) +INTERNAL_IP=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/local-ipv4) +INSTANCE_TYPE=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/instance-type) # Sets kubeReserved and evictionHard in 
/etc/kubernetes/kubelet/kubelet-config.json for worker nodes. The following two function # calls calculate the CPU and memory resources to reserve for kubeReserved based on the instance type of the worker node. diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index a0e65ad56..19e79af2a 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -28,6 +28,7 @@ readonly LOG_DIR="/var/log" readonly COLLECT_DIR="/tmp/${PROGRAM_NAME}" readonly CURRENT_TIME=$(date --utc +%Y-%m-%d_%H%M-%Z) readonly DAYS_10=$(date -d "-10 days" '+%Y-%m-%d %H:%M') +readonly TOKEN=$(curl -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" "http://169.254.169.254/latest/api/token") INSTANCE_ID="" INIT_TYPE="" PACKAGE_TYPE="" @@ -189,7 +190,7 @@ create_directories() { } get_instance_metadata() { - readonly INSTANCE_ID=$(curl --max-time 3 --silent http://169.254.169.254/latest/meta-data/instance-id 2>/dev/null) + readonly INSTANCE_ID=$(curl --max-time 3 -H "X-aws-ec2-metadata-token: $TOKEN" --silent http://169.254.169.254/latest/meta-data/instance-id 2>/dev/null) echo "${INSTANCE_ID}" > "${COLLECT_DIR}"/system/instance-id.txt } From 01bbf8590081448bc47529a687b02a8f98cad91d Mon Sep 17 00:00:00 2001 From: Daniel Koo Date: Tue, 7 Jul 2020 11:09:08 -0700 Subject: [PATCH 069/621] Revert "Bump CNI plugin version to 0.8.6" (#503) This reverts commit 822aa4ab3894706f2b309b88df0d8c886a6e782d. 
Co-authored-by: Daniel Koo --- eks-worker-al2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index c86abc176..e5621e519 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -16,7 +16,7 @@ "kubernetes_build_date": null, "docker_version": "19.03.6ce-4.amzn2", "cni_version": "v0.6.0", - "cni_plugin_version": "v0.8.6", + "cni_plugin_version": "v0.7.5", "pull_cni_from_github": "true", "source_ami_id": "", From 88aa8ddef477a12ca33dbab1be0fc516e50b8355 Mon Sep 17 00:00:00 2001 From: Murcherla Date: Thu, 9 Jul 2020 13:40:35 -0500 Subject: [PATCH 070/621] Gaurd rails for clean up function --- log-collector-script/linux/eks-log-collector.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 19e79af2a..32f1dccde 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -25,7 +25,7 @@ readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/l readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" readonly LOG_DIR="/var/log" -readonly COLLECT_DIR="/tmp/${PROGRAM_NAME}" +readonly COLLECT_DIR="/tmp/eks-log-collector" readonly CURRENT_TIME=$(date --utc +%Y-%m-%d_%H%M-%Z) readonly DAYS_10=$(date -d "-10 days" '+%Y-%m-%d %H:%M') readonly TOKEN=$(curl -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" "http://169.254.169.254/latest/api/token") @@ -209,7 +209,12 @@ is_diskfull() { } cleanup() { - rm --recursive --force "${COLLECT_DIR}" >/dev/null 2>&1 + #guard rails to avoid accidental deletion of unknown data + if [[ "${COLLECT_DIR}" == "/tmp/eks-log-collector" ]]; then + rm --recursive --force "${COLLECT_DIR}" >/dev/null 2>&1 + else + echo "Unable to Cleanup as {COLLECT_DIR} variable is modified. Please cleanup manually!" 
+ fi } init() { From 21426e27e3845319dbca92e7df32e5c4b984a1d1 Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Fri, 10 Jul 2020 11:19:57 -0700 Subject: [PATCH 071/621] Add 1.17, update Kubernetes versions and deprecate 1.12 and 1.13 (#508) --- Makefile | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index a0e3d0980..215e23115 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.12 1.13 1.14 1.15 +all: 1.14 1.15 1.16 1.17 .PHONY: validate validate: @@ -41,23 +41,18 @@ k8s: validate # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html -.PHONY: 1.12 -1.12: - $(MAKE) k8s kubernetes_version=1.12.10 kubernetes_build_date=2020-04-17 pull_cni_from_github=true - -.PHONY: 1.13 -1.13: - $(MAKE) k8s kubernetes_version=1.13.12 kubernetes_build_date=2020-04-16 pull_cni_from_github=true - .PHONY: 1.14 1.14: - $(MAKE) k8s kubernetes_version=1.14.9 kubernetes_build_date=2020-04-16 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.14.9 kubernetes_build_date=2020-07-08 pull_cni_from_github=true .PHONY: 1.15 1.15: - $(MAKE) k8s kubernetes_version=1.15.11 kubernetes_build_date=2020-04-16 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.15.11 kubernetes_build_date=2020-07-08 pull_cni_from_github=true .PHONY: 1.16 1.16: - $(MAKE) k8s kubernetes_version=1.16.8 kubernetes_build_date=2020-04-16 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.16.12 kubernetes_build_date=2020-07-08 pull_cni_from_github=true +.PHONY: 1.17 +1.17: + $(MAKE) k8s kubernetes_version=1.17.7 kubernetes_build_date=2020-07-08 pull_cni_from_github=true From 222a3290958a22ebff375ed7fed578c5fd69030f Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Tue, 21 Jul 2020 13:20:48 -0700 Subject: [PATCH 072/621] updating logrotateConf ownership to root (#512) Co-authored-by: Sinha --- scripts/install-worker.sh | 1 + 1 
file changed, 1 insertion(+) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 03bc05709..e30aeb918 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -136,6 +136,7 @@ fi sudo mv $TEMPLATE_DIR/logrotate-kube-proxy /etc/logrotate.d/kube-proxy sudo mv $TEMPLATE_DIR/logrotate.conf /etc/logrotate.conf sudo chown root:root /etc/logrotate.d/kube-proxy +sudo chown root:root /etc/logrotate.conf sudo mkdir -p /var/log/journal ################################################################################ From 5bb7200497609b09da5213e03840635274086c49 Mon Sep 17 00:00:00 2001 From: Bronson Mirafuentes Date: Wed, 22 Jul 2020 12:11:36 -0700 Subject: [PATCH 073/621] Update pause container accounts for CPT/MXP (#514) --- files/bootstrap.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 7db5a476a..b42ad12d2 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -121,6 +121,10 @@ function get_pause_container_account_for_region () { echo "${PAUSE_CONTAINER_ACCOUNT:-013241004608}";; us-gov-east-1) echo "${PAUSE_CONTAINER_ACCOUNT:-151742754352}";; + af-south-1) + echo "${PAUSE_CONTAINER_ACCOUNT:-877085696533}";; + eu-south-1) + echo "${PAUSE_CONTAINER_ACCOUNT:-590381155156}";; *) echo "${PAUSE_CONTAINER_ACCOUNT:-602401143452}";; esac From b2901ff44ac4fb47197683f06e1fdd9930eeefcf Mon Sep 17 00:00:00 2001 From: Claes Mogren Date: Wed, 15 Jul 2020 13:11:05 -0700 Subject: [PATCH 074/621] Genererate max pods file from EC2 API Based on changes in https://github.com/aws/amazon-vpc-cni-k8s/pull/1035 --- files/eni-max-pods.txt | 618 ++++++++++++++++++++++------------------- 1 file changed, 328 insertions(+), 290 deletions(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 39308be2f..057a06ea1 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -1,300 +1,338 @@ -# Mapping is calculated from AWS ENI documentation, with the following modifications: +# 
Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may +# not use this file except in compliance with the License. A copy of the +# License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# This file was generated at 2020-07-15T13:04:18-07:00 +# +# Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods # * 2 additional host-networking pods (AWS ENI and kube-proxy) are accounted for # -# # of ENI * (# of IPv4 per ENI - 1) + 2 +# # of ENI * (# of IPv4 per ENI - 1) + 2 # # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-eni.html#AvailableIpPerENI # -# If f1.16xlarge, g3.16xlarge, h1.16xlarge, i3.16xlarge, and r4.16xlarge -# instances use more than 31 IPv4 or IPv6 addresses per interface, they cannot -# access the instance metadata, VPC DNS, and Time Sync services from the 32nd IP -# address onwards. If access to these services is needed from all IP addresses -# on the interface, we recommend using a maximum of 31 IP addresses per interface. 
-a1.medium 8 -a1.large 29 -a1.xlarge 58 -a1.2xlarge 58 -a1.4xlarge 234 -c1.medium 12 -c1.xlarge 58 -c3.large 29 -c3.xlarge 58 -c3.2xlarge 58 -c3.4xlarge 234 -c3.8xlarge 234 -c4.large 29 -c4.xlarge 58 -c4.2xlarge 58 -c4.4xlarge 234 -c4.8xlarge 234 -c5.large 29 -c5.xlarge 58 -c5.2xlarge 58 -c5.4xlarge 234 -c5.9xlarge 234 -c5.12xlarge 234 -c5.18xlarge 737 -c5.24xlarge 737 -c5.metal 737 -c5a.large 29 -c5a.xlarge 58 -c5a.2xlarge 58 -c5a.4xlarge 234 -c5a.8xlarge 234 -c5a.12xlarge 234 -c5a.16xlarge 737 -c5a.24xlarge 737 -c5a.metal 737 -c5ad.large 29 -c5ad.xlarge 58 -c5ad.2xlarge 58 -c5ad.4xlarge 234 -c5ad.8xlarge 234 -c5ad.12xlarge 234 -c5ad.16xlarge 737 -c5ad.24xlarge 737 -c5ad.metal 737 -c5d.large 29 -c5d.xlarge 58 -c5d.2xlarge 58 -c5d.4xlarge 234 -c5d.9xlarge 234 -c5d.12xlarge 234 -c5d.18xlarge 737 -c5d.24xlarge 737 -c5d.metal 737 -c5n.large 29 -c5n.xlarge 58 -c5n.2xlarge 58 -c5n.4xlarge 234 -c5n.9xlarge 234 -c5n.18xlarge 737 -cc2.8xlarge 234 -cr1.8xlarge 234 -d2.xlarge 58 -d2.2xlarge 58 -d2.4xlarge 234 -d2.8xlarge 234 -f1.2xlarge 58 -f1.4xlarge 234 -f1.16xlarge 242 -g2.2xlarge 58 -g2.8xlarge 234 -g3s.xlarge 58 -g3.4xlarge 234 -g3.8xlarge 234 -g3.16xlarge 452 -g4dn.xlarge 29 -g4dn.2xlarge 29 -g4dn.4xlarge 29 -g4dn.8xlarge 58 -g4dn.16xlarge 58 -g4dn.12xlarge 234 -g4dn.metal 737 -h1.2xlarge 58 -h1.4xlarge 234 -h1.8xlarge 234 -h1.16xlarge 452 -hs1.8xlarge 234 -i2.xlarge 58 -i2.2xlarge 58 -i2.4xlarge 234 -i2.8xlarge 234 -i3.large 29 -i3.xlarge 58 -i3.2xlarge 58 -i3.4xlarge 234 -i3.8xlarge 234 -i3.16xlarge 452 -i3.metal 737 -i3en.large 29 -i3en.xlarge 58 -i3en.2xlarge 58 -i3en.3xlarge 58 -i3en.6xlarge 234 -i3en.12xlarge 234 -i3en.24xlarge 737 -inf1.xlarge 38 -inf1.2xlarge 38 -inf1.6xlarge 234 -inf1.24xlarge 437 -m1.small 8 -m1.medium 12 -m1.large 29 -m1.xlarge 58 -m2.xlarge 58 -m2.2xlarge 118 -m2.4xlarge 234 -m3.medium 12 -m3.large 29 -m3.xlarge 58 -m3.2xlarge 118 -m4.large 20 -m4.xlarge 58 -m4.2xlarge 58 -m4.4xlarge 234 -m4.10xlarge 234 -m4.16xlarge 234 -m5.large 29 
-m5.xlarge 58 -m5.2xlarge 58 -m5.4xlarge 234 -m5.8xlarge 234 -m5.12xlarge 234 -m5.16xlarge 737 -m5.24xlarge 737 -m5.metal 737 -m5a.large 29 -m5a.xlarge 58 -m5a.2xlarge 58 -m5a.4xlarge 234 -m5a.8xlarge 234 -m5a.12xlarge 234 -m5a.16xlarge 737 -m5a.24xlarge 737 -m5ad.large 29 -m5ad.xlarge 58 -m5ad.2xlarge 58 -m5ad.4xlarge 234 -m5ad.12xlarge 234 -m5ad.24xlarge 737 -m5d.large 29 -m5d.xlarge 58 -m5d.2xlarge 58 -m5d.4xlarge 234 -m5d.8xlarge 234 -m5d.12xlarge 234 -m5d.16xlarge 737 -m5d.24xlarge 737 -m5d.metal 737 -m5dn.large 29 -m5dn.xlarge 58 -m5dn.2xlarge 58 -m5dn.4xlarge 234 -m5dn.8xlarge 234 -m5dn.12xlarge 234 -m5dn.16xlarge 737 -m5dn.24xlarge 737 -m5n.large 29 -m5n.xlarge 58 -m5n.2xlarge 58 -m5n.4xlarge 234 -m5n.8xlarge 234 -m5n.12xlarge 234 -m5n.16xlarge 737 -m5n.24xlarge 737 -m6g.medium 8 -m6g.large 29 -m6g.xlarge 58 -m6g.2xlarge 58 -m6g.4xlarge 234 -m6g.8xlarge 234 -m6g.12xlarge 234 -m6g.16xlarge 737 -p2.xlarge 58 -p2.8xlarge 234 -p2.16xlarge 234 -p3.2xlarge 58 -p3.8xlarge 234 -p3.16xlarge 234 -p3dn.24xlarge 737 -r3.large 29 -r3.xlarge 58 -r3.2xlarge 58 -r3.4xlarge 234 -r3.8xlarge 234 -r4.large 29 -r4.xlarge 58 -r4.2xlarge 58 -r4.4xlarge 234 -r4.8xlarge 234 -r4.16xlarge 452 -r5.large 29 -r5.xlarge 58 -r5.2xlarge 58 -r5.4xlarge 234 -r5.8xlarge 234 -r5.12xlarge 234 -r5.16xlarge 737 -r5.24xlarge 737 -r5.metal 737 -r5a.large 29 -r5a.xlarge 58 -r5a.2xlarge 58 -r5a.4xlarge 234 -r5a.8xlarge 234 -r5a.12xlarge 234 -r5a.16xlarge 737 -r5a.24xlarge 737 -r5ad.large 29 -r5ad.xlarge 58 -r5ad.2xlarge 58 -r5ad.4xlarge 234 -r5ad.12xlarge 234 -r5ad.24xlarge 737 -r5d.large 29 -r5d.xlarge 58 -r5d.2xlarge 58 -r5d.4xlarge 234 -r5d.8xlarge 234 -r5d.12xlarge 234 -r5d.16xlarge 737 -r5d.24xlarge 737 -r5d.metal 737 -r5dn.large 29 -r5dn.xlarge 58 -r5dn.2xlarge 58 -r5dn.4xlarge 234 -r5dn.8xlarge 234 -r5dn.12xlarge 234 -r5dn.16xlarge 737 -r5dn.24xlarge 737 -r5n.large 29 -r5n.xlarge 58 -r5n.2xlarge 58 -r5n.4xlarge 234 -r5n.8xlarge 234 -r5n.12xlarge 234 -r5n.16xlarge 737 -r5n.24xlarge 737 
+a1.2xlarge 14 +a1.4xlarge 58 +a1.large 8 +a1.medium 4 +a1.metal 58 +a1.xlarge 14 +c1.medium 4 +c1.xlarge 14 +c3.2xlarge 14 +c3.4xlarge 58 +c3.8xlarge 58 +c3.large 8 +c3.xlarge 14 +c4.2xlarge 14 +c4.4xlarge 58 +c4.8xlarge 58 +c4.large 8 +c4.xlarge 14 +c5.12xlarge 58 +c5.18xlarge 212 +c5.24xlarge 212 +c5.2xlarge 14 +c5.4xlarge 58 +c5.9xlarge 58 +c5.large 8 +c5.metal 212 +c5.xlarge 14 +c5a.12xlarge 58 +c5a.16xlarge 212 +c5a.24xlarge 212 +c5a.2xlarge 14 +c5a.4xlarge 58 +c5a.8xlarge 58 +c5a.large 8 +c5a.metal 212 +c5a.xlarge 14 +c5ad.12xlarge 58 +c5ad.16xlarge 212 +c5ad.24xlarge 212 +c5ad.2xlarge 14 +c5ad.4xlarge 58 +c5ad.8xlarge 58 +c5ad.large 8 +c5ad.metal 212 +c5ad.xlarge 14 +c5d.12xlarge 58 +c5d.18xlarge 212 +c5d.24xlarge 212 +c5d.2xlarge 14 +c5d.4xlarge 58 +c5d.9xlarge 58 +c5d.large 8 +c5d.metal 212 +c5d.xlarge 14 +c5n.18xlarge 212 +c5n.2xlarge 14 +c5n.4xlarge 58 +c5n.9xlarge 58 +c5n.large 8 +c5n.metal 212 +c5n.xlarge 14 +c6g.12xlarge 58 +c6g.16xlarge 212 +c6g.2xlarge 14 +c6g.4xlarge 58 +c6g.8xlarge 58 +c6g.large 8 +c6g.medium 4 +c6g.metal 212 +c6g.xlarge 14 +cc2.8xlarge 58 +cr1.8xlarge 58 +d2.2xlarge 14 +d2.4xlarge 58 +d2.8xlarge 58 +d2.xlarge 14 +f1.16xlarge 58 +f1.2xlarge 14 +f1.4xlarge 58 +g2.2xlarge 14 +g2.8xlarge 58 +g3.16xlarge 212 +g3.4xlarge 58 +g3.8xlarge 58 +g3s.xlarge 14 +g4dn.12xlarge 58 +g4dn.16xlarge 14 +g4dn.2xlarge 8 +g4dn.4xlarge 8 +g4dn.8xlarge 14 +g4dn.metal 212 +g4dn.xlarge 8 +h1.16xlarge 212 +h1.2xlarge 14 +h1.4xlarge 58 +h1.8xlarge 58 +hs1.8xlarge 58 +i2.2xlarge 14 +i2.4xlarge 58 +i2.8xlarge 58 +i2.xlarge 14 +i3.16xlarge 212 +i3.2xlarge 14 +i3.4xlarge 58 +i3.8xlarge 58 +i3.large 8 +i3.metal 212 +i3.xlarge 14 +i3en.12xlarge 58 +i3en.24xlarge 212 +i3en.2xlarge 14 +i3en.3xlarge 14 +i3en.6xlarge 58 +i3en.large 8 +i3en.metal 212 +i3en.xlarge 14 +inf1.24xlarge 212 +inf1.2xlarge 14 +inf1.6xlarge 58 +inf1.xlarge 14 +m1.large 8 +m1.medium 4 +m1.small 4 +m1.xlarge 14 +m2.2xlarge 14 +m2.4xlarge 58 +m2.xlarge 14 +m3.2xlarge 14 +m3.large 8 +m3.medium 4 
+m3.xlarge 14 +m4.10xlarge 58 +m4.16xlarge 58 +m4.2xlarge 14 +m4.4xlarge 58 +m4.large 4 +m4.xlarge 14 +m5.12xlarge 58 +m5.16xlarge 212 +m5.24xlarge 212 +m5.2xlarge 14 +m5.4xlarge 58 +m5.8xlarge 58 +m5.large 8 +m5.metal 212 +m5.xlarge 14 +m5a.12xlarge 58 +m5a.16xlarge 212 +m5a.24xlarge 212 +m5a.2xlarge 14 +m5a.4xlarge 58 +m5a.8xlarge 58 +m5a.large 8 +m5a.xlarge 14 +m5ad.12xlarge 58 +m5ad.16xlarge 212 +m5ad.24xlarge 212 +m5ad.2xlarge 14 +m5ad.4xlarge 58 +m5ad.8xlarge 58 +m5ad.large 8 +m5ad.xlarge 14 +m5d.12xlarge 58 +m5d.16xlarge 212 +m5d.24xlarge 212 +m5d.2xlarge 14 +m5d.4xlarge 58 +m5d.8xlarge 58 +m5d.large 8 +m5d.metal 212 +m5d.xlarge 14 +m5dn.12xlarge 58 +m5dn.16xlarge 212 +m5dn.24xlarge 212 +m5dn.2xlarge 14 +m5dn.4xlarge 58 +m5dn.8xlarge 58 +m5dn.large 8 +m5dn.xlarge 14 +m5n.12xlarge 58 +m5n.16xlarge 212 +m5n.24xlarge 212 +m5n.2xlarge 14 +m5n.4xlarge 58 +m5n.8xlarge 58 +m5n.large 8 +m5n.xlarge 14 +m6g.12xlarge 58 +m6g.16xlarge 212 +m6g.2xlarge 14 +m6g.4xlarge 58 +m6g.8xlarge 58 +m6g.large 8 +m6g.medium 4 +m6g.metal 212 +m6g.xlarge 14 +p2.16xlarge 58 +p2.8xlarge 58 +p2.xlarge 14 +p3.16xlarge 58 +p3.2xlarge 14 +p3.8xlarge 58 +p3dn.24xlarge 212 +r3.2xlarge 14 +r3.4xlarge 58 +r3.8xlarge 58 +r3.large 8 +r3.xlarge 14 +r4.16xlarge 212 +r4.2xlarge 14 +r4.4xlarge 58 +r4.8xlarge 58 +r4.large 8 +r4.xlarge 14 +r5.12xlarge 58 +r5.16xlarge 212 +r5.24xlarge 212 +r5.2xlarge 14 +r5.4xlarge 58 +r5.8xlarge 58 +r5.large 8 +r5.metal 212 +r5.xlarge 14 +r5a.12xlarge 58 +r5a.16xlarge 212 +r5a.24xlarge 212 +r5a.2xlarge 14 +r5a.4xlarge 58 +r5a.8xlarge 58 +r5a.large 8 +r5a.xlarge 14 +r5ad.12xlarge 58 +r5ad.16xlarge 212 +r5ad.24xlarge 212 +r5ad.2xlarge 14 +r5ad.4xlarge 58 +r5ad.8xlarge 58 +r5ad.large 8 +r5ad.xlarge 14 +r5d.12xlarge 58 +r5d.16xlarge 212 +r5d.24xlarge 212 +r5d.2xlarge 14 +r5d.4xlarge 58 +r5d.8xlarge 58 +r5d.large 8 +r5d.metal 212 +r5d.xlarge 14 +r5dn.12xlarge 58 +r5dn.16xlarge 212 +r5dn.24xlarge 212 +r5dn.2xlarge 14 +r5dn.4xlarge 58 +r5dn.8xlarge 58 +r5dn.large 8 
+r5dn.xlarge 14 +r5n.12xlarge 58 +r5n.16xlarge 212 +r5n.24xlarge 212 +r5n.2xlarge 14 +r5n.4xlarge 58 +r5n.8xlarge 58 +r5n.large 8 +r5n.xlarge 14 +r6g.12xlarge 58 +r6g.16xlarge 212 +r6g.2xlarge 14 +r6g.4xlarge 58 +r6g.8xlarge 58 +r6g.large 8 +r6g.medium 4 +r6g.metal 212 +r6g.xlarge 14 t1.micro 4 -t2.nano 4 +t2.2xlarge 8 +t2.large 8 +t2.medium 8 t2.micro 4 -t2.small 11 -t2.medium 17 -t2.large 35 -t2.xlarge 44 -t2.2xlarge 44 -t3.nano 4 +t2.nano 4 +t2.small 8 +t2.xlarge 8 +t3.2xlarge 14 +t3.large 8 +t3.medium 8 t3.micro 4 -t3.small 11 -t3.medium 17 -t3.large 35 -t3.xlarge 58 -t3.2xlarge 58 -t3a.nano 4 +t3.nano 4 +t3.small 8 +t3.xlarge 14 +t3a.2xlarge 14 +t3a.large 8 +t3a.medium 8 t3a.micro 4 -t3a.small 8 -t3a.medium 17 -t3a.large 35 -t3a.xlarge 58 -t3a.2xlarge 58 -u-6tb1.metal 147 -u-9tb1.metal 147 -u-12tb1.metal 147 -x1.16xlarge 234 -x1.32xlarge 234 -x1e.xlarge 29 -x1e.2xlarge 58 -x1e.4xlarge 58 -x1e.8xlarge 58 -x1e.16xlarge 234 -x1e.32xlarge 234 -z1d.large 29 -z1d.xlarge 58 -z1d.2xlarge 58 -z1d.3xlarge 234 -z1d.6xlarge 234 -z1d.12xlarge 737 -z1d.metal 737 +t3a.nano 4 +t3a.small 4 +t3a.xlarge 14 +u-12tb1.metal 22 +u-18tb1.metal 212 +u-24tb1.metal 212 +u-6tb1.metal 22 +u-9tb1.metal 22 +x1.16xlarge 58 +x1.32xlarge 58 +x1e.16xlarge 58 +x1e.2xlarge 14 +x1e.32xlarge 58 +x1e.4xlarge 14 +x1e.8xlarge 14 +x1e.xlarge 8 +z1d.12xlarge 212 +z1d.2xlarge 14 +z1d.3xlarge 58 +z1d.6xlarge 58 +z1d.large 8 +z1d.metal 212 +z1d.xlarge 14 From 9e67ee1f365ab7548f6612534207c833b312d6c0 Mon Sep 17 00:00:00 2001 From: Claes Mogren Date: Wed, 1 Jul 2020 22:21:10 -0700 Subject: [PATCH 075/621] Update CNI plugins --- Makefile | 2 +- eks-worker-al2.json | 5 +--- scripts/install-worker.sh | 57 +++++++++++++++------------------------ 3 files changed, 24 insertions(+), 40 deletions(-) diff --git a/Makefile b/Makefile index 215e23115..8699a89f1 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region 
kubernetes_version kubernetes_build_date docker_version cni_version cni_plugin_version source_ami_id source_ami_owners arch instance_type security_group_id additional_yum_repos pull_cni_from_github +PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date docker_version cni_plugin_version source_ami_id source_ami_owners arch instance_type security_group_id additional_yum_repos pull_cni_from_github K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index e5621e519..3fb879101 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -15,8 +15,7 @@ "kubernetes_version": null, "kubernetes_build_date": null, "docker_version": "19.03.6ce-4.amzn2", - "cni_version": "v0.6.0", - "cni_plugin_version": "v0.7.5", + "cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", "source_ami_id": "", @@ -91,7 +90,6 @@ "docker_version": "{{ user `docker_version`}}", "source_ami_id": "{{ user `source_ami_id`}}", "kubernetes": "{{ user `kubernetes_version`}}/{{ user `kubernetes_build_date` }}/bin/linux/{{ user `arch` }}", - "cni_version": "{{ user `cni_version`}}", "cni_plugin_version": "{{ user `cni_plugin_version`}}" }, "ami_name": "{{user `ami_name`}}", @@ -135,7 +133,6 @@ "BINARY_BUCKET_NAME={{user `binary_bucket_name`}}", "BINARY_BUCKET_REGION={{user `binary_bucket_region`}}", "DOCKER_VERSION={{user `docker_version`}}", - "CNI_VERSION={{user `cni_version`}}", "CNI_PLUGIN_VERSION={{user `cni_plugin_version`}}", "PULL_CNI_FROM_GITHUB={{user `pull_cni_from_github`}}", "AWS_ACCESS_KEY_ID={{user `aws_access_key_id`}}", diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index e30aeb918..7d05c20ce 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -24,7 +24,6 @@ validate_env_set() { validate_env_set BINARY_BUCKET_NAME validate_env_set 
BINARY_BUCKET_REGION validate_env_set DOCKER_VERSION -validate_env_set CNI_VERSION validate_env_set CNI_PLUGIN_VERSION validate_env_set KUBERNETES_VERSION validate_env_set KUBERNETES_BUILD_DATE @@ -161,7 +160,7 @@ BINARIES=( aws-iam-authenticator ) for binary in ${BINARIES[*]} ; do - if [[ ! -z "$AWS_ACCESS_KEY_ID" ]]; then + if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then echo "AWS cli present - using it to copy binaries from s3." aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$binary . aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$binary.sha256 . @@ -175,43 +174,31 @@ for binary in ${BINARIES[*]} ; do sudo mv $binary /usr/bin/ done +# Since CNI 0.7.0, all releases are done in the plugins repo. +CNI_PLUGIN_FILENAME="cni-plugins-linux-${ARCH}-${CNI_PLUGIN_VERSION}" + if [ "$PULL_CNI_FROM_GITHUB" = "true" ]; then - echo "Downloading CNI assets from Github" - wget https://github.com/containernetworking/cni/releases/download/${CNI_VERSION}/cni-${ARCH}-${CNI_VERSION}.tgz - wget https://github.com/containernetworking/cni/releases/download/${CNI_VERSION}/cni-${ARCH}-${CNI_VERSION}.tgz.sha512 - - wget https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGIN_VERSION}/cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz - wget https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGIN_VERSION}/cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz.sha512 - sudo sha512sum -c cni-${ARCH}-${CNI_VERSION}.tgz.sha512 - sudo sha512sum -c cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz.sha512 - rm cni-${ARCH}-${CNI_VERSION}.tgz.sha512 - rm cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz.sha512 + echo "Downloading CNI plugins from Github" + wget "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGIN_VERSION}/${CNI_PLUGIN_FILENAME}.tgz" + wget "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGIN_VERSION}/${CNI_PLUGIN_FILENAME}.tgz.sha512" + sudo sha512sum -c "${CNI_PLUGIN_FILENAME}.tgz.sha512" + rm 
"${CNI_PLUGIN_FILENAME}.tgz.sha512" else - CNI_BINARIES=( - cni-${ARCH}-${CNI_VERSION}.tgz - cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz - ) - for binary in ${CNI_BINARIES[*]} ; do - if [[ ! -z "$AWS_ACCESS_KEY_ID" ]]; then - echo "AWS cli present - using it to copy binaries from s3." - aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$binary . - aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$binary.sha256 . - sudo sha256sum -c $binary.sha256 - else - echo "AWS cli missing - using wget to fetch cni binaries from s3. Note: This won't work for private bucket." - sudo wget $S3_URL_BASE/$binary - sudo wget $S3_URL_BASE/$binary.sha256 - fi - done + if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then + echo "AWS cli present - using it to copy binaries from s3." + aws s3 cp --region "$BINARY_BUCKET_REGION $S3_PATH/${CNI_PLUGIN_FILENAME}.tgz" . + aws s3 cp --region "$BINARY_BUCKET_REGION $S3_PATH/${CNI_PLUGIN_FILENAME}.tgz.sha256" . + sudo sha256sum -c "${CNI_PLUGIN_FILENAME}.tgz.sha256" + else + echo "AWS cli missing - using wget to fetch cni binaries from s3. Note: This won't work for private bucket." 
+ sudo wget "$S3_URL_BASE/${CNI_PLUGIN_FILENAME}.tgz" + sudo wget "$S3_URL_BASE/${CNI_PLUGIN_FILENAME}.tgz.sha256" + fi fi -sudo tar -xvf cni-${ARCH}-${CNI_VERSION}.tgz -C /opt/cni/bin -sudo tar -xvf cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz -C /opt/cni/bin -rm cni-${ARCH}-${CNI_VERSION}.tgz -rm cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz - -sudo rm *.sha256 +sudo tar -xvf "${CNI_PLUGIN_FILENAME}.tgz" -C /opt/cni/bin +rm "${CNI_PLUGIN_FILENAME}.tgz" -KUBERNETES_MINOR_VERSION=${KUBERNETES_VERSION%.*} +sudo rm ./*.sha256 sudo mkdir -p /etc/kubernetes/kubelet sudo mkdir -p /etc/systemd/system/kubelet.service.d From 72a98ef909f74ccd922f97278adfc350f3e4da57 Mon Sep 17 00:00:00 2001 From: Claes Mogren Date: Mon, 27 Jul 2020 21:19:52 -0700 Subject: [PATCH 076/621] Re-generate max-pods from API --- files/eni-max-pods.txt | 645 +++++++++++++++++++++-------------------- 1 file changed, 336 insertions(+), 309 deletions(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 057a06ea1..d65adf81f 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2020-07-15T13:04:18-07:00 +# This file was generated at 2020-07-27T21:19:16-07:00 # # Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods @@ -21,318 +21,345 @@ # # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-eni.html#AvailableIpPerENI # -a1.2xlarge 14 -a1.4xlarge 58 -a1.large 8 -a1.medium 4 -a1.metal 58 -a1.xlarge 14 -c1.medium 4 -c1.xlarge 14 -c3.2xlarge 14 -c3.4xlarge 58 -c3.8xlarge 58 -c3.large 8 -c3.xlarge 14 -c4.2xlarge 14 -c4.4xlarge 58 -c4.8xlarge 58 -c4.large 8 -c4.xlarge 14 -c5.12xlarge 58 -c5.18xlarge 212 -c5.24xlarge 212 -c5.2xlarge 14 -c5.4xlarge 58 -c5.9xlarge 58 -c5.large 8 -c5.metal 212 -c5.xlarge 14 -c5a.12xlarge 58 -c5a.16xlarge 212 -c5a.24xlarge 212 -c5a.2xlarge 14 -c5a.4xlarge 58 -c5a.8xlarge 58 -c5a.large 8 -c5a.metal 212 -c5a.xlarge 14 -c5ad.12xlarge 58 -c5ad.16xlarge 212 -c5ad.24xlarge 212 -c5ad.2xlarge 14 -c5ad.4xlarge 58 -c5ad.8xlarge 58 -c5ad.large 8 -c5ad.metal 212 -c5ad.xlarge 14 -c5d.12xlarge 58 -c5d.18xlarge 212 -c5d.24xlarge 212 -c5d.2xlarge 14 -c5d.4xlarge 58 -c5d.9xlarge 58 -c5d.large 8 -c5d.metal 212 -c5d.xlarge 14 -c5n.18xlarge 212 -c5n.2xlarge 14 -c5n.4xlarge 58 -c5n.9xlarge 58 -c5n.large 8 -c5n.metal 212 -c5n.xlarge 14 -c6g.12xlarge 58 -c6g.16xlarge 212 -c6g.2xlarge 14 -c6g.4xlarge 58 -c6g.8xlarge 58 -c6g.large 8 -c6g.medium 4 -c6g.metal 212 -c6g.xlarge 14 -cc2.8xlarge 58 -cr1.8xlarge 58 -d2.2xlarge 14 -d2.4xlarge 58 -d2.8xlarge 58 -d2.xlarge 14 -f1.16xlarge 58 -f1.2xlarge 14 -f1.4xlarge 58 -g2.2xlarge 14 -g2.8xlarge 58 -g3.16xlarge 212 -g3.4xlarge 58 -g3.8xlarge 58 -g3s.xlarge 14 -g4dn.12xlarge 58 -g4dn.16xlarge 14 -g4dn.2xlarge 8 -g4dn.4xlarge 8 -g4dn.8xlarge 14 -g4dn.metal 212 -g4dn.xlarge 8 -h1.16xlarge 212 -h1.2xlarge 14 -h1.4xlarge 58 -h1.8xlarge 58 -hs1.8xlarge 58 -i2.2xlarge 14 -i2.4xlarge 58 -i2.8xlarge 58 -i2.xlarge 14 -i3.16xlarge 212 -i3.2xlarge 14 -i3.4xlarge 58 -i3.8xlarge 58 -i3.large 8 -i3.metal 212 
-i3.xlarge 14 -i3en.12xlarge 58 -i3en.24xlarge 212 -i3en.2xlarge 14 -i3en.3xlarge 14 -i3en.6xlarge 58 -i3en.large 8 -i3en.metal 212 -i3en.xlarge 14 -inf1.24xlarge 212 -inf1.2xlarge 14 -inf1.6xlarge 58 -inf1.xlarge 14 -m1.large 8 -m1.medium 4 -m1.small 4 -m1.xlarge 14 -m2.2xlarge 14 -m2.4xlarge 58 -m2.xlarge 14 -m3.2xlarge 14 -m3.large 8 -m3.medium 4 -m3.xlarge 14 -m4.10xlarge 58 -m4.16xlarge 58 -m4.2xlarge 14 -m4.4xlarge 58 -m4.large 4 -m4.xlarge 14 -m5.12xlarge 58 -m5.16xlarge 212 -m5.24xlarge 212 -m5.2xlarge 14 -m5.4xlarge 58 -m5.8xlarge 58 -m5.large 8 -m5.metal 212 -m5.xlarge 14 -m5a.12xlarge 58 -m5a.16xlarge 212 -m5a.24xlarge 212 -m5a.2xlarge 14 -m5a.4xlarge 58 -m5a.8xlarge 58 -m5a.large 8 -m5a.xlarge 14 -m5ad.12xlarge 58 -m5ad.16xlarge 212 -m5ad.24xlarge 212 -m5ad.2xlarge 14 -m5ad.4xlarge 58 -m5ad.8xlarge 58 -m5ad.large 8 -m5ad.xlarge 14 -m5d.12xlarge 58 -m5d.16xlarge 212 -m5d.24xlarge 212 -m5d.2xlarge 14 -m5d.4xlarge 58 -m5d.8xlarge 58 -m5d.large 8 -m5d.metal 212 -m5d.xlarge 14 -m5dn.12xlarge 58 -m5dn.16xlarge 212 -m5dn.24xlarge 212 -m5dn.2xlarge 14 -m5dn.4xlarge 58 -m5dn.8xlarge 58 -m5dn.large 8 -m5dn.xlarge 14 -m5n.12xlarge 58 -m5n.16xlarge 212 -m5n.24xlarge 212 -m5n.2xlarge 14 -m5n.4xlarge 58 -m5n.8xlarge 58 -m5n.large 8 -m5n.xlarge 14 -m6g.12xlarge 58 -m6g.16xlarge 212 -m6g.2xlarge 14 -m6g.4xlarge 58 -m6g.8xlarge 58 -m6g.large 8 -m6g.medium 4 -m6g.metal 212 -m6g.xlarge 14 -p2.16xlarge 58 -p2.8xlarge 58 -p2.xlarge 14 -p3.16xlarge 58 -p3.2xlarge 14 -p3.8xlarge 58 -p3dn.24xlarge 212 -r3.2xlarge 14 -r3.4xlarge 58 -r3.8xlarge 58 -r3.large 8 -r3.xlarge 14 -r4.16xlarge 212 -r4.2xlarge 14 -r4.4xlarge 58 -r4.8xlarge 58 -r4.large 8 -r4.xlarge 14 -r5.12xlarge 58 -r5.16xlarge 212 -r5.24xlarge 212 -r5.2xlarge 14 -r5.4xlarge 58 -r5.8xlarge 58 -r5.large 8 -r5.metal 212 -r5.xlarge 14 -r5a.12xlarge 58 -r5a.16xlarge 212 -r5a.24xlarge 212 -r5a.2xlarge 14 -r5a.4xlarge 58 -r5a.8xlarge 58 -r5a.large 8 -r5a.xlarge 14 -r5ad.12xlarge 58 -r5ad.16xlarge 212 -r5ad.24xlarge 212 
-r5ad.2xlarge 14 -r5ad.4xlarge 58 -r5ad.8xlarge 58 -r5ad.large 8 -r5ad.xlarge 14 -r5d.12xlarge 58 -r5d.16xlarge 212 -r5d.24xlarge 212 -r5d.2xlarge 14 -r5d.4xlarge 58 -r5d.8xlarge 58 -r5d.large 8 -r5d.metal 212 -r5d.xlarge 14 -r5dn.12xlarge 58 -r5dn.16xlarge 212 -r5dn.24xlarge 212 -r5dn.2xlarge 14 -r5dn.4xlarge 58 -r5dn.8xlarge 58 -r5dn.large 8 -r5dn.xlarge 14 -r5n.12xlarge 58 -r5n.16xlarge 212 -r5n.24xlarge 212 -r5n.2xlarge 14 -r5n.4xlarge 58 -r5n.8xlarge 58 -r5n.large 8 -r5n.xlarge 14 -r6g.12xlarge 58 -r6g.16xlarge 212 -r6g.2xlarge 14 -r6g.4xlarge 58 -r6g.8xlarge 58 -r6g.large 8 -r6g.medium 4 -r6g.metal 212 -r6g.xlarge 14 +a1.2xlarge 58 +a1.4xlarge 234 +a1.large 29 +a1.medium 8 +a1.metal 234 +a1.xlarge 58 +c1.medium 12 +c1.xlarge 58 +c3.2xlarge 58 +c3.4xlarge 234 +c3.8xlarge 234 +c3.large 29 +c3.xlarge 58 +c4.2xlarge 58 +c4.4xlarge 234 +c4.8xlarge 234 +c4.large 29 +c4.xlarge 58 +c5.12xlarge 234 +c5.18xlarge 737 +c5.24xlarge 737 +c5.2xlarge 58 +c5.4xlarge 234 +c5.9xlarge 234 +c5.large 29 +c5.metal 737 +c5.xlarge 58 +c5a.12xlarge 234 +c5a.16xlarge 737 +c5a.24xlarge 737 +c5a.2xlarge 58 +c5a.4xlarge 234 +c5a.8xlarge 234 +c5a.large 29 +c5a.metal 737 +c5a.xlarge 58 +c5ad.12xlarge 234 +c5ad.16xlarge 737 +c5ad.24xlarge 737 +c5ad.2xlarge 58 +c5ad.4xlarge 234 +c5ad.8xlarge 234 +c5ad.large 29 +c5ad.metal 737 +c5ad.xlarge 58 +c5d.12xlarge 234 +c5d.18xlarge 737 +c5d.24xlarge 737 +c5d.2xlarge 58 +c5d.4xlarge 234 +c5d.9xlarge 234 +c5d.large 29 +c5d.metal 737 +c5d.xlarge 58 +c5n.18xlarge 737 +c5n.2xlarge 58 +c5n.4xlarge 234 +c5n.9xlarge 234 +c5n.large 29 +c5n.metal 737 +c5n.xlarge 58 +c6g.12xlarge 234 +c6g.16xlarge 737 +c6g.2xlarge 58 +c6g.4xlarge 234 +c6g.8xlarge 234 +c6g.large 29 +c6g.medium 8 +c6g.metal 737 +c6g.xlarge 58 +c6gd.12xlarge 234 +c6gd.16xlarge 737 +c6gd.2xlarge 58 +c6gd.4xlarge 234 +c6gd.8xlarge 234 +c6gd.large 29 +c6gd.medium 8 +c6gd.metal 737 +c6gd.xlarge 58 +cc2.8xlarge 234 +cr1.8xlarge 234 +d2.2xlarge 58 +d2.4xlarge 234 +d2.8xlarge 234 +d2.xlarge 58 
+f1.16xlarge 394 +f1.2xlarge 58 +f1.4xlarge 234 +g2.2xlarge 58 +g2.8xlarge 234 +g3.16xlarge 737 +g3.4xlarge 234 +g3.8xlarge 234 +g3s.xlarge 58 +g4dn.12xlarge 234 +g4dn.16xlarge 58 +g4dn.2xlarge 29 +g4dn.4xlarge 29 +g4dn.8xlarge 58 +g4dn.metal 737 +g4dn.xlarge 29 +h1.16xlarge 737 +h1.2xlarge 58 +h1.4xlarge 234 +h1.8xlarge 234 +hs1.8xlarge 234 +i2.2xlarge 58 +i2.4xlarge 234 +i2.8xlarge 234 +i2.xlarge 58 +i3.16xlarge 737 +i3.2xlarge 58 +i3.4xlarge 234 +i3.8xlarge 234 +i3.large 29 +i3.metal 737 +i3.xlarge 58 +i3en.12xlarge 234 +i3en.24xlarge 737 +i3en.2xlarge 58 +i3en.3xlarge 58 +i3en.6xlarge 234 +i3en.large 29 +i3en.metal 737 +i3en.xlarge 58 +inf1.24xlarge 437 +inf1.2xlarge 38 +inf1.6xlarge 234 +inf1.xlarge 38 +m1.large 29 +m1.medium 12 +m1.small 8 +m1.xlarge 58 +m2.2xlarge 118 +m2.4xlarge 234 +m2.xlarge 58 +m3.2xlarge 118 +m3.large 29 +m3.medium 12 +m3.xlarge 58 +m4.10xlarge 234 +m4.16xlarge 234 +m4.2xlarge 58 +m4.4xlarge 234 +m4.large 20 +m4.xlarge 58 +m5.12xlarge 234 +m5.16xlarge 737 +m5.24xlarge 737 +m5.2xlarge 58 +m5.4xlarge 234 +m5.8xlarge 234 +m5.large 29 +m5.metal 737 +m5.xlarge 58 +m5a.12xlarge 234 +m5a.16xlarge 737 +m5a.24xlarge 737 +m5a.2xlarge 58 +m5a.4xlarge 234 +m5a.8xlarge 234 +m5a.large 29 +m5a.xlarge 58 +m5ad.12xlarge 234 +m5ad.16xlarge 737 +m5ad.24xlarge 737 +m5ad.2xlarge 58 +m5ad.4xlarge 234 +m5ad.8xlarge 234 +m5ad.large 29 +m5ad.xlarge 58 +m5d.12xlarge 234 +m5d.16xlarge 737 +m5d.24xlarge 737 +m5d.2xlarge 58 +m5d.4xlarge 234 +m5d.8xlarge 234 +m5d.large 29 +m5d.metal 737 +m5d.xlarge 58 +m5dn.12xlarge 234 +m5dn.16xlarge 737 +m5dn.24xlarge 737 +m5dn.2xlarge 58 +m5dn.4xlarge 234 +m5dn.8xlarge 234 +m5dn.large 29 +m5dn.xlarge 58 +m5n.12xlarge 234 +m5n.16xlarge 737 +m5n.24xlarge 737 +m5n.2xlarge 58 +m5n.4xlarge 234 +m5n.8xlarge 234 +m5n.large 29 +m5n.xlarge 58 +m6g.12xlarge 234 +m6g.16xlarge 737 +m6g.2xlarge 58 +m6g.4xlarge 234 +m6g.8xlarge 234 +m6g.large 29 +m6g.medium 8 +m6g.metal 737 +m6g.xlarge 58 +m6gd.12xlarge 234 +m6gd.16xlarge 737 +m6gd.2xlarge 58 
+m6gd.4xlarge 234 +m6gd.8xlarge 234 +m6gd.large 29 +m6gd.medium 8 +m6gd.metal 737 +m6gd.xlarge 58 +p2.16xlarge 234 +p2.8xlarge 234 +p2.xlarge 58 +p3.16xlarge 234 +p3.2xlarge 58 +p3.8xlarge 234 +p3dn.24xlarge 737 +r3.2xlarge 58 +r3.4xlarge 234 +r3.8xlarge 234 +r3.large 29 +r3.xlarge 58 +r4.16xlarge 737 +r4.2xlarge 58 +r4.4xlarge 234 +r4.8xlarge 234 +r4.large 29 +r4.xlarge 58 +r5.12xlarge 234 +r5.16xlarge 737 +r5.24xlarge 737 +r5.2xlarge 58 +r5.4xlarge 234 +r5.8xlarge 234 +r5.large 29 +r5.metal 737 +r5.xlarge 58 +r5a.12xlarge 234 +r5a.16xlarge 737 +r5a.24xlarge 737 +r5a.2xlarge 58 +r5a.4xlarge 234 +r5a.8xlarge 234 +r5a.large 29 +r5a.xlarge 58 +r5ad.12xlarge 234 +r5ad.16xlarge 737 +r5ad.24xlarge 737 +r5ad.2xlarge 58 +r5ad.4xlarge 234 +r5ad.8xlarge 234 +r5ad.large 29 +r5ad.xlarge 58 +r5d.12xlarge 234 +r5d.16xlarge 737 +r5d.24xlarge 737 +r5d.2xlarge 58 +r5d.4xlarge 234 +r5d.8xlarge 234 +r5d.large 29 +r5d.metal 737 +r5d.xlarge 58 +r5dn.12xlarge 234 +r5dn.16xlarge 737 +r5dn.24xlarge 737 +r5dn.2xlarge 58 +r5dn.4xlarge 234 +r5dn.8xlarge 234 +r5dn.large 29 +r5dn.xlarge 58 +r5n.12xlarge 234 +r5n.16xlarge 737 +r5n.24xlarge 737 +r5n.2xlarge 58 +r5n.4xlarge 234 +r5n.8xlarge 234 +r5n.large 29 +r5n.xlarge 58 +r6g.12xlarge 234 +r6g.16xlarge 737 +r6g.2xlarge 58 +r6g.4xlarge 234 +r6g.8xlarge 234 +r6g.large 29 +r6g.medium 8 +r6g.metal 737 +r6g.xlarge 58 +r6gd.12xlarge 234 +r6gd.16xlarge 737 +r6gd.2xlarge 58 +r6gd.4xlarge 234 +r6gd.8xlarge 234 +r6gd.large 29 +r6gd.medium 8 +r6gd.metal 737 +r6gd.xlarge 58 t1.micro 4 -t2.2xlarge 8 -t2.large 8 -t2.medium 8 +t2.2xlarge 44 +t2.large 35 +t2.medium 17 t2.micro 4 t2.nano 4 -t2.small 8 -t2.xlarge 8 -t3.2xlarge 14 -t3.large 8 -t3.medium 8 +t2.small 11 +t2.xlarge 44 +t3.2xlarge 58 +t3.large 35 +t3.medium 17 t3.micro 4 t3.nano 4 -t3.small 8 -t3.xlarge 14 -t3a.2xlarge 14 -t3a.large 8 -t3a.medium 8 +t3.small 11 +t3.xlarge 58 +t3a.2xlarge 58 +t3a.large 35 +t3a.medium 17 t3a.micro 4 t3a.nano 4 -t3a.small 4 -t3a.xlarge 14 -u-12tb1.metal 22 
-u-18tb1.metal 212 -u-24tb1.metal 212 -u-6tb1.metal 22 -u-9tb1.metal 22 -x1.16xlarge 58 -x1.32xlarge 58 -x1e.16xlarge 58 -x1e.2xlarge 14 -x1e.32xlarge 58 -x1e.4xlarge 14 -x1e.8xlarge 14 -x1e.xlarge 8 -z1d.12xlarge 212 -z1d.2xlarge 14 -z1d.3xlarge 58 -z1d.6xlarge 58 -z1d.large 8 -z1d.metal 212 -z1d.xlarge 14 +t3a.small 8 +t3a.xlarge 58 +u-12tb1.metal 147 +u-18tb1.metal 737 +u-24tb1.metal 737 +u-6tb1.metal 147 +u-9tb1.metal 147 +x1.16xlarge 234 +x1.32xlarge 234 +x1e.16xlarge 234 +x1e.2xlarge 58 +x1e.32xlarge 234 +x1e.4xlarge 58 +x1e.8xlarge 58 +x1e.xlarge 29 +z1d.12xlarge 737 +z1d.2xlarge 58 +z1d.3xlarge 234 +z1d.6xlarge 234 +z1d.large 29 +z1d.metal 737 +z1d.xlarge 58 From 5593de80211161937be8c6b39df9495b3b2b99f0 Mon Sep 17 00:00:00 2001 From: Nathan Prabhu Date: Wed, 29 Jul 2020 16:48:24 -0500 Subject: [PATCH 077/621] update pause container to use multi-arch repo (#517) Co-authored-by: Nathan Prabhu --- files/bootstrap.sh | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index b42ad12d2..f0b7924e5 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -104,7 +104,7 @@ KUBELET_EXTRA_ARGS="${KUBELET_EXTRA_ARGS:-}" ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" API_RETRY_ATTEMPTS="${API_RETRY_ATTEMPTS:-3}" DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" -PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.1}" +PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.1-eksbuild.1}" function get_pause_container_account_for_region () { local region="$1" @@ -204,17 +204,13 @@ AWS_DEFAULT_REGION=$(curl -s --retry 5 -H "X-aws-ec2-metadata-token: $TOKEN" htt AWS_SERVICES_DOMAIN=$(curl -s --retry 5 -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/2018-09-24/meta-data/services/domain) MACHINE=$(uname -m) -if [ "$MACHINE" == "x86_64" ]; then - ARCH="amd64" -elif [ "$MACHINE" == "aarch64" ]; then - ARCH="arm64" -else +if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then echo 
"Unknown machine architecture '$MACHINE'" >&2 exit 1 fi PAUSE_CONTAINER_ACCOUNT=$(get_pause_container_account_for_region "${AWS_DEFAULT_REGION}") -PAUSE_CONTAINER_IMAGE=${PAUSE_CONTAINER_IMAGE:-$PAUSE_CONTAINER_ACCOUNT.dkr.ecr.$AWS_DEFAULT_REGION.$AWS_SERVICES_DOMAIN/eks/pause-${ARCH}} +PAUSE_CONTAINER_IMAGE=${PAUSE_CONTAINER_IMAGE:-$PAUSE_CONTAINER_ACCOUNT.dkr.ecr.$AWS_DEFAULT_REGION.$AWS_SERVICES_DOMAIN/eks/pause} PAUSE_CONTAINER="$PAUSE_CONTAINER_IMAGE:$PAUSE_CONTAINER_VERSION" ### kubelet kubeconfig From ee2e42cd7693206ef93191ca85cc3878e02092fc Mon Sep 17 00:00:00 2001 From: Bronson Mirafuentes Date: Tue, 4 Aug 2020 13:42:09 -0700 Subject: [PATCH 078/621] Fix typo in CNI Plugin pull command from S3 (#520) --- scripts/install-worker.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 7d05c20ce..f99a4f290 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -186,8 +186,8 @@ if [ "$PULL_CNI_FROM_GITHUB" = "true" ]; then else if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then echo "AWS cli present - using it to copy binaries from s3." - aws s3 cp --region "$BINARY_BUCKET_REGION $S3_PATH/${CNI_PLUGIN_FILENAME}.tgz" . - aws s3 cp --region "$BINARY_BUCKET_REGION $S3_PATH/${CNI_PLUGIN_FILENAME}.tgz.sha256" . + aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/${CNI_PLUGIN_FILENAME}.tgz . + aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/${CNI_PLUGIN_FILENAME}.tgz.sha256 . sudo sha256sum -c "${CNI_PLUGIN_FILENAME}.tgz.sha256" else echo "AWS cli missing - using wget to fetch cni binaries from s3. Note: This won't work for private bucket." 
From 0d325c249c580e226071d20090c9b6e46a09b5b4 Mon Sep 17 00:00:00 2001 From: Vishal Gupta Date: Thu, 13 Aug 2020 15:18:05 -0700 Subject: [PATCH 079/621] Adding new ARM instance types --- amazon-eks-nodegroup.yaml | 90 +++++++++++++++++++++++++++++++++++---- 1 file changed, 81 insertions(+), 9 deletions(-) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index d1f1b159b..af85646ed 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -23,6 +23,7 @@ Metadata: - NodeVolumeSize - KeyName - BootstrapArguments + - DisableIMDSv1 - Label: default: Worker Network Configuration Parameters: @@ -73,8 +74,15 @@ Parameters: NodeImageIdSSMParam: Type: "AWS::SSM::Parameter::Value" - Default: /aws/service/eks/optimized-ami/1.14/amazon-linux-2/recommended/image_id - Description: AWS Systems Manager Parameter Store parameter of the AMI ID for the worker node instances. + Default: /aws/service/eks/optimized-ami/1.17/amazon-linux-2/recommended/image_id + Description: AWS Systems Manager Parameter Store parameter of the AMI ID for the worker node instances. Change this value to match the version of Kubernetes you are using. 
+ + DisableIMDSv1: + Type: String + Default: "false" + AllowedValues: + - "false" + - "true" NodeInstanceType: Type: String @@ -121,6 +129,24 @@ Parameters: - c5n.4xlarge - c5n.9xlarge - c5n.18xlarge + - c6g.medium + - c6g.large + - c6g.xlarge + - c6g.2xlarge + - c6g.4xlarge + - c6g.8xlarge + - c6g.12xlarge + - c6g.16xlarge + - c6g.metal + - c6gd.medium + - c6gd.large + - c6gd.xlarge + - c6gd.2xlarge + - c6gd.4xlarge + - c6gd.8xlarge + - c6gd.12xlarge + - c6gd.16xlarge + - c6gd.metal - cc2.8xlarge - cr1.8xlarge - d2.xlarge @@ -228,6 +254,24 @@ Parameters: - m5n.12xlarge - m5n.16xlarge - m5n.24xlarge + - m6g.medium + - m6g.large + - m6g.xlarge + - m6g.2xlarge + - m6g.4xlarge + - m6g.8xlarge + - m6g.12xlarge + - m6g.16xlarge + - m6g.metal + - m6gd.medium + - m6gd.large + - m6gd.xlarge + - m6gd.2xlarge + - m6gd.4xlarge + - m6gd.8xlarge + - m6gd.12xlarge + - m6gd.16xlarge + - m6gd.metal - p2.xlarge - p2.8xlarge - p2.16xlarge @@ -301,6 +345,24 @@ Parameters: - r5n.12xlarge - r5n.16xlarge - r5n.24xlarge + - r6g.medium + - r6g.large + - r6g.xlarge + - r6g.2xlarge + - r6g.4xlarge + - r6g.8xlarge + - r6g.12xlarge + - r6g.16xlarge + - r6g.metal + - r6gd.medium + - r6gd.large + - r6gd.xlarge + - r6gd.2xlarge + - r6gd.4xlarge + - r6gd.8xlarge + - r6gd.12xlarge + - r6gd.16xlarge + - r6gd.metal - t1.micro - t2.nano - t2.micro @@ -373,8 +435,13 @@ Mappings: Conditions: HasNodeImageId: !Not - "Fn::Equals": - - Ref: NodeImageId - - "" + - !Ref NodeImageId + - "" + + IMDSv1Disabled: + "Fn::Equals": + - !Ref DisableIMDSv1 + - "true" Resources: NodeInstanceRole: @@ -400,7 +467,7 @@ Resources: Properties: Path: / Roles: - - Ref: NodeInstanceRole + - !Ref NodeInstanceRole NodeSecurityGroup: Type: "AWS::EC2::SecurityGroup" @@ -491,12 +558,12 @@ Resources: Arn: !GetAtt NodeInstanceProfile.Arn ImageId: !If - HasNodeImageId - - Ref: NodeImageId - - Ref: NodeImageIdSSMParam + - !Ref NodeImageId + - !Ref NodeImageIdSSMParam InstanceType: !Ref NodeInstanceType KeyName: !Ref KeyName 
SecurityGroupIds: - - Ref: NodeSecurityGroup + - !Ref NodeSecurityGroup UserData: !Base64 "Fn::Sub": | #!/bin/bash @@ -507,7 +574,12 @@ Resources: --resource NodeGroup \ --region ${AWS::Region} MetadataOptions: - "HttpPutResponseHopLimit" : 2 + HttpPutResponseHopLimit : 2 + HttpEndpoint: enabled + HttpTokens: !If + - IMDSv1Disabled + - required + - optional NodeGroup: Type: "AWS::AutoScaling::AutoScalingGroup" From 88648213079b4c0f70fe69c479724519327d7ae1 Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Thu, 20 Aug 2020 15:15:11 -0700 Subject: [PATCH 080/621] AMI release to get Cluster DNS IP from SCIDR for Self Managed Nodegroups (#525) Co-authored-by: Laxmi Soumya Josyula --- files/bootstrap.sh | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index f0b7924e5..1b5dc94cb 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -100,6 +100,7 @@ set -u USE_MAX_PODS="${USE_MAX_PODS:-true}" B64_CLUSTER_CA="${B64_CLUSTER_CA:-}" APISERVER_ENDPOINT="${APISERVER_ENDPOINT:-}" +DNS_CLUSTER_IP="${DNS_CLUSTER_IP:-}" KUBELET_EXTRA_ARGS="${KUBELET_EXTRA_ARGS:-}" ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" API_RETRY_ATTEMPTS="${API_RETRY_ATTEMPTS:-3}" @@ -218,7 +219,7 @@ PAUSE_CONTAINER="$PAUSE_CONTAINER_IMAGE:$PAUSE_CONTAINER_VERSION" CA_CERTIFICATE_DIRECTORY=/etc/kubernetes/pki CA_CERTIFICATE_FILE_PATH=$CA_CERTIFICATE_DIRECTORY/ca.crt mkdir -p $CA_CERTIFICATE_DIRECTORY -if [[ -z "${B64_CLUSTER_CA}" ]] && [[ -z "${APISERVER_ENDPOINT}" ]]; then +if [[ -z "${DNS_CLUSTER_IP}" ]] || [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then DESCRIBE_CLUSTER_RESULT="/tmp/describe_cluster_result.txt" # Retry the DescribeCluster API for API_RETRY_ATTEMPTS @@ -236,7 +237,7 @@ if [[ -z "${B64_CLUSTER_CA}" ]] && [[ -z "${APISERVER_ENDPOINT}" ]]; then --region=${AWS_DEFAULT_REGION} \ --name=${CLUSTER_NAME} \ --output=text \ - --query 
'cluster.{certificateAuthorityData: certificateAuthority.data, endpoint: endpoint}' > $DESCRIBE_CLUSTER_RESULT || rc=$? + --query 'cluster.{certificateAuthorityData: certificateAuthority.data, endpoint: endpoint, kubernetesNetworkConfig: kubernetesNetworkConfig.serviceIpv4Cidr}' > $DESCRIBE_CLUSTER_RESULT || rc=$? if [[ $rc -eq 0 ]]; then break fi @@ -249,6 +250,7 @@ if [[ -z "${B64_CLUSTER_CA}" ]] && [[ -z "${APISERVER_ENDPOINT}" ]]; then done B64_CLUSTER_CA=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $1}') APISERVER_ENDPOINT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $2}') + SERVICE_IPV4_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $3}') fi echo $B64_CLUSTER_CA | base64 -d > $CA_CERTIFICATE_FILE_PATH @@ -258,15 +260,20 @@ sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig sed -i s,AWS_REGION,$AWS_DEFAULT_REGION,g /var/lib/kubelet/kubeconfig ### kubelet.service configuration -if [ -z ${DNS_CLUSTER_IP+x} ]; then +if [[ -z "${DNS_CLUSTER_IP}" ]]; then + if [[ ! -z "${SERVICE_IPV4_CIDR}" ]] && [[ "${SERVICE_IPV4_CIDR}" != "None" ]] ; then + #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. 
(x.y.z.10) + DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 + else MAC=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/ -s | head -n 1 | sed 's/\/$//') TEN_RANGE=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks | grep -c '^10\..*' || true ) DNS_CLUSTER_IP=10.100.0.10 if [[ "$TEN_RANGE" != "0" ]]; then - DNS_CLUSTER_IP=172.20.0.10 + DNS_CLUSTER_IP=172.20.0.10 fi + fi else - DNS_CLUSTER_IP="${DNS_CLUSTER_IP}" + DNS_CLUSTER_IP="${DNS_CLUSTER_IP}" fi KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json From 645470d824044237aafefa900a90360fc4c6cf41 Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Wed, 2 Sep 2020 17:05:27 -0700 Subject: [PATCH 081/621] Making describe call only when one of or both the CA value or endpoint are missing (#534) Co-authored-by: Laxmi Soumya Josyula --- files/bootstrap.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 1b5dc94cb..4b3dc26c8 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -100,6 +100,7 @@ set -u USE_MAX_PODS="${USE_MAX_PODS:-true}" B64_CLUSTER_CA="${B64_CLUSTER_CA:-}" APISERVER_ENDPOINT="${APISERVER_ENDPOINT:-}" +SERVICE_IPV4_CIDR="${SERVICE_IPV4_CIDR:-}" DNS_CLUSTER_IP="${DNS_CLUSTER_IP:-}" KUBELET_EXTRA_ARGS="${KUBELET_EXTRA_ARGS:-}" ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" @@ -219,7 +220,7 @@ PAUSE_CONTAINER="$PAUSE_CONTAINER_IMAGE:$PAUSE_CONTAINER_VERSION" CA_CERTIFICATE_DIRECTORY=/etc/kubernetes/pki CA_CERTIFICATE_FILE_PATH=$CA_CERTIFICATE_DIRECTORY/ca.crt mkdir -p $CA_CERTIFICATE_DIRECTORY -if [[ -z "${DNS_CLUSTER_IP}" ]] || [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then +if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then DESCRIBE_CLUSTER_RESULT="/tmp/describe_cluster_result.txt" # 
Retry the DescribeCluster API for API_RETRY_ATTEMPTS From 8a1b744a575d88f8137a823f860850838db6ac95 Mon Sep 17 00:00:00 2001 From: cmdallas Date: Wed, 2 Sep 2020 19:45:12 -0700 Subject: [PATCH 082/621] fix(install-worker): add s3 domains for isolated regions (#518) --- scripts/install-worker.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index f99a4f290..826b28d98 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -151,6 +151,10 @@ echo "Downloading binaries from: s3://$BINARY_BUCKET_NAME" S3_DOMAIN="amazonaws.com" if [ "$BINARY_BUCKET_REGION" = "cn-north-1" ] || [ "$BINARY_BUCKET_REGION" = "cn-northwest-1" ]; then S3_DOMAIN="amazonaws.com.cn" +elif [ "$BINARY_BUCKET_REGION" = "us-iso-east-1" ]; then + S3_DOMAIN="c2s.ic.gov" +elif [ "$BINARY_BUCKET_REGION" = "us-isob-east-1" ]; then + S3_DOMAIN="sc2s.sgov.gov" fi S3_URL_BASE="https://$BINARY_BUCKET_NAME.s3.$BINARY_BUCKET_REGION.$S3_DOMAIN/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/$ARCH" S3_PATH="s3://$BINARY_BUCKET_NAME/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/$ARCH" From 68dce97bf58884cf3043ae0e077fad48a31c6700 Mon Sep 17 00:00:00 2001 From: sehayoun <70663169+sehayoun@users.noreply.github.com> Date: Wed, 2 Sep 2020 21:26:58 -0700 Subject: [PATCH 083/621] Allow to pass source ami owner id (#533) --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 8699a89f1..faf2bf7ec 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,7 @@ K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) aws_region ?= $(AWS_DEFAULT_REGION) +source_ami_owners ?= $(SOURCE_AMI_OWNERS) binary_bucket_region ?= $(AWS_DEFAULT_REGION) ami_name ?= amazon-eks-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') arch ?= x86_64 From 3fb57eca91ba2bc96c20fe6d3240a8b23afda12b Mon Sep 17 00:00:00 2001 From: sehayoun 
<70663169+sehayoun@users.noreply.github.com> Date: Thu, 3 Sep 2020 14:47:35 -0700 Subject: [PATCH 084/621] Revert "Allow to pass source ami owner id" (#538) This reverts commit 004e7dc23622bccac4e40fae1897cddaa85a4673. --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index faf2bf7ec..8699a89f1 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,6 @@ K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) aws_region ?= $(AWS_DEFAULT_REGION) -source_ami_owners ?= $(SOURCE_AMI_OWNERS) binary_bucket_region ?= $(AWS_DEFAULT_REGION) ami_name ?= amazon-eks-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') arch ?= x86_64 From 1affbd8ccc6cd977f5b73c303887bb79174a1957 Mon Sep 17 00:00:00 2001 From: Claes Mogren Date: Thu, 3 Sep 2020 14:19:13 -0700 Subject: [PATCH 085/621] Fix formula comment --- files/eni-max-pods.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index d65adf81f..89f566a85 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,11 +11,11 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2020-07-27T21:19:16-07:00 +# This file was generated at 2020-09-03T14:12:27-07:00 # # Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods -# * 2 additional host-networking pods (AWS ENI and kube-proxy) are accounted for +# * +2 for for the pods that use host-networking (AWS CNI and kube-proxy) # # # of ENI * (# of IPv4 per ENI - 1) + 2 # From 07dd954f09084c46d8c570f010c529ea1ad48027 Mon Sep 17 00:00:00 2001 From: Claes Mogren Date: Tue, 15 Sep 2020 19:09:04 -0700 Subject: [PATCH 086/621] Add t4g instance type --- files/eni-max-pods.txt | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 89f566a85..a4e10753a 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. # -# This file was generated at 2020-09-03T14:12:27-07:00 +# This file was generated at 2020-09-15T18:15:46-07:00 # # Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods @@ -146,7 +146,7 @@ i3en.6xlarge 234 i3en.large 29 i3en.metal 737 i3en.xlarge 58 -inf1.24xlarge 437 +inf1.24xlarge 321 inf1.2xlarge 38 inf1.6xlarge 234 inf1.xlarge 38 @@ -343,6 +343,13 @@ t3a.micro 4 t3a.nano 4 t3a.small 8 t3a.xlarge 58 +t4g.2xlarge 58 +t4g.large 35 +t4g.medium 17 +t4g.micro 4 +t4g.nano 4 +t4g.small 11 +t4g.xlarge 58 u-12tb1.metal 147 u-18tb1.metal 737 u-24tb1.metal 737 From 9c632e446b684bcbb89b1c4a382b9393eae04046 Mon Sep 17 00:00:00 2001 From: Joshua Powers Date: Wed, 23 Sep 2020 08:29:26 -0700 Subject: [PATCH 087/621] eni-max-pods.txt: remove extra 'for' --- files/eni-max-pods.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index a4e10753a..01cbc0955 100644 --- 
a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -15,7 +15,7 @@ # # Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods -# * +2 for for the pods that use host-networking (AWS CNI and kube-proxy) +# * +2 for the pods that use host-networking (AWS CNI and kube-proxy) # # # of ENI * (# of IPv4 per ENI - 1) + 2 # From 6d02109f831dcedeb52c91be37c63fe902ab9bbe Mon Sep 17 00:00:00 2001 From: Claes Mogren Date: Wed, 30 Sep 2020 16:02:38 -0700 Subject: [PATCH 088/621] Add support for p4d.24xlarge --- files/eni-max-pods.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 01cbc0955..4c44fce90 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. # -# This file was generated at 2020-09-15T18:15:46-07:00 +# This file was generated at 2020-09-30T15:27:32-07:00 # # Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods @@ -242,6 +242,7 @@ p3.16xlarge 234 p3.2xlarge 58 p3.8xlarge 234 p3dn.24xlarge 737 +p4d.24xlarge 2942 r3.2xlarge 58 r3.4xlarge 234 r3.8xlarge 234 From 774f2ddae482f3fc44a7ec03929f6ea9d04b113f Mon Sep 17 00:00:00 2001 From: Vishal Gupta Date: Mon, 12 Oct 2020 10:43:32 -0700 Subject: [PATCH 089/621] Including 1.18 (#549) Co-authored-by: Vishal Gupta --- Makefile | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 8699a89f1..70a587c43 100644 --- a/Makefile +++ b/Makefile @@ -6,12 +6,13 @@ K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS aws_region ?= $(AWS_DEFAULT_REGION) binary_bucket_region ?= $(AWS_DEFAULT_REGION) -ami_name ?= amazon-eks-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') arch ?= x86_64 ifeq ($(arch), arm64) 
instance_type ?= a1.large +ami_name ?= amazon-eks-arm64-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') else instance_type ?= m4.large +ami_name ?= amazon-eks-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') endif ifeq ($(aws_region), cn-northwest-1) @@ -43,16 +44,20 @@ k8s: validate .PHONY: 1.14 1.14: - $(MAKE) k8s kubernetes_version=1.14.9 kubernetes_build_date=2020-07-08 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.14.9 kubernetes_build_date=2020-09-18 pull_cni_from_github=true .PHONY: 1.15 1.15: - $(MAKE) k8s kubernetes_version=1.15.11 kubernetes_build_date=2020-07-08 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.15.11 kubernetes_build_date=2020-09-18 pull_cni_from_github=true .PHONY: 1.16 1.16: - $(MAKE) k8s kubernetes_version=1.16.12 kubernetes_build_date=2020-07-08 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.16.13 kubernetes_build_date=2020-09-18 pull_cni_from_github=true .PHONY: 1.17 1.17: - $(MAKE) k8s kubernetes_version=1.17.7 kubernetes_build_date=2020-07-08 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.17.11 kubernetes_build_date=2020-09-18 pull_cni_from_github=true + +.PHONY: 1.18 +1.18: + $(MAKE) k8s kubernetes_version=1.18.8 kubernetes_build_date=2020-09-18 pull_cni_from_github=true From 5c5a5c536045f80bb2d1ab2a3edab3a1eb0ffb10 Mon Sep 17 00:00:00 2001 From: Vishal Gupta Date: Wed, 21 Oct 2020 14:12:15 -0700 Subject: [PATCH 090/621] Adding t4g instance types + some missing instances types (#552) Co-authored-by: Vishal Gupta --- amazon-eks-nodegroup.yaml | 367 +++++++++++++++++++++----------------- 1 file changed, 201 insertions(+), 166 deletions(-) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index af85646ed..aa4e39ee0 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -88,321 +88,356 @@ Parameters: Type: String Default: t3.medium AllowedValues: - - a1.medium - - a1.large - - a1.xlarge - a1.2xlarge - a1.4xlarge + - a1.large + - a1.medium + 
- a1.metal + - a1.xlarge - c1.medium - c1.xlarge - - c3.large - - c3.xlarge - c3.2xlarge - c3.4xlarge - c3.8xlarge - - c4.large - - c4.xlarge + - c3.large + - c3.xlarge - c4.2xlarge - c4.4xlarge - c4.8xlarge - - c5.large - - c5.xlarge - - c5.2xlarge - - c5.4xlarge - - c5.9xlarge + - c4.large + - c4.xlarge - c5.12xlarge - c5.18xlarge - c5.24xlarge + - c5.2xlarge + - c5.4xlarge + - c5.9xlarge + - c5.large - c5.metal - - c5d.large - - c5d.xlarge - - c5d.2xlarge - - c5d.4xlarge - - c5d.9xlarge + - c5.xlarge + - c5a.12xlarge + - c5a.16xlarge + - c5a.24xlarge + - c5a.2xlarge + - c5a.4xlarge + - c5a.8xlarge + - c5a.large + - c5a.metal + - c5a.xlarge + - c5ad.12xlarge + - c5ad.16xlarge + - c5ad.24xlarge + - c5ad.2xlarge + - c5ad.4xlarge + - c5ad.8xlarge + - c5ad.large + - c5ad.metal + - c5ad.xlarge - c5d.12xlarge - c5d.18xlarge - c5d.24xlarge + - c5d.2xlarge + - c5d.4xlarge + - c5d.9xlarge + - c5d.large - c5d.metal - - c5n.large - - c5n.xlarge + - c5d.xlarge + - c5n.18xlarge - c5n.2xlarge - c5n.4xlarge - c5n.9xlarge - - c5n.18xlarge - - c6g.medium - - c6g.large - - c6g.xlarge + - c5n.large + - c5n.metal + - c5n.xlarge + - c6g.12xlarge + - c6g.16xlarge - c6g.2xlarge - c6g.4xlarge - c6g.8xlarge - - c6g.12xlarge - - c6g.16xlarge + - c6g.large + - c6g.medium - c6g.metal - - c6gd.medium - - c6gd.large - - c6gd.xlarge + - c6g.xlarge + - c6gd.12xlarge + - c6gd.16xlarge - c6gd.2xlarge - c6gd.4xlarge - c6gd.8xlarge - - c6gd.12xlarge - - c6gd.16xlarge + - c6gd.large + - c6gd.medium - c6gd.metal + - c6gd.xlarge - cc2.8xlarge - cr1.8xlarge - - d2.xlarge - d2.2xlarge - d2.4xlarge - d2.8xlarge + - d2.xlarge + - f1.16xlarge - f1.2xlarge - f1.4xlarge - - f1.16xlarge - g2.2xlarge - g2.8xlarge - - g3s.xlarge + - g3.16xlarge - g3.4xlarge - g3.8xlarge - - g3.16xlarge + - g3s.xlarge + - g4dn.12xlarge + - g4dn.16xlarge + - g4dn.2xlarge + - g4dn.4xlarge + - g4dn.8xlarge + - g4dn.metal + - g4dn.xlarge + - h1.16xlarge - h1.2xlarge - h1.4xlarge - h1.8xlarge - - h1.16xlarge - hs1.8xlarge - - 
i2.xlarge - i2.2xlarge - i2.4xlarge - i2.8xlarge - - i3.large - - i3.xlarge + - i2.xlarge + - i3.16xlarge - i3.2xlarge - i3.4xlarge - i3.8xlarge - - i3.16xlarge + - i3.large - i3.metal - - i3en.large - - i3en.xlarge + - i3.xlarge + - i3en.12xlarge + - i3en.24xlarge - i3en.2xlarge - i3en.3xlarge - i3en.6xlarge - - i3en.12xlarge - - i3en.24xlarge - - inf1.xlarge + - i3en.large + - i3en.metal + - i3en.xlarge + - inf1.24xlarge - inf1.2xlarge - inf1.6xlarge - - inf1.24xlarge - - m1.small - - m1.medium + - inf1.xlarge - m1.large + - m1.medium + - m1.small - m1.xlarge - - m2.xlarge - m2.2xlarge - m2.4xlarge - - m3.medium + - m2.xlarge + - m3.2xlarge - m3.large + - m3.medium - m3.xlarge - - m3.2xlarge - - m4.large - - m4.xlarge - - m4.2xlarge - - m4.4xlarge - m4.10xlarge - m4.16xlarge - - m5.large - - m5.xlarge - - m5.2xlarge - - m5.4xlarge - - m5.8xlarge + - m4.2xlarge + - m4.4xlarge + - m4.large + - m4.xlarge - m5.12xlarge - m5.16xlarge - m5.24xlarge + - m5.2xlarge + - m5.4xlarge + - m5.8xlarge + - m5.large - m5.metal - - m5a.large - - m5a.xlarge - - m5a.2xlarge - - m5a.4xlarge - - m5a.8xlarge + - m5.xlarge - m5a.12xlarge - m5a.16xlarge - m5a.24xlarge - - m5ad.large - - m5ad.xlarge - - m5ad.2xlarge - - m5ad.4xlarge + - m5a.2xlarge + - m5a.4xlarge + - m5a.8xlarge + - m5a.large + - m5a.xlarge - m5ad.12xlarge + - m5ad.16xlarge - m5ad.24xlarge - - m5d.large - - m5d.xlarge - - m5d.2xlarge - - m5d.4xlarge - - m5d.8xlarge + - m5ad.2xlarge + - m5ad.4xlarge + - m5ad.8xlarge + - m5ad.large + - m5ad.xlarge - m5d.12xlarge - m5d.16xlarge - m5d.24xlarge + - m5d.2xlarge + - m5d.4xlarge + - m5d.8xlarge + - m5d.large - m5d.metal - - m5dn.large - - m5dn.xlarge - - m5dn.2xlarge - - m5dn.4xlarge - - m5dn.8xlarge + - m5d.xlarge - m5dn.12xlarge - m5dn.16xlarge - m5dn.24xlarge - - m5n.large - - m5n.xlarge - - m5n.2xlarge - - m5n.4xlarge - - m5n.8xlarge + - m5dn.2xlarge + - m5dn.4xlarge + - m5dn.8xlarge + - m5dn.large + - m5dn.xlarge - m5n.12xlarge - m5n.16xlarge - m5n.24xlarge - - m6g.medium - 
- m6g.large - - m6g.xlarge + - m5n.2xlarge + - m5n.4xlarge + - m5n.8xlarge + - m5n.large + - m5n.xlarge + - m6g.12xlarge + - m6g.16xlarge - m6g.2xlarge - m6g.4xlarge - m6g.8xlarge - - m6g.12xlarge - - m6g.16xlarge + - m6g.large + - m6g.medium - m6g.metal - - m6gd.medium - - m6gd.large - - m6gd.xlarge + - m6g.xlarge + - m6gd.12xlarge + - m6gd.16xlarge - m6gd.2xlarge - m6gd.4xlarge - m6gd.8xlarge - - m6gd.12xlarge - - m6gd.16xlarge + - m6gd.large + - m6gd.medium - m6gd.metal - - p2.xlarge - - p2.8xlarge + - m6gd.xlarge - p2.16xlarge + - p2.8xlarge + - p2.xlarge + - p3.16xlarge - p3.2xlarge - p3.8xlarge - - p3.16xlarge - p3dn.24xlarge - - g4dn.xlarge - - g4dn.2xlarge - - g4dn.4xlarge - - g4dn.8xlarge - - g4dn.12xlarge - - g4dn.16xlarge - - g4dn.metal - - r3.large - - r3.xlarge + - p4d.24xlarge - r3.2xlarge - r3.4xlarge - r3.8xlarge - - r4.large - - r4.xlarge + - r3.large + - r3.xlarge + - r4.16xlarge - r4.2xlarge - r4.4xlarge - r4.8xlarge - - r4.16xlarge - - r5.large - - r5.xlarge - - r5.2xlarge - - r5.4xlarge - - r5.8xlarge + - r4.large + - r4.xlarge - r5.12xlarge - r5.16xlarge - r5.24xlarge + - r5.2xlarge + - r5.4xlarge + - r5.8xlarge + - r5.large - r5.metal - - r5a.large - - r5a.xlarge - - r5a.2xlarge - - r5a.4xlarge - - r5a.8xlarge + - r5.xlarge - r5a.12xlarge - r5a.16xlarge - r5a.24xlarge - - r5ad.large - - r5ad.xlarge - - r5ad.2xlarge - - r5ad.4xlarge + - r5a.2xlarge + - r5a.4xlarge + - r5a.8xlarge + - r5a.large + - r5a.xlarge - r5ad.12xlarge + - r5ad.16xlarge - r5ad.24xlarge - - r5d.large - - r5d.xlarge - - r5d.2xlarge - - r5d.4xlarge - - r5d.8xlarge + - r5ad.2xlarge + - r5ad.4xlarge + - r5ad.8xlarge + - r5ad.large + - r5ad.xlarge - r5d.12xlarge - r5d.16xlarge - r5d.24xlarge + - r5d.2xlarge + - r5d.4xlarge + - r5d.8xlarge + - r5d.large - r5d.metal - - r5dn.large - - r5dn.xlarge - - r5dn.2xlarge - - r5dn.4xlarge - - r5dn.8xlarge + - r5d.xlarge - r5dn.12xlarge - r5dn.16xlarge - r5dn.24xlarge - - r5n.large - - r5n.xlarge - - r5n.2xlarge - - r5n.4xlarge - - 
r5n.8xlarge + - r5dn.2xlarge + - r5dn.4xlarge + - r5dn.8xlarge + - r5dn.large + - r5dn.xlarge - r5n.12xlarge - r5n.16xlarge - r5n.24xlarge - - r6g.medium - - r6g.large - - r6g.xlarge + - r5n.2xlarge + - r5n.4xlarge + - r5n.8xlarge + - r5n.large + - r5n.xlarge + - r6g.12xlarge + - r6g.16xlarge - r6g.2xlarge - r6g.4xlarge - r6g.8xlarge - - r6g.12xlarge - - r6g.16xlarge + - r6g.large + - r6g.medium - r6g.metal - - r6gd.medium - - r6gd.large - - r6gd.xlarge + - r6g.xlarge + - r6gd.12xlarge + - r6gd.16xlarge - r6gd.2xlarge - r6gd.4xlarge - r6gd.8xlarge - - r6gd.12xlarge - - r6gd.16xlarge + - r6gd.large + - r6gd.medium - r6gd.metal + - r6gd.xlarge - t1.micro - - t2.nano + - t2.2xlarge + - t2.large + - t2.medium - t2.micro + - t2.nano - t2.small - - t2.medium - - t2.large - t2.xlarge - - t2.2xlarge - - t3.nano + - t3.2xlarge + - t3.large + - t3.medium - t3.micro + - t3.nano - t3.small - - t3.medium - - t3.large - t3.xlarge - - t3.2xlarge - - t3a.nano + - t3a.2xlarge + - t3a.large + - t3a.medium - t3a.micro + - t3a.nano - t3a.small - - t3a.medium - - t3a.large - t3a.xlarge - - t3a.2xlarge + - t4g.2xlarge + - t4g.large + - t4g.medium + - t4g.micro + - t4g.nano + - t4g.small + - t4g.xlarge + - u-12tb1.metal + - u-18tb1.metal + - u-24tb1.metal - u-6tb1.metal - u-9tb1.metal - - u-12tb1.metal - x1.16xlarge - x1.32xlarge - - x1e.xlarge + - x1e.16xlarge - x1e.2xlarge + - x1e.32xlarge - x1e.4xlarge - x1e.8xlarge - - x1e.16xlarge - - x1e.32xlarge - - z1d.large - - z1d.xlarge + - x1e.xlarge + - z1d.12xlarge - z1d.2xlarge - z1d.3xlarge - z1d.6xlarge - - z1d.12xlarge + - z1d.large - z1d.metal + - z1d.xlarge ConstraintDescription: Must be a valid EC2 instance type Description: EC2 instance type for the node instances From a4a140213756b2365a4365641fcff7ba367d05b7 Mon Sep 17 00:00:00 2001 From: rfox-godaddy <59625844+rfox-godaddy@users.noreply.github.com> Date: Mon, 2 Nov 2020 13:44:02 -0700 Subject: [PATCH 091/621] Update kubelet.service (#526) * Update kubelet.service If kubelet wins 
the systemd startup race against iptables, then the `ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5` may be clobbered by existing rules. * Correction that the requested change is that this runs after iptables-restore --- files/kubelet.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/kubelet.service b/files/kubelet.service index 358b659ae..63f4574a2 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -1,7 +1,7 @@ [Unit] Description=Kubernetes Kubelet Documentation=https://github.com/kubernetes/kubernetes -After=docker.service +After=docker.service iptables-restore.service Requires=docker.service [Service] From 0a96824d7b60d0930c846f5d6841d1c10ff411d2 Mon Sep 17 00:00:00 2001 From: sehayoun <70663169+sehayoun@users.noreply.github.com> Date: Mon, 9 Nov 2020 14:18:43 -0800 Subject: [PATCH 092/621] Update eni-max-pods value for p4d.24xlarge (#558) --- files/eni-max-pods.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 4c44fce90..1c497974f 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -242,7 +242,7 @@ p3.16xlarge 234 p3.2xlarge 58 p3.8xlarge 234 p3dn.24xlarge 737 -p4d.24xlarge 2942 +p4d.24xlarge 737 r3.2xlarge 58 r3.4xlarge 234 r3.8xlarge 234 From 4d5df6742357337533ce83297685e54cc2dd9eb5 Mon Sep 17 00:00:00 2001 From: sehayoun <70663169+sehayoun@users.noreply.github.com> Date: Tue, 17 Nov 2020 09:37:12 -0800 Subject: [PATCH 093/621] Update CHANGELOG.md --- CHANGELOG.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f5aa887b..eed74a73a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,34 @@ # Changelog +### AMI Release v20201112 +* amazon-eks-gpu-node-1.18-v20201112 +* amazon-eks-gpu-node-1.17-v20201112 +* amazon-eks-gpu-node-1.16-v20201112 +* amazon-eks-gpu-node-1.15-v20201112 +* amazon-eks-arm64-node-1.18-v20201112 +* 
amazon-eks-arm64-node-1.17-v20201112 +* amazon-eks-arm64-node-1.16-v20201112 +* amazon-eks-arm64-node-1.15-v20201112 +* amazon-eks-node-1.18-v20201112 +* amazon-eks-node-1.17-v20201112 +* amazon-eks-node-1.16-v20201112 +* amazon-eks-node-1.15-v20201112 + +Binaries used to build these AMIs are published : +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : +* Bug fix [#526](https://github.com/awslabs/amazon-eks-ami/pull/526) +* GPU AMIs - Nvidia driver version update to 450.51.06, cuda version update to 11.0 +* Updated kernel version to 4.14.203 and fix for [soft lockup issue](https://github.com/awslabs/amazon-eks-ami/issues/454) + + +Note: Previous release information can be found from [release note](https://github.com/awslabs/amazon-eks-ami/releases) + + ### AMI Release v20190927 * amazon-eks-node-1.14-v20190927 * amazon-eks-gpu-node-1.14-v20190927 From a6313d533cd6a97f77e1af7c3979c78e51338ebc Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 17 Nov 2020 14:49:00 -0800 Subject: [PATCH 094/621] Downgrades containerd to containerd-1.3.2-1.amzn2 to fix issue #563 (#564) --- scripts/install-worker.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 826b28d98..46f39c58a 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -121,6 +121,12 @@ if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo mv $TEMPLATE_DIR/docker-daemon.json /etc/docker/daemon.json sudo chown root:root /etc/docker/daemon.json + # https://github.com/awslabs/amazon-eks-ami/issues/563 + # Due to an issue with the latest containerd, customers are seeing + # pods getting stuck in terminating, so we need to downgrade containerd + # until the issue is fixed upstream or we have another workaround. + sudo yum downgrade -y containerd-1.3.2-1.amzn2 + # Enable docker daemon to start on boot. 
sudo systemctl daemon-reload sudo systemctl enable docker From 547c941ed49f546d86c9fd2a391d1f5c26bd4a3e Mon Sep 17 00:00:00 2001 From: Vishal Gupta Date: Wed, 18 Nov 2020 10:50:48 -0800 Subject: [PATCH 095/621] Updated changelog for 20201117 release (#566) Co-authored-by: Vishal Gupta --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eed74a73a..92f4b2111 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ # Changelog +### AMI Release v20201117 +* amazon-eks-gpu-node-1.18-v20201117 +* amazon-eks-gpu-node-1.17-v20201117 +* amazon-eks-gpu-node-1.16-v20201117 +* amazon-eks-gpu-node-1.15-v20201117 +* amazon-eks-arm64-node-1.18-v20201117 +* amazon-eks-arm64-node-1.17-v20201117 +* amazon-eks-arm64-node-1.16-v20201117 +* amazon-eks-arm64-node-1.15-v20201117 +* amazon-eks-node-1.18-v20201117 +* amazon-eks-node-1.17-v20201117 +* amazon-eks-node-1.16-v20201117 +* amazon-eks-node-1.15-v20201117 + +Binaries used to build these AMIs are published : +* s3://amazon-eks/1.18.9/2020-11-07/ +* s3://amazon-eks/1.17.12/2020-11-07/ +* s3://amazon-eks/1.16.15/2020-11-07/ +* s3://amazon-eks/1.15.12/2020-11-07/ + +Notable changes : +* Bug fix [#526](https://github.com/awslabs/amazon-eks-ami/pull/526) +* GPU AMIs - Nvidia driver version update to 450.51.06, cuda version update to 11.0 +* Updated kernel version to 4.14.203 and fix for soft lockup issue +* Downgraded containerd version to 1.3.2 to fix pods getting stuck in the Terminating state + + ### AMI Release v20201112 * amazon-eks-gpu-node-1.18-v20201112 * amazon-eks-gpu-node-1.17-v20201112 From 08801ff74183bf3f99f9442db3c91b1b16465f45 Mon Sep 17 00:00:00 2001 From: sehayoun <70663169+sehayoun@users.noreply.github.com> Date: Thu, 19 Nov 2020 12:30:24 -0800 Subject: [PATCH 096/621] Updated changedlog with correct s3 bucket name (#570) --- CHANGELOG.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 92f4b2111..128ba9155 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,10 +15,10 @@ * amazon-eks-node-1.15-v20201117 Binaries used to build these AMIs are published : -* s3://amazon-eks/1.18.9/2020-11-07/ -* s3://amazon-eks/1.17.12/2020-11-07/ -* s3://amazon-eks/1.16.15/2020-11-07/ -* s3://amazon-eks/1.15.12/2020-11-07/ +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ Notable changes : * Bug fix [#526](https://github.com/awslabs/amazon-eks-ami/pull/526) From 695d0a36b9928a446e1327bd2eb191afdc696f58 Mon Sep 17 00:00:00 2001 From: sehayoun <70663169+sehayoun@users.noreply.github.com> Date: Thu, 19 Nov 2020 13:00:12 -0800 Subject: [PATCH 097/621] Update Makefile with the latest k8s version and binary buckets (#571) --- Makefile | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 70a587c43..2f345a695 100644 --- a/Makefile +++ b/Makefile @@ -42,22 +42,19 @@ k8s: validate # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html -.PHONY: 1.14 -1.14: - $(MAKE) k8s kubernetes_version=1.14.9 kubernetes_build_date=2020-09-18 pull_cni_from_github=true .PHONY: 1.15 1.15: - $(MAKE) k8s kubernetes_version=1.15.11 kubernetes_build_date=2020-09-18 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.15.12 kubernetes_build_date=2020-11-02 pull_cni_from_github=true .PHONY: 1.16 1.16: - $(MAKE) k8s kubernetes_version=1.16.13 kubernetes_build_date=2020-09-18 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.16.15 kubernetes_build_date=2020-11-02 pull_cni_from_github=true .PHONY: 1.17 1.17: - $(MAKE) k8s kubernetes_version=1.17.11 kubernetes_build_date=2020-09-18 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.17.12 kubernetes_build_date=2020-11-02 pull_cni_from_github=true .PHONY: 1.18 1.18: - $(MAKE) k8s 
kubernetes_version=1.18.8 kubernetes_build_date=2020-09-18 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.18.9 kubernetes_build_date=2020-11-02 pull_cni_from_github=true From 028eaadbd06ced1df4dc9f245c64fb8ba6b60d27 Mon Sep 17 00:00:00 2001 From: Visuna Date: Mon, 30 Nov 2020 17:04:13 -0800 Subject: [PATCH 098/621] update change log with AMI Release v20201126 --- CHANGELOG.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 128ba9155..5c47e2987 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,30 @@ # Changelog +### AMI Release v20201126 +* amazon-eks-gpu-node-1.18-v20201126 +* amazon-eks-gpu-node-1.17-v20201126 +* amazon-eks-gpu-node-1.16-v20201126 +* amazon-eks-gpu-node-1.15-v20201126 +* amazon-eks-arm64-node-1.18-v20201126 +* amazon-eks-arm64-node-1.17-v20201126 +* amazon-eks-arm64-node-1.16-v20201126 +* amazon-eks-arm64-node-1.15-v20201126 +* amazon-eks-node-1.18-v20201126 +* amazon-eks-node-1.17-v20201126 +* amazon-eks-node-1.16-v20201126 +* amazon-eks-node-1.15-v20201126 + +Binaries used to build these AMIs are published : +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : + +* Containerd patch for CVE-2020-15257 + + ### AMI Release v20201117 * amazon-eks-gpu-node-1.18-v20201117 * amazon-eks-gpu-node-1.17-v20201117 From 993b41af4bd8b47443889f7e535b72192f210001 Mon Sep 17 00:00:00 2001 From: Raghav Tripathi <31378836+rtripat@users.noreply.github.com> Date: Mon, 30 Nov 2020 17:13:09 -0800 Subject: [PATCH 099/621] Adding containerd version for v20201126 (#574) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c47e2987..41f97fcc2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,7 @@ Binaries used to build these AMIs are published : Notable changes : -* Containerd patch for 
CVE-2020-15257 +* Containerd patch for CVE-2020-15257 (containerd-1.4.1-2) ### AMI Release v20201117 From 2db3aa781aaebbc65355c711b249b51d356ed91c Mon Sep 17 00:00:00 2001 From: Paul Forman Date: Mon, 9 Nov 2020 13:27:01 -0700 Subject: [PATCH 100/621] Handle a missing instance_type better in bootstrap In a strange case that appears to have been a brief metadata server issue, the "MAX_PODS" selection in bootstrap caused the nodes to fail to join the cluster. The log messages were not very helpful, since it triggered a bash math function error on a string. The specific failure appears to have been returning *too much* data, after searching for an empty string matched every line in the eni-max-pods.txt file. In the rare case that an instance_type isn't set, search for a known "unset" value to get predictable behavior. --- files/bootstrap.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 4b3dc26c8..cd56aced4 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -291,10 +291,10 @@ INSTANCE_TYPE=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169 #calculate the max number of pods per instance type MAX_PODS_FILE="/etc/eks/eni-max-pods.txt" set +o pipefail -MAX_PODS=$(cat $MAX_PODS_FILE | awk "/^$INSTANCE_TYPE/"' { print $2 }') +MAX_PODS=$(cat $MAX_PODS_FILE | awk "/^${INSTANCE_TYPE:-unset}/"' { print $2 }') set -o pipefail -if [ -z "$MAX_PODS" ]; then - echo 'No entry for $INSTANCE_TYPE in $MAX_PODS_FILE' +if [ -z "$MAX_PODS" ] || [ -z "$INSTANCE_TYPE" ]; then + echo "No entry for type '$INSTANCE_TYPE' in $MAX_PODS_FILE" exit 1 fi From 1b6e0a9498c064d39e6050ace7e3be11740e9c6f Mon Sep 17 00:00:00 2001 From: Penugonda Date: Fri, 28 Aug 2020 16:47:07 -0400 Subject: [PATCH 101/621] error handling to collect instance id --- log-collector-script/linux/eks-log-collector.sh | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git 
a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 32f1dccde..2d5a45fcd 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -28,7 +28,6 @@ readonly LOG_DIR="/var/log" readonly COLLECT_DIR="/tmp/eks-log-collector" readonly CURRENT_TIME=$(date --utc +%Y-%m-%d_%H%M-%Z) readonly DAYS_10=$(date -d "-10 days" '+%Y-%m-%d %H:%M') -readonly TOKEN=$(curl -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" "http://169.254.169.254/latest/api/token") INSTANCE_ID="" INIT_TYPE="" PACKAGE_TYPE="" @@ -189,9 +188,16 @@ create_directories() { done } -get_instance_metadata() { - readonly INSTANCE_ID=$(curl --max-time 3 -H "X-aws-ec2-metadata-token: $TOKEN" --silent http://169.254.169.254/latest/meta-data/instance-id 2>/dev/null) - echo "${INSTANCE_ID}" > "${COLLECT_DIR}"/system/instance-id.txt +get_instance_id() { + + INSTANCE_ID_FILE="/var/lib/cloud/data/instance-id" + + if grep -q '^i-' "$INSTANCE_ID_FILE"; then + cp ${INSTANCE_ID_FILE} "${COLLECT_DIR}"/system/instance-id.txt + else + warning "Unable to find EC2 Instance Id. Skipped Instance Id." 
+ fi + } is_diskfull() { @@ -231,7 +237,7 @@ init() { collect() { init is_diskfull - get_instance_metadata + get_instance_id get_common_logs get_kernel_info get_mounts_info From 0ab2ee883d3c5493408c49ca22239d813095e770 Mon Sep 17 00:00:00 2001 From: Penugonda Date: Tue, 24 Nov 2020 10:32:14 -0500 Subject: [PATCH 102/621] set instance id based of file when metadata is not available --- log-collector-script/linux/eks-log-collector.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 2d5a45fcd..932c98957 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -192,12 +192,18 @@ get_instance_id() { INSTANCE_ID_FILE="/var/lib/cloud/data/instance-id" - if grep -q '^i-' "$INSTANCE_ID_FILE"; then - cp ${INSTANCE_ID_FILE} "${COLLECT_DIR}"/system/instance-id.txt + timeout 75 readonly INSTANCE_ID=$(curl http://169.254.169.254/latest/meta-data/instance-id) + + if [ 0 -eq $? ]; then # Check if previous command was successful. + echo "${INSTANCE_ID}" > "${COLLECT_DIR}"/system/instance-id.txt else - warning "Unable to find EC2 Instance Id. Skipped Instance Id." + if grep -q '^i-' "$INSTANCE_ID_FILE"; then + cp ${INSTANCE_ID_FILE} "${COLLECT_DIR}"/system/instance-id.txt + readonly INSTANCE_ID=$(cat "${COLLECT_DIR}"/system/instance-id.txt) + else + warning "Unable to find EC2 Instance Id. Skipped Instance Id." 
+ fi fi - } is_diskfull() { From 2d6cb253c9621cd1e5206827bc4409b3cb9c88b5 Mon Sep 17 00:00:00 2001 From: Penugonda Date: Thu, 26 Nov 2020 16:48:13 -0500 Subject: [PATCH 103/621] added timeout and retry --- log-collector-script/linux/eks-log-collector.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 932c98957..fc83e2f71 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -192,7 +192,7 @@ get_instance_id() { INSTANCE_ID_FILE="/var/lib/cloud/data/instance-id" - timeout 75 readonly INSTANCE_ID=$(curl http://169.254.169.254/latest/meta-data/instance-id) + readonly INSTANCE_ID=$(curl --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/instance-id) if [ 0 -eq $? ]; then # Check if previous command was successful. echo "${INSTANCE_ID}" > "${COLLECT_DIR}"/system/instance-id.txt From 13c8f54bacf6c349337bc0eef1fc6526760a892a Mon Sep 17 00:00:00 2001 From: Penugonda Date: Tue, 1 Dec 2020 16:42:32 -0500 Subject: [PATCH 104/621] get instance id from file and fall back to ec2 metadata endpoint --- log-collector-script/linux/eks-log-collector.sh | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index fc83e2f71..f69756126 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -189,17 +189,15 @@ create_directories() { } get_instance_id() { - INSTANCE_ID_FILE="/var/lib/cloud/data/instance-id" - - readonly INSTANCE_ID=$(curl --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/instance-id) - - if [ 0 -eq $? ]; then # Check if previous command was successful. 
- echo "${INSTANCE_ID}" > "${COLLECT_DIR}"/system/instance-id.txt + + if grep -q '^i-' "$INSTANCE_ID_FILE"; then + cp ${INSTANCE_ID_FILE} "${COLLECT_DIR}"/system/instance-id.txt + readonly INSTANCE_ID=$(cat "${COLLECT_DIR}"/system/instance-id.txt) else - if grep -q '^i-' "$INSTANCE_ID_FILE"; then - cp ${INSTANCE_ID_FILE} "${COLLECT_DIR}"/system/instance-id.txt - readonly INSTANCE_ID=$(cat "${COLLECT_DIR}"/system/instance-id.txt) + readonly INSTANCE_ID=$(curl --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/instance-id) + if [ 0 -eq $? ]; then # Check if previous command was successful. + echo "${INSTANCE_ID}" > "${COLLECT_DIR}"/system/instance-id.txt else warning "Unable to find EC2 Instance Id. Skipped Instance Id." fi From a63f05e48dc347c2d9309c88de6499dc9346ada1 Mon Sep 17 00:00:00 2001 From: mtilson Date: Wed, 2 Dec 2020 01:39:00 +0300 Subject: [PATCH 105/621] Improve function documentation (#562) Passed argument to 'get_memory_mebibytes_to_reserve()' is not instance type anymore after commit e988f28 --- files/bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index cd56aced4..f6069b671 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -161,7 +161,7 @@ get_resource_to_reserve_in_range() { # density so we are calculating the amount of memory to reserve for Kubernetes systems daemons by # considering the maximum number of pods this instance type supports. 
# Args: -# $1 the instance type of the worker node +# $1 the max number of pods per instance type (MAX_PODS) based on values from /etc/eks/eni-max-pods.txt # Return: # memory to reserve in Mi for the kubelet get_memory_mebibytes_to_reserve() { From 70dfa1b0de63be1c989118ddd13c172d9883ed65 Mon Sep 17 00:00:00 2001 From: Tommy Nguyen Date: Fri, 4 Dec 2020 00:50:53 +0800 Subject: [PATCH 106/621] Remove 1.14 version from makefile (#576) Signed-off-by: Tommy Nguyen Co-authored-by: Tommy Nguyen --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2f345a695..b42f03ac8 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.14 1.15 1.16 1.17 +all: 1.15 1.16 1.17 .PHONY: validate validate: From 90bde2a7af0ab0dde888217692301294536b3409 Mon Sep 17 00:00:00 2001 From: Bronson Mirafuentes Date: Thu, 3 Dec 2020 10:39:42 -0800 Subject: [PATCH 107/621] Adding containerd_version as a packer variable (#575) * Adding containerd_version as a packer variable * bump containerd_version to 1.4.1-1 * bump containerd_version to 1.4.1-2 * remove obsolete comments about downgrading containerd version --- Makefile | 4 ++-- eks-worker-al2.json | 45 ++++++++++++++++++++------------------- scripts/install-worker.sh | 7 ++---- 3 files changed, 27 insertions(+), 29 deletions(-) diff --git a/Makefile b/Makefile index b42f03ac8..8520b36c2 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date docker_version cni_plugin_version source_ami_id source_ami_owners arch instance_type security_group_id additional_yum_repos pull_cni_from_github +PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date docker_version containerd_verion cni_plugin_version source_ami_id source_ami_owners arch 
instance_type security_group_id additional_yum_repos pull_cni_from_github K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.15 1.16 1.17 +all: 1.15 1.16 1.17 1.18 .PHONY: validate validate: diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 3fb879101..eff63e08e 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -5,26 +5,23 @@ "creator": "{{env `USER`}}", "encrypted": "false", "kms_key_id": "", - "aws_access_key_id": "{{env `AWS_ACCESS_KEY_ID`}}", "aws_secret_access_key": "{{env `AWS_SECRET_ACCESS_KEY`}}", "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}", - "binary_bucket_name": "amazon-eks", "binary_bucket_region": "us-west-2", "kubernetes_version": null, "kubernetes_build_date": null, "docker_version": "19.03.6ce-4.amzn2", + "containerd_version": "1.4.1-2.amzn2", "cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", - "source_ami_id": "", "source_ami_owners": "137112412989", "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", "arch": null, "instance_type": null, "ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", - "ssh_interface": "", "ssh_username": "ec2-user", "temporary_security_group_source_cidrs": "", @@ -36,7 +33,6 @@ "ami_users": "", "additional_yum_repos": "" }, - "builders": [ { "type": "amazon-ebs", @@ -52,7 +48,9 @@ "state": "available", "virtualization-type": "hvm" }, - "owners": [ "{{user `source_ami_owners`}}" ], + "owners": [ + "{{user `source_ami_owners`}}" + ], "most_recent": true }, "instance_type": "{{user `instance_type`}}", @@ -81,30 +79,30 @@ "encrypt_boot": "{{user `encrypted`}}", "kms_key_id": "{{user `kms_key_id`}}", "run_tags": { - "creator": "{{user `creator`}}" + "creator": "{{user `creator`}}" }, "subnet_id": "{{user `subnet_id`}}", "tags": { - "Name": "{{user `ami_name`}}", - "created": "{{timestamp}}", - 
"docker_version": "{{ user `docker_version`}}", - "source_ami_id": "{{ user `source_ami_id`}}", - "kubernetes": "{{ user `kubernetes_version`}}/{{ user `kubernetes_build_date` }}/bin/linux/{{ user `arch` }}", - "cni_plugin_version": "{{ user `cni_plugin_version`}}" + "Name": "{{user `ami_name`}}", + "created": "{{timestamp}}", + "docker_version": "{{ user `docker_version`}}", + "containerd_version": "{{ user `containerd_version`}}", + "source_ami_id": "{{ user `source_ami_id`}}", + "kubernetes": "{{ user `kubernetes_version`}}/{{ user `kubernetes_build_date` }}/bin/linux/{{ user `arch` }}", + "cni_plugin_version": "{{ user `cni_plugin_version`}}" }, "ami_name": "{{user `ami_name`}}", - "ami_description": "{{ user `ami_description` }}, (k8s: {{ user `kubernetes_version`}}, docker:{{ user `docker_version`}})" + "ami_description": "{{ user `ami_description` }}, (k8s: {{ user `kubernetes_version` }}, docker: {{ user `docker_version` }}, containerd: {{ user `containerd_version` }})" } ], - "provisioners": [ { - "type": "shell", - "remote_folder": "{{ user `remote_folder`}}", - "script": "{{template_dir}}/scripts/install_additional_repos.sh", - "environment_vars": [ - "ADDITIONAL_YUM_REPOS={{user `additional_yum_repos`}}" - ] + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "script": "{{template_dir}}/scripts/install_additional_repos.sh", + "environment_vars": [ + "ADDITIONAL_YUM_REPOS={{user `additional_yum_repos`}}" + ] }, { "type": "shell", @@ -116,7 +114,9 @@ "type": "shell", "pause_before": "90s", "remote_folder": "{{ user `remote_folder`}}", - "inline": ["mkdir -p /tmp/worker/"] + "inline": [ + "mkdir -p /tmp/worker/" + ] }, { "type": "file", @@ -133,6 +133,7 @@ "BINARY_BUCKET_NAME={{user `binary_bucket_name`}}", "BINARY_BUCKET_REGION={{user `binary_bucket_region`}}", "DOCKER_VERSION={{user `docker_version`}}", + "CONTAINERD_VERSION={{user `containerd_version`}}", "CNI_PLUGIN_VERSION={{user `cni_plugin_version`}}", "PULL_CNI_FROM_GITHUB={{user 
`pull_cni_from_github`}}", "AWS_ACCESS_KEY_ID={{user `aws_access_key_id`}}", diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 46f39c58a..917d6167f 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -24,6 +24,7 @@ validate_env_set() { validate_env_set BINARY_BUCKET_NAME validate_env_set BINARY_BUCKET_REGION validate_env_set DOCKER_VERSION +validate_env_set CONTAINERD_VERSION validate_env_set CNI_PLUGIN_VERSION validate_env_set KUBERNETES_VERSION validate_env_set KUBERNETES_BUILD_DATE @@ -121,11 +122,7 @@ if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo mv $TEMPLATE_DIR/docker-daemon.json /etc/docker/daemon.json sudo chown root:root /etc/docker/daemon.json - # https://github.com/awslabs/amazon-eks-ami/issues/563 - # Due to an issue with the latest containerd, customers are seeing - # pods getting stuck in terminating, so we need to downgrade containerd - # until the issue is fixed upstream or we have another workaround. - sudo yum downgrade -y containerd-1.3.2-1.amzn2 + sudo yum downgrade -y containerd-${CONTAINERD_VERSION} # Enable docker daemon to start on boot. sudo systemctl daemon-reload From 5c4d01f121ff4eba9098c333507dc1443867f870 Mon Sep 17 00:00:00 2001 From: mtilson Date: Tue, 8 Dec 2020 00:15:56 +0300 Subject: [PATCH 108/621] Missing sha256 check sum for 'else' path (#561) When using 'wget' to fetch cni binaries from s3, checksum file is downloaded but not used --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 917d6167f..e72cb4731 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -195,12 +195,12 @@ else echo "AWS cli present - using it to copy binaries from s3." aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/${CNI_PLUGIN_FILENAME}.tgz . aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/${CNI_PLUGIN_FILENAME}.tgz.sha256 . 
- sudo sha256sum -c "${CNI_PLUGIN_FILENAME}.tgz.sha256" else echo "AWS cli missing - using wget to fetch cni binaries from s3. Note: This won't work for private bucket." sudo wget "$S3_URL_BASE/${CNI_PLUGIN_FILENAME}.tgz" sudo wget "$S3_URL_BASE/${CNI_PLUGIN_FILENAME}.tgz.sha256" fi + sudo sha256sum -c "${CNI_PLUGIN_FILENAME}.tgz.sha256" fi sudo tar -xvf "${CNI_PLUGIN_FILENAME}.tgz" -C /opt/cni/bin rm "${CNI_PLUGIN_FILENAME}.tgz" From 4faba3266d6b35e5ad050c8544ff99700f4842c3 Mon Sep 17 00:00:00 2001 From: cmdallas Date: Tue, 8 Dec 2020 11:20:54 -0800 Subject: [PATCH 109/621] (bootstrap): document pause container parameters (#556) --- files/bootstrap.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index f6069b671..2f444aaaf 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -24,6 +24,8 @@ function print_help { echo "--aws-api-retry-attempts Number of retry attempts for AWS API call (DescribeCluster) (default: 3)" echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" echo "--dns-cluster-ip Overrides the IP address to use for DNS queries within the cluster. 
Defaults to 10.100.0.10 or 172.20.0.10 based on the IP address of the primary interface" + echo "--pause-container-account The AWS account (number) to pull the pause container from" + echo "--pause-container-version The tag of the pause container" } POSITIONAL=() From 4e04217e96b70c763a886d28ba271f60f9765885 Mon Sep 17 00:00:00 2001 From: StevenSu Date: Wed, 9 Dec 2020 03:24:09 +0800 Subject: [PATCH 110/621] add SIGKILL to RestartForceExitStatus (#554) --- files/kubelet.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/kubelet.service b/files/kubelet.service index 63f4574a2..9e67cf667 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -13,7 +13,7 @@ ExecStart=/usr/bin/kubelet --cloud-provider aws \ --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS Restart=on-failure -RestartForceExitStatus=SIGPIPE +RestartForceExitStatus=SIGPIPE SIGKILL RestartSec=5 KillMode=process From 7458152e02386525fdf3a31123d2c16e4e943732 Mon Sep 17 00:00:00 2001 From: yeolahm <46589223+yeolahm@users.noreply.github.com> Date: Tue, 8 Dec 2020 22:11:50 +0200 Subject: [PATCH 111/621] Add iptables rule count (#547) Co-authored-by: yeolahm --- log-collector-script/linux/eks-log-collector.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index f69756126..66f73974e 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -299,10 +299,10 @@ get_selinux_info() { get_iptables_info() { try "collect iptables information" - iptables --wait 1 --numeric --verbose --list --table mangle > "${COLLECT_DIR}"/networking/iptables-mangle.txt - iptables --wait 1 --numeric --verbose --list --table filter > "${COLLECT_DIR}"/networking/iptables-filter.txt - iptables --wait 1 --numeric --verbose --list --table nat > "${COLLECT_DIR}"/networking/iptables-nat.txt - iptables --wait 1 
--numeric --verbose --list > "${COLLECT_DIR}"/networking/iptables.txt + iptables --wait 1 --numeric --verbose --list --table mangle | tee "${COLLECT_DIR}"/networking/iptables-mangle.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-mangle.txt + iptables --wait 1 --numeric --verbose --list --table filter | tee "${COLLECT_DIR}"/networking/iptables-filter.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-filter.txt + iptables --wait 1 --numeric --verbose --list --table nat | tee "${COLLECT_DIR}"/networking/iptables-nat.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-nat.txt + iptables --wait 1 --numeric --verbose --list | tee "${COLLECT_DIR}"/networking/iptables.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables.txt iptables-save > "${COLLECT_DIR}"/networking/iptables-save.txt ok From ab3245f5ac0f2d434d4401c8f312a0f79abb1ad8 Mon Sep 17 00:00:00 2001 From: Tom Dyas Date: Tue, 8 Dec 2020 13:10:13 -0800 Subject: [PATCH 112/621] ensure kubelet.service.d directory exists (#519) --- files/bootstrap.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 2f444aaaf..2694d74df 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -316,6 +316,8 @@ if [[ "$USE_MAX_PODS" = "true" ]]; then fi fi +mkdir -p /etc/systemd/system/kubelet.service.d + cat < /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf [Service] Environment='KUBELET_ARGS=--node-ip=$INTERNAL_IP --pod-infra-container-image=$PAUSE_CONTAINER' From 967bb2fb9f8d1ea2a2c38280fa48b654b93ee66f Mon Sep 17 00:00:00 2001 From: sehayoun <70663169+sehayoun@users.noreply.github.com> Date: 
Thu, 10 Dec 2020 12:19:07 -0800 Subject: [PATCH 113/621] GPU Boost clock setup for performance improvement (#573) --- files/bootstrap.sh | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 2694d74df..c17bb816a 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -346,3 +346,31 @@ fi systemctl daemon-reload systemctl enable kubelet systemctl start kubelet + +# gpu boost clock +if command -v nvidia-smi &>/dev/null ; then + echo "nvidia-smi found" + + sudo nvidia-smi -pm 1 # set persistence mode + sudo nvidia-smi --auto-boost-default=0 + + GPUNAME=$(nvidia-smi -L | head -n1) + echo $GPUNAME + + # set application clock to maximum + if [[ $GPUNAME == *"A100"* ]]; then + nvidia-smi -ac 1215,1410 + elif [[ $GPUNAME == *"V100"* ]]; then + nvidia-smi -ac 877,1530 + elif [[ $GPUNAME == *"K80"* ]]; then + nvidia-smi -ac 2505,875 + elif [[ $GPUNAME == *"T4"* ]]; then + nvidia-smi -ac 5001,1590 + elif [[ $GPUNAME == *"M60"* ]]; then + nvidia-smi -ac 2505,1177 + else + echo "unsupported gpu" + fi +else + echo "nvidia-smi not found" +fi From facf0ec8761e1b697a2bb2f3e4791d611c86535a Mon Sep 17 00:00:00 2001 From: Chotiwat Chawannakul Date: Mon, 14 Dec 2020 09:27:24 -0800 Subject: [PATCH 114/621] Update systemd to always restart kubelet to support dynamic kubelet configuration (#578) * Add exit code 0 to RestartForceExitStatus * Use Restart=always for kubelet.service --- files/kubelet.service | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/files/kubelet.service b/files/kubelet.service index 9e67cf667..387470da1 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -12,8 +12,7 @@ ExecStart=/usr/bin/kubelet --cloud-provider aws \ --container-runtime docker \ --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS -Restart=on-failure -RestartForceExitStatus=SIGPIPE SIGKILL +Restart=always RestartSec=5 KillMode=process From bfbe42d504273f917d9bab4f388b78643531800d Mon Sep 17 
00:00:00 2001 From: Gabriel Mendes Date: Mon, 14 Dec 2020 18:53:03 -0300 Subject: [PATCH 115/621] Add missing instance types (#580) * Re-generate eni-max-pods from EC2 API * Add missing instance types --- amazon-eks-nodegroup.yaml | 30 ++++++++++++++++++++++++++++++ files/eni-max-pods.txt | 32 +++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index aa4e39ee0..1cfb2af38 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -173,6 +173,16 @@ Parameters: - d2.4xlarge - d2.8xlarge - d2.xlarge + - d3.2xlarge + - d3.4xlarge + - d3.8xlarge + - d3.xlarge + - d3en.12xlarge + - d3en.2xlarge + - d3en.4xlarge + - d3en.6xlarge + - d3en.8xlarge + - d3en.xlarge - f1.16xlarge - f1.2xlarge - f1.4xlarge @@ -182,6 +192,9 @@ Parameters: - g3.4xlarge - g3.8xlarge - g3s.xlarge + - g4ad.16xlarge + - g4ad.4xlarge + - g4ad.8xlarge - g4dn.12xlarge - g4dn.16xlarge - g4dn.2xlarge @@ -284,6 +297,13 @@ Parameters: - m5n.8xlarge - m5n.large - m5n.xlarge + - m5zn.12xlarge + - m5zn.2xlarge + - m5zn.3xlarge + - m5zn.6xlarge + - m5zn.large + - m5zn.metal + - m5zn.xlarge - m6g.12xlarge - m6g.16xlarge - m6g.2xlarge @@ -302,6 +322,7 @@ Parameters: - m6gd.medium - m6gd.metal - m6gd.xlarge + - mac1.metal - p2.16xlarge - p2.8xlarge - p2.xlarge @@ -346,6 +367,15 @@ Parameters: - r5ad.8xlarge - r5ad.large - r5ad.xlarge + - r5b.12xlarge + - r5b.16xlarge + - r5b.24xlarge + - r5b.2xlarge + - r5b.4xlarge + - r5b.8xlarge + - r5b.large + - r5b.metal + - r5b.xlarge - r5d.12xlarge - r5d.16xlarge - r5d.24xlarge diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 1c497974f..a6eb620fa 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2020-09-30T15:27:32-07:00 +# This file was generated at 2020-12-12T18:33:04-03:00 # # Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods @@ -106,6 +106,16 @@ d2.2xlarge 58 d2.4xlarge 234 d2.8xlarge 234 d2.xlarge 58 +d3.2xlarge 18 +d3.4xlarge 38 +d3.8xlarge 59 +d3.xlarge 10 +d3en.12xlarge 89 +d3en.2xlarge 18 +d3en.4xlarge 38 +d3en.6xlarge 58 +d3en.8xlarge 78 +d3en.xlarge 10 f1.16xlarge 394 f1.2xlarge 58 f1.4xlarge 234 @@ -115,6 +125,9 @@ g3.16xlarge 737 g3.4xlarge 234 g3.8xlarge 234 g3s.xlarge 58 +g4ad.16xlarge 234 +g4ad.4xlarge 29 +g4ad.8xlarge 58 g4dn.12xlarge 234 g4dn.16xlarge 58 g4dn.2xlarge 29 @@ -217,6 +230,13 @@ m5n.4xlarge 234 m5n.8xlarge 234 m5n.large 29 m5n.xlarge 58 +m5zn.12xlarge 737 +m5zn.2xlarge 58 +m5zn.3xlarge 234 +m5zn.6xlarge 234 +m5zn.large 29 +m5zn.metal 737 +m5zn.xlarge 58 m6g.12xlarge 234 m6g.16xlarge 737 m6g.2xlarge 58 @@ -235,6 +255,7 @@ m6gd.large 29 m6gd.medium 8 m6gd.metal 737 m6gd.xlarge 58 +mac1.metal 234 p2.16xlarge 234 p2.8xlarge 234 p2.xlarge 58 @@ -279,6 +300,15 @@ r5ad.4xlarge 234 r5ad.8xlarge 234 r5ad.large 29 r5ad.xlarge 58 +r5b.12xlarge 234 +r5b.16xlarge 737 +r5b.24xlarge 737 +r5b.2xlarge 58 +r5b.4xlarge 234 +r5b.8xlarge 234 +r5b.large 29 +r5b.metal 737 +r5b.xlarge 58 r5d.12xlarge 234 r5d.16xlarge 737 r5d.24xlarge 737 From 69acbbc8fb550a43e16d5a17bd8d4972e38f6345 Mon Sep 17 00:00:00 2001 From: Eric Larssen Date: Mon, 14 Dec 2020 15:54:23 -0600 Subject: [PATCH 116/621] Feature flag the cleanup of the image (#522) * Feature flag the cleanup of the image * Move touch outside of if-statement --- eks-worker-al2.json | 4 ++- scripts/install-worker.sh | 53 +++++++++++++++++++++------------------ 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index eff63e08e..21c7c4fdb 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -22,6 +22,7 @@ "arch": null, "instance_type": null, 
"ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", + "cleanup_image": "true", "ssh_interface": "", "ssh_username": "ec2-user", "temporary_security_group_source_cidrs": "", @@ -138,7 +139,8 @@ "PULL_CNI_FROM_GITHUB={{user `pull_cni_from_github`}}", "AWS_ACCESS_KEY_ID={{user `aws_access_key_id`}}", "AWS_SECRET_ACCESS_KEY={{user `aws_secret_access_key`}}", - "AWS_SESSION_TOKEN={{user `aws_session_token`}}" + "AWS_SESSION_TOKEN={{user `aws_session_token`}}", + "CLEANUP_IMAGE={{user `cleanup_image`}}" ] }, { diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index e72cb4731..72b7d7f65 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -258,30 +258,33 @@ EOF ### Cleanup #################################################################### ################################################################################ -# Clean up yum caches to reduce the image size -sudo yum clean all -sudo rm -rf \ - $TEMPLATE_DIR \ - /var/cache/yum - -# Clean up files to reduce confusion during debug -sudo rm -rf \ - /etc/hostname \ - /etc/machine-id \ - /etc/resolv.conf \ - /etc/ssh/ssh_host* \ - /home/ec2-user/.ssh/authorized_keys \ - /root/.ssh/authorized_keys \ - /var/lib/cloud/data \ - /var/lib/cloud/instance \ - /var/lib/cloud/instances \ - /var/lib/cloud/sem \ - /var/lib/dhclient/* \ - /var/lib/dhcp/dhclient.* \ - /var/lib/yum/history \ - /var/log/cloud-init-output.log \ - /var/log/cloud-init.log \ - /var/log/secure \ - /var/log/wtmp +CLEANUP_IMAGE="${CLEANUP_IMAGE:-true}" +if [[ "$CLEANUP_IMAGE" == "true" ]]; then + # Clean up yum caches to reduce the image size + sudo yum clean all + sudo rm -rf \ + $TEMPLATE_DIR \ + /var/cache/yum + + # Clean up files to reduce confusion during debug + sudo rm -rf \ + /etc/hostname \ + /etc/machine-id \ + /etc/resolv.conf \ + /etc/ssh/ssh_host* \ + /home/ec2-user/.ssh/authorized_keys \ + /root/.ssh/authorized_keys \ + /var/lib/cloud/data \ + /var/lib/cloud/instance \ + 
/var/lib/cloud/instances \ + /var/lib/cloud/sem \ + /var/lib/dhclient/* \ + /var/lib/dhcp/dhclient.* \ + /var/lib/yum/history \ + /var/log/cloud-init-output.log \ + /var/log/cloud-init.log \ + /var/log/secure \ + /var/log/wtmp +done sudo touch /etc/machine-id From 7c72faff618c857cc9930aa0eb0e174af9007501 Mon Sep 17 00:00:00 2001 From: Bin Chen Date: Wed, 16 Dec 2020 03:49:33 +1100 Subject: [PATCH 117/621] fix syntax error in install script (#582) --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 72b7d7f65..c93e4909a 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -285,6 +285,6 @@ if [[ "$CLEANUP_IMAGE" == "true" ]]; then /var/log/cloud-init.log \ /var/log/secure \ /var/log/wtmp -done +fi sudo touch /etc/machine-id From 8b4c2258bccd41225015e089a2197ae792bee60e Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Tue, 15 Dec 2020 10:51:53 -0800 Subject: [PATCH 118/621] Adding release note for AMI v20201211 (#583) Co-authored-by: Laxmi Soumya Josyula --- CHANGELOG.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 41f97fcc2..134a5abf9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,30 @@ # Changelog +### AMI Release v20201211 +* amazon-eks-gpu-node-1.18-v20201211 +* amazon-eks-gpu-node-1.17-v20201211 +* amazon-eks-gpu-node-1.16-v20201211 +* amazon-eks-gpu-node-1.15-v20201211 +* amazon-eks-arm64-node-1.18-v20201211 +* amazon-eks-arm64-node-1.17-v20201211 +* amazon-eks-arm64-node-1.16-v20201211 +* amazon-eks-arm64-node-1.15-v20201211 +* amazon-eks-node-1.18-v20201211 +* amazon-eks-node-1.17-v20201211 +* amazon-eks-node-1.16-v20201211 +* amazon-eks-node-1.15-v20201211 + +Binaries used to build these AMIs are published : +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* 
s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : +* Bug fix for the issue with rngd on EKS worker ami that's built with AL2 source ami. +* Bug fix for grub issue introduced by new nvidia driver +* Patch for CVE-2020-1971 (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-1971) + ### AMI Release v20201126 * amazon-eks-gpu-node-1.18-v20201126 * amazon-eks-gpu-node-1.17-v20201126 From a6e0e6ae07c15558911ca2cf8c888d7c85102f73 Mon Sep 17 00:00:00 2001 From: cmdallas Date: Tue, 15 Dec 2020 13:30:40 -0800 Subject: [PATCH 119/621] add support for sonobuoy e2e registry overrides (#585) --- eks-worker-al2.json | 6 ++++-- files/sonobuoy-e2e-registry-config | 5 +++++ scripts/install-worker.sh | 5 +++++ 3 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 files/sonobuoy-e2e-registry-config diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 21c7c4fdb..53a55f230 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -32,7 +32,8 @@ "remote_folder": "", "launch_block_device_mappings_volume_size": "4", "ami_users": "", - "additional_yum_repos": "" + "additional_yum_repos": "", + "sonobuoy_e2e_registry": "" }, "builders": [ { @@ -140,7 +141,8 @@ "AWS_ACCESS_KEY_ID={{user `aws_access_key_id`}}", "AWS_SECRET_ACCESS_KEY={{user `aws_secret_access_key`}}", "AWS_SESSION_TOKEN={{user `aws_session_token`}}", - "CLEANUP_IMAGE={{user `cleanup_image`}}" + "CLEANUP_IMAGE={{user `cleanup_image`}}", + "SONOBUOY_E2E_REGISTRY={{user `sonobuoy_e2e_registry`}}" ] }, { diff --git a/files/sonobuoy-e2e-registry-config b/files/sonobuoy-e2e-registry-config new file mode 100644 index 000000000..be3813d86 --- /dev/null +++ b/files/sonobuoy-e2e-registry-config @@ -0,0 +1,5 @@ +dockerLibraryRegistry: SONOBUOY_E2E_REGISTRY/library +e2eRegistry: SONOBUOY_E2E_REGISTRY/kubernetes-e2e-test-images +gcRegistry: SONOBUOY_E2E_REGISTRY +googleContainerRegistry: SONOBUOY_E2E_REGISTRY/google-containers +sampleRegistry: 
SONOBUOY_E2E_REGISTRY/google-samples \ No newline at end of file diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index c93e4909a..2bb3b74a4 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -129,6 +129,11 @@ if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo systemctl enable docker fi +if [[ -n "$SONOBUOY_E2E_REGISTRY" ]]; then + sudo mv $TEMPLATE_DIR/sonobuoy-e2e-registry-config /etc/eks/sonobuoy-e2e-registry-config + sed -i s,SONOBUOY_E2E_REGISTRY,$SONOBUOY_E2E_REGISTRY,g /etc/eks/sonobuoy-e2e-registry-config +fi + ################################################################################ ### Logrotate ################################################################## ################################################################################ From 488b490b6ea10b54f46b43fde9ad1d5660c210fb Mon Sep 17 00:00:00 2001 From: Bronson Mirafuentes Date: Tue, 15 Dec 2020 14:57:27 -0800 Subject: [PATCH 120/621] fix containerd_version typo in Makefile (#584) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8520b36c2..e857c4251 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date docker_version containerd_verion cni_plugin_version source_ami_id source_ami_owners arch instance_type security_group_id additional_yum_repos pull_cni_from_github +PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date docker_version containerd_version cni_plugin_version source_ami_id source_ami_owners arch instance_type security_group_id additional_yum_repos pull_cni_from_github K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) From f9d2c39535a8b53a951c1a896590aef5254dc896 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Zolt=C3=A1n=20Reegn?= Date: Wed, 16 Dec 2020 23:06:39 +0100 Subject: [PATCH 121/621] Change cgroup driver to systemd (#521) Kubernetes documentation indicates that for stability reasons one should run kubernetes with the systemd cgroup driver if the init system itself is systemd. https://kubernetes.io/docs/setup/production-environment/container-runtimes/#cgroup-drivers Fixes #490 --- files/docker-daemon.json | 3 +++ files/kubelet-config.json | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/files/docker-daemon.json b/files/docker-daemon.json index 55e395721..1a69a0f82 100644 --- a/files/docker-daemon.json +++ b/files/docker-daemon.json @@ -1,5 +1,8 @@ { "bridge": "none", + "exec-opts": [ + "native.cgroupdriver=systemd" + ], "log-driver": "json-file", "log-opts": { "max-size": "10m", diff --git a/files/kubelet-config.json b/files/kubelet-config.json index b78510c6a..607de83fe 100644 --- a/files/kubelet-config.json +++ b/files/kubelet-config.json @@ -24,7 +24,7 @@ "clusterDomain": "cluster.local", "hairpinMode": "hairpin-veth", "readOnlyPort": 0, - "cgroupDriver": "cgroupfs", + "cgroupDriver": "systemd", "cgroupRoot": "/", "featureGates": { "RotateKubeletServerCertificate": true From 4793e3db696f7953e9206ed915f44b8d46dd15b8 Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Wed, 16 Dec 2020 17:24:43 -0800 Subject: [PATCH 122/621] Revert "Change cgroup driver to systemd (#521)" (#587) This reverts commit f9d2c39535a8b53a951c1a896590aef5254dc896. 
--- files/docker-daemon.json | 3 --- files/kubelet-config.json | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/files/docker-daemon.json b/files/docker-daemon.json index 1a69a0f82..55e395721 100644 --- a/files/docker-daemon.json +++ b/files/docker-daemon.json @@ -1,8 +1,5 @@ { "bridge": "none", - "exec-opts": [ - "native.cgroupdriver=systemd" - ], "log-driver": "json-file", "log-opts": { "max-size": "10m", diff --git a/files/kubelet-config.json b/files/kubelet-config.json index 607de83fe..b78510c6a 100644 --- a/files/kubelet-config.json +++ b/files/kubelet-config.json @@ -24,7 +24,7 @@ "clusterDomain": "cluster.local", "hairpinMode": "hairpin-veth", "readOnlyPort": 0, - "cgroupDriver": "systemd", + "cgroupDriver": "cgroupfs", "cgroupRoot": "/", "featureGates": { "RotateKubeletServerCertificate": true From b8eaf228b24811382b85ec9a60c4ea284707a06b Mon Sep 17 00:00:00 2001 From: Joe Gawrieh Date: Thu, 17 Dec 2020 12:06:33 -0800 Subject: [PATCH 123/621] Update Makefile to support sonobuoy e2e registry config override (#588) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e857c4251..d7de49f30 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date docker_version containerd_version cni_plugin_version source_ami_id source_ami_owners arch instance_type security_group_id additional_yum_repos pull_cni_from_github +PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date docker_version containerd_version cni_plugin_version source_ami_id source_ami_owners arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) From 
fdfc06c3c30781621703067697287cde1fc10359 Mon Sep 17 00:00:00 2001 From: Joe Gawrieh Date: Fri, 18 Dec 2020 09:32:11 -0800 Subject: [PATCH 124/621] Fix position of sonobuoy e2e registry config check (#590) --- scripts/install-worker.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 2bb3b74a4..81229d9f6 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -129,11 +129,6 @@ if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo systemctl enable docker fi -if [[ -n "$SONOBUOY_E2E_REGISTRY" ]]; then - sudo mv $TEMPLATE_DIR/sonobuoy-e2e-registry-config /etc/eks/sonobuoy-e2e-registry-config - sed -i s,SONOBUOY_E2E_REGISTRY,$SONOBUOY_E2E_REGISTRY,g /etc/eks/sonobuoy-e2e-registry-config -fi - ################################################################################ ### Logrotate ################################################################## ################################################################################ @@ -235,6 +230,11 @@ sudo mv $TEMPLATE_DIR/eni-max-pods.txt /etc/eks/eni-max-pods.txt sudo mv $TEMPLATE_DIR/bootstrap.sh /etc/eks/bootstrap.sh sudo chmod +x /etc/eks/bootstrap.sh +if [[ -n "$SONOBUOY_E2E_REGISTRY" ]]; then + sudo mv $TEMPLATE_DIR/sonobuoy-e2e-registry-config /etc/eks/sonobuoy-e2e-registry-config + sed -i s,SONOBUOY_E2E_REGISTRY,$SONOBUOY_E2E_REGISTRY,g /etc/eks/sonobuoy-e2e-registry-config +fi + ################################################################################ ### AMI Metadata ############################################################### ################################################################################ From 7e6a0ee20e6cf226a257c742914909fb46d79271 Mon Sep 17 00:00:00 2001 From: Joe Gawrieh Date: Fri, 18 Dec 2020 11:59:59 -0800 Subject: [PATCH 125/621] Fix permission issue for sed (#591) --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/scripts/install-worker.sh b/scripts/install-worker.sh index 81229d9f6..3a830b911 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -232,7 +232,7 @@ sudo chmod +x /etc/eks/bootstrap.sh if [[ -n "$SONOBUOY_E2E_REGISTRY" ]]; then sudo mv $TEMPLATE_DIR/sonobuoy-e2e-registry-config /etc/eks/sonobuoy-e2e-registry-config - sed -i s,SONOBUOY_E2E_REGISTRY,$SONOBUOY_E2E_REGISTRY,g /etc/eks/sonobuoy-e2e-registry-config + sudo sed -i s,SONOBUOY_E2E_REGISTRY,$SONOBUOY_E2E_REGISTRY,g /etc/eks/sonobuoy-e2e-registry-config fi ################################################################################ From 6f2472eed9973c32c89742813c6995c4c5c13d38 Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Fri, 18 Dec 2020 12:42:34 -0800 Subject: [PATCH 126/621] Increasing values for max_user_watches and max_map_count (#589) --- scripts/install-worker.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 3a830b911..503f7835c 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -259,6 +259,14 @@ kernel.panic=10 kernel.panic_on_oops=1 EOF +################################################################################ +### Setting up sysctl properties ############################################### +################################################################################ + +echo fs.inotify.max_user_watches=524288 | sudo tee -a /etc/sysctl.conf +echo vm.max_map_count=524288 | sudo tee -a /etc/sysctl.conf + + ################################################################################ ### Cleanup #################################################################### ################################################################################ From e8fff40ceeb0e2a2a0bece5bce724d58761a7da8 Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Tue, 12 Jan 2021 11:20:41 -0800 Subject: 
[PATCH 127/621] Updating limits for memlock ulimit (#595) --- files/docker-daemon.json | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/files/docker-daemon.json b/files/docker-daemon.json index 55e395721..cf5459f51 100644 --- a/files/docker-daemon.json +++ b/files/docker-daemon.json @@ -6,5 +6,12 @@ "max-file": "10" }, "live-restore": true, - "max-concurrent-downloads": 10 + "max-concurrent-downloads": 10, + "default-ulimits": { + "memlock": { + "Hard": -1, + "Name": "memlock", + "Soft": -1 + } + } } From 45f5f5b160652202f8bcc8193545c1d5034e9598 Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Fri, 15 Jan 2021 10:00:14 -0800 Subject: [PATCH 128/621] Adding AMI Release v20210112 to Changelog (#598) --- CHANGELOG.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 134a5abf9..7d18d40b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,42 @@ # Changelog +### AMI Release v20210112 +* amazon-eks-gpu-node-1.18-v20210112 +* amazon-eks-gpu-node-1.17-v20210112 +* amazon-eks-gpu-node-1.16-v20210112 +* amazon-eks-gpu-node-1.15-v20210112 +* amazon-eks-arm64-node-1.18-v20210112 +* amazon-eks-arm64-node-1.17-v20210112 +* amazon-eks-arm64-node-1.16-v20210112 +* amazon-eks-arm64-node-1.15-v20210112 +* amazon-eks-node-1.18-v20210112 +* amazon-eks-node-1.17-v20210112 +* amazon-eks-node-1.16-v20210112 +* amazon-eks-node-1.15-v20210112 + +Binaries used to build these AMIs are published : +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : +* Update ulimit for memlock to unlimited +* Update ulimit for max_user_watches and max_file_count +* Fix position of sonobuoy e2e registry config check (#590) +* Update Makefile to support sonobuoy e2e registry config override (#588) +* fix syntax error in install script (#582) 
introduced by #522 +* Feature flag the cleanup of the image (#522) +* Add iptables rule count to log collector +* GPU Boost clock setup for performance improvement (#573) +* add support for sonobuoy e2e registry overrides (#585) for MVP +* ensure kubelet.service.d directory exists (#519) +* (bootstrap): document pause container parameters (#556) +* add SIGKILL to RestartForceExitStatus (#554) +* fix containerd_version typo in Makefile (#584) +* Update systemd to always restart kubelet to support dynamic kubelet configuration (#578) +* Add missing instance types (#580) + ### AMI Release v20201211 * amazon-eks-gpu-node-1.18-v20201211 * amazon-eks-gpu-node-1.17-v20201211 From d028149da221a3de351da2a057d0e4967467a55d Mon Sep 17 00:00:00 2001 From: Matt Merkes Date: Wed, 20 Jan 2021 10:11:19 -0800 Subject: [PATCH 129/621] Sets the 5.4 linux kernel as default for kubernetes version 1.19 and higher --- eks-worker-al2.json | 7 ++++++- scripts/upgrade_kernel.sh | 26 ++++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 3 deletions(-) mode change 100644 => 100755 scripts/upgrade_kernel.sh diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 53a55f230..33df3f79b 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -12,6 +12,7 @@ "binary_bucket_region": "us-west-2", "kubernetes_version": null, "kubernetes_build_date": null, + "kernel_version": "", "docker_version": "19.03.6ce-4.amzn2", "containerd_version": "1.4.1-2.amzn2", "cni_plugin_version": "v0.8.6", @@ -110,7 +111,11 @@ "type": "shell", "remote_folder": "{{ user `remote_folder`}}", "expect_disconnect": true, - "script": "{{template_dir}}/scripts/upgrade_kernel.sh" + "script": "{{template_dir}}/scripts/upgrade_kernel.sh", + "environment_vars": [ + "KUBERNETES_VERSION={{user `kubernetes_version`}}", + "KERNEL_VERSION={{user `kernel_version`}}" + ] }, { "type": "shell", diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh old mode 100644 new mode 100755 index 150c290fc..9d2bcbd07 --- 
a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -4,5 +4,27 @@ set -o pipefail set -o nounset set -o errexit -sudo yum update -y kernel -sudo reboot \ No newline at end of file +if [[ -z "$KERNEL_VERSION" ]]; then + # Makes 5.4 kernel the default on 1.19 and higher + IFS='.' + # Convert kubernetes version in an array to compare versions + read -ra ADDR <<< "$KUBERNETES_VERSION" + if (( ADDR[0] == 1 && ADDR[1] < 19 )); then + KERNEL_VERSION=4.14 + else + KERNEL_VERSION=5.4 + fi + + echo "kernel_version is unset. Setting to $KERNEL_VERSION based on kubernetes_version $KUBERNETES_VERSION" +fi + +if [[ $KERNEL_VERSION == "4.14" ]]; then + sudo yum update -y kernel +elif [[ $KERNEL_VERSION == "5.4" ]]; then + sudo amazon-linux-extras install -y kernel-5.4 +else + echo "$KERNEL_VERSION is not a valid kernel version" + exit 1 +fi + +sudo reboot From abdadef492ddf4ee6fc2bb327ef867eee4257d3b Mon Sep 17 00:00:00 2001 From: Matt Merkes Date: Wed, 20 Jan 2021 10:18:27 -0800 Subject: [PATCH 130/621] Revert "Sets the 5.4 linux kernel as default for kubernetes version 1.19 and higher" This reverts commit d028149da221a3de351da2a057d0e4967467a55d. 
--- eks-worker-al2.json | 7 +------ scripts/upgrade_kernel.sh | 26 ++------------------------ 2 files changed, 3 insertions(+), 30 deletions(-) mode change 100755 => 100644 scripts/upgrade_kernel.sh diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 33df3f79b..53a55f230 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -12,7 +12,6 @@ "binary_bucket_region": "us-west-2", "kubernetes_version": null, "kubernetes_build_date": null, - "kernel_version": "", "docker_version": "19.03.6ce-4.amzn2", "containerd_version": "1.4.1-2.amzn2", "cni_plugin_version": "v0.8.6", @@ -111,11 +110,7 @@ "type": "shell", "remote_folder": "{{ user `remote_folder`}}", "expect_disconnect": true, - "script": "{{template_dir}}/scripts/upgrade_kernel.sh", - "environment_vars": [ - "KUBERNETES_VERSION={{user `kubernetes_version`}}", - "KERNEL_VERSION={{user `kernel_version`}}" - ] + "script": "{{template_dir}}/scripts/upgrade_kernel.sh" }, { "type": "shell", diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh old mode 100755 new mode 100644 index 9d2bcbd07..150c290fc --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -4,27 +4,5 @@ set -o pipefail set -o nounset set -o errexit -if [[ -z "$KERNEL_VERSION" ]]; then - # Makes 5.4 kernel the default on 1.19 and higher - IFS='.' - # Convert kubernetes version in an array to compare versions - read -ra ADDR <<< "$KUBERNETES_VERSION" - if (( ADDR[0] == 1 && ADDR[1] < 19 )); then - KERNEL_VERSION=4.14 - else - KERNEL_VERSION=5.4 - fi - - echo "kernel_version is unset. 
Setting to $KERNEL_VERSION based on kubernetes_version $KUBERNETES_VERSION" -fi - -if [[ $KERNEL_VERSION == "4.14" ]]; then - sudo yum update -y kernel -elif [[ $KERNEL_VERSION == "5.4" ]]; then - sudo amazon-linux-extras install -y kernel-5.4 -else - echo "$KERNEL_VERSION is not a valid kernel version" - exit 1 -fi - -sudo reboot +sudo yum update -y kernel +sudo reboot \ No newline at end of file From 3bff6ba3f23fb15de34c50cd50adf4cdbe8a90bd Mon Sep 17 00:00:00 2001 From: Bronson Mirafuentes Date: Wed, 20 Jan 2021 12:25:03 -0800 Subject: [PATCH 131/621] build ARM AMIs with m6g.large instance type (#601) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d7de49f30..870a2a19d 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ aws_region ?= $(AWS_DEFAULT_REGION) binary_bucket_region ?= $(AWS_DEFAULT_REGION) arch ?= x86_64 ifeq ($(arch), arm64) -instance_type ?= a1.large +instance_type ?= m6g.large ami_name ?= amazon-eks-arm64-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') else instance_type ?= m4.large From f85cf415b90acaafb2aebc1cf845b6a2282a0ada Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Wed, 20 Jan 2021 13:32:04 -0800 Subject: [PATCH 132/621] Add Support for c6gn instance type (#597) Co-authored-by: Laxmi Soumya Josyula --- amazon-eks-nodegroup.yaml | 8 ++++++++ files/eni-max-pods.txt | 10 +++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index 1cfb2af38..3ae645796 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -167,6 +167,14 @@ Parameters: - c6gd.medium - c6gd.metal - c6gd.xlarge + - c6gn.12xlarge + - c6gn.16xlarge + - c6gn.2xlarge + - c6gn.4xlarge + - c6gn.8xlarge + - c6gn.large + - c6gn.medium + - c6gn.xlarge - cc2.8xlarge - cr1.8xlarge - d2.2xlarge diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index a6eb620fa..99e61b9ec 
100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. # -# This file was generated at 2020-12-12T18:33:04-03:00 +# This file was generated at 2021-01-13T12:54:18-08:00 # # Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods @@ -100,6 +100,14 @@ c6gd.large 29 c6gd.medium 8 c6gd.metal 737 c6gd.xlarge 58 +c6gn.12xlarge 234 +c6gn.16xlarge 737 +c6gn.2xlarge 58 +c6gn.4xlarge 234 +c6gn.8xlarge 234 +c6gn.large 29 +c6gn.medium 8 +c6gn.xlarge 58 cc2.8xlarge 234 cr1.8xlarge 234 d2.2xlarge 58 From e3f1b910f83ad1f27e68312e50474ea6059f052d Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 25 Jan 2021 10:38:05 -0800 Subject: [PATCH 133/621] Sets the 5.4 linux kernel as default for kubernetes version 1.19 and higher (#600) This reverts commit abdadef492ddf4ee6fc2bb327ef867eee4257d3b. --- Makefile | 2 +- eks-worker-al2.json | 7 ++++++- scripts/upgrade_kernel.sh | 31 +++++++++++++++++++++++++++++-- 3 files changed, 36 insertions(+), 4 deletions(-) mode change 100644 => 100755 scripts/upgrade_kernel.sh diff --git a/Makefile b/Makefile index 870a2a19d..f042f7de1 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date docker_version containerd_version cni_plugin_version source_ami_id source_ami_owners arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry +PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version cni_plugin_version source_ami_id source_ami_owners arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry K8S_VERSION_PARTS := 
$(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 53a55f230..33df3f79b 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -12,6 +12,7 @@ "binary_bucket_region": "us-west-2", "kubernetes_version": null, "kubernetes_build_date": null, + "kernel_version": "", "docker_version": "19.03.6ce-4.amzn2", "containerd_version": "1.4.1-2.amzn2", "cni_plugin_version": "v0.8.6", @@ -110,7 +111,11 @@ "type": "shell", "remote_folder": "{{ user `remote_folder`}}", "expect_disconnect": true, - "script": "{{template_dir}}/scripts/upgrade_kernel.sh" + "script": "{{template_dir}}/scripts/upgrade_kernel.sh", + "environment_vars": [ + "KUBERNETES_VERSION={{user `kubernetes_version`}}", + "KERNEL_VERSION={{user `kernel_version`}}" + ] }, { "type": "shell", diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh old mode 100644 new mode 100755 index 150c290fc..83376eedf --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -4,5 +4,32 @@ set -o pipefail set -o nounset set -o errexit -sudo yum update -y kernel -sudo reboot \ No newline at end of file +if [[ -z "$KERNEL_VERSION" ]]; then + # Save for resetting + OLDIFS=$IFS + # Makes 5.4 kernel the default on 1.19 and higher + IFS='.' + # Convert kubernetes version in an array to compare versions + read -ra ADDR <<< "$KUBERNETES_VERSION" + # Reset + IFS=$OLDIFS + + if (( ADDR[0] == 1 && ADDR[1] < 19 )); then + KERNEL_VERSION=4.14 + else + KERNEL_VERSION=5.4 + fi + + echo "kernel_version is unset. 
Setting to $KERNEL_VERSION based on kubernetes_version $KUBERNETES_VERSION" +fi + +if [[ $KERNEL_VERSION == "4.14" ]]; then + sudo yum update -y kernel +elif [[ $KERNEL_VERSION == "5.4" ]]; then + sudo amazon-linux-extras install -y kernel-5.4 +else + echo "$KERNEL_VERSION is not a valid kernel version" + exit 1 +fi + +sudo reboot From 685e0907fb3ed8f1b5e5bda3aa123e7ed6419d9d Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 26 Jan 2021 13:15:48 -0800 Subject: [PATCH 134/621] Adds more information to README on the AL2 base AMI and Linux kernel selection (#602) --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index dbdec62ab..37cbc9c3f 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,12 @@ versions by running `aws s3 ls s3://amazon-eks/cloudformation/`. For older versions of the EKS AMI (v20-v22), you can find the CloudFormation templates in the same bucket under the path `s3://amazon-eks/1.10.3/2018-06-05/`. +## AL2 / Linux Kernel Information + +By default, the `amazon-eks-ami` uses a [source_ami_filter](https://github.com/awslabs/amazon-eks-ami/blob/e3f1b910f83ad1f27e68312e50474ea6059f052d/eks-worker-al2.json#L46) that selects the latest [hvm](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/virtualization_types.html) AL2 AMI for the given architecture as the base AMI. For more information on what kernel versions are running on published Amazon EKS optimized Linux AMIs, see [the public documentation](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html). + +When building an AMI, you can set the `kernel_version` to `4.14` or `5.4` to customize the kernel version. The [upgrade_kernel.sh script](https://github.com/awslabs/amazon-eks-ami/blob/master/scripts/upgrade_kernel.sh#L26) contains the logic for updating and upgrading the kernel. For Kubernetes versions 1.18 and below, it uses the `4.14` kernel if not set, and it will install the latest patches. 
For Kubernetes version 1.19 and above, it uses the `5.4` kernel if not set. + ## Security For security issues or concerns, please do not open an issue or pull request on GitHub. Please report any suspected or confirmed security issues to AWS Security https://aws.amazon.com/security/vulnerability-reporting/ From 563d35fe115fd27cac90a94d91bf28c3a583854f Mon Sep 17 00:00:00 2001 From: suket22 Date: Tue, 26 Jan 2021 14:19:54 -0800 Subject: [PATCH 135/621] Update CHANGELOG.md (#603) Adding information about AMI Release v20210125 --- CHANGELOG.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d18d40b4..d38538c2b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,30 @@ # Changelog +### AMI Release v20210125 +* amazon-eks-gpu-node-1.18-v20210125 +* amazon-eks-gpu-node-1.17-v20210125 +* amazon-eks-gpu-node-1.16-v20210125 +* amazon-eks-gpu-node-1.15-v20210125 +* amazon-eks-arm64-node-1.18-v20210125 +* amazon-eks-arm64-node-1.17-v20210125 +* amazon-eks-arm64-node-1.16-v20210125 +* amazon-eks-arm64-node-1.15-v20210125 +* amazon-eks-node-1.18-v20210125 +* amazon-eks-node-1.17-v20210125 +* amazon-eks-node-1.16-v20210125 +* amazon-eks-node-1.15-v20210125 + +Binaries used to build these AMIs are published : +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : +* ARM AMIs built with m6g.large instance type (#601) +* Add Support for c6gn instance type (#597) +* Patch for CVE-2021-3156 (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-3156) + ### AMI Release v20210112 * amazon-eks-gpu-node-1.18-v20210112 * amazon-eks-gpu-node-1.17-v20210112 From 2e0954b1cbe4a3321e7391cd1075d64eacda1f91 Mon Sep 17 00:00:00 2001 From: David Stewart Date: Wed, 27 Jan 2021 17:57:02 +0000 Subject: [PATCH 136/621] fix for bootstrap.sh 401 issue: https://github.com/awslabs/amazon-eks-ami/issues/594 (#596) 
Co-authored-by: David Stewart --- files/bootstrap.sh | 80 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 7 deletions(-) mode change 100755 => 100644 files/bootstrap.sh diff --git a/files/bootstrap.sh b/files/bootstrap.sh old mode 100755 new mode 100644 index c17bb816a..ca787717e --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -134,6 +134,72 @@ function get_pause_container_account_for_region () { esac } +function _get_token() { + local token_result= + local http_result= + + token_result=$(curl -s -w "\n%{http_code}" -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" "http://169.254.169.254/latest/api/token") + http_result=$(echo "$token_result" | tail -n 1) + if [[ "$http_result" != "200" ]] + then + echo -e "Failed to get token:\n$token_result" + return 1 + else + echo "$token_result" | head -n 1 + return 0 + fi +} + +function get_token() { + local token= + local retries=20 + local result=1 + + while [[ retries -gt 0 && $result -ne 0 ]] + do + retries=$[$retries-1] + token=$(_get_token) + result=$? + [[ $result != 0 ]] && sleep 5 + done + [[ $result == 0 ]] && echo "$token" + return $result +} + +function _get_meta_data() { + local path=$1 + local metadata_result= + + metadata_result=$(curl -s -w "\n%{http_code}" -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/$path) + http_result=$(echo "$metadata_result" | tail -n 1) + if [[ "$http_result" != "200" ]] + then + echo -e "Failed to get metadata:\n$metadata_result\nhttp://169.254.169.254/$path\n$TOKEN" + return 1 + else + local lines=$(echo "$metadata_result" | wc -l) + echo "$metadata_result" | head -n $(( lines - 1 )) + return 0 + fi +} + +function get_meta_data() { + local metadata= + local path=$1 + local retries=20 + local result=1 + + while [[ retries -gt 0 && $result -ne 0 ]] + do + retries=$[$retries-1] + metadata=$(_get_meta_data $path) + result=$? 
+ [[ $result != 0 ]] && TOKEN=$(get_token) + done + [[ $result == 0 ]] && echo "$metadata" + return $result +} + # Helper function which calculates the amount of the given resource (either CPU or memory) # to reserve in a given resource range, specified by a start and end of the range and a percentage # of the resource to reserve. Note that we return zero if the start of the resource range is @@ -203,9 +269,9 @@ if [ -z "$CLUSTER_NAME" ]; then fi -TOKEN=$(curl -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" "http://169.254.169.254/latest/api/token") -AWS_DEFAULT_REGION=$(curl -s --retry 5 -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/dynamic/instance-identity/document | jq .region -r) -AWS_SERVICES_DOMAIN=$(curl -s --retry 5 -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/2018-09-24/meta-data/services/domain) +TOKEN=$(get_token) +AWS_DEFAULT_REGION=$(get_meta_data 'latest/dynamic/instance-identity/document' | jq .region -r) +AWS_SERVICES_DOMAIN=$(get_meta_data '2018-09-24/meta-data/services/domain') MACHINE=$(uname -m) if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then @@ -268,8 +334,8 @@ if [[ -z "${DNS_CLUSTER_IP}" ]]; then #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. 
(x.y.z.10) DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 else - MAC=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/ -s | head -n 1 | sed 's/\/$//') - TEN_RANGE=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks | grep -c '^10\..*' || true ) + MAC=$(get_meta_data 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$//') + TEN_RANGE=$(get_meta_data "latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks" | grep -c '^10\..*' || true ) DNS_CLUSTER_IP=10.100.0.10 if [[ "$TEN_RANGE" != "0" ]]; then DNS_CLUSTER_IP=172.20.0.10 @@ -282,8 +348,8 @@ fi KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json echo "$(jq ".clusterDNS=[\"$DNS_CLUSTER_IP\"]" $KUBELET_CONFIG)" > $KUBELET_CONFIG -INTERNAL_IP=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/local-ipv4) -INSTANCE_TYPE=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/instance-type) +INTERNAL_IP=$(get_meta_data 'latest/meta-data/local-ipv4') +INSTANCE_TYPE=$(get_meta_data 'latest/meta-data/instance-type') # Sets kubeReserved and evictionHard in /etc/kubernetes/kubelet/kubelet-config.json for worker nodes. The following two function # calls calculate the CPU and memory resources to reserve for kubeReserved based on the instance type of the worker node. 
From 911119250416f460bbd60acd86e4763c006749fa Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 12 Feb 2021 13:10:15 -0800 Subject: [PATCH 137/621] Updates change log with AMI release v20210208 (#609) --- CHANGELOG.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d38538c2b..42091e33f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,28 @@ # Changelog +### AMI Release v20210208 +* amazon-eks-gpu-node-1.18-v20210208 +* amazon-eks-gpu-node-1.17-v20210208 +* amazon-eks-gpu-node-1.16-v20210208 +* amazon-eks-gpu-node-1.15-v20210208 +* amazon-eks-arm64-node-1.18-v20210208 +* amazon-eks-arm64-node-1.17-v20210208 +* amazon-eks-arm64-node-1.16-v20210208 +* amazon-eks-arm64-node-1.15-v20210208 +* amazon-eks-node-1.18-v20210208 +* amazon-eks-node-1.17-v20210208 +* amazon-eks-node-1.16-v20210208 +* amazon-eks-node-1.15-v20210208 + +Binaries used to build these AMIs are published : +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : +* Patch for [ALAS-2021-1588](https://alas.aws.amazon.com/AL2/ALAS-2021-1588.html) + ### AMI Release v20210125 * amazon-eks-gpu-node-1.18-v20210125 * amazon-eks-gpu-node-1.17-v20210125 From d36c03833ee9e8a48bede20795200df7c1bd2dac Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 16 Feb 2021 09:05:55 -0800 Subject: [PATCH 138/621] Adds information on 1.19 AMI (#612) --- .gitignore | 4 ++-- CHANGELOG.md | 5 +++++ Makefile | 4 ++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 467547614..42b8dcbf0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ -manifest.json +*manifest.json *.swp -.idea \ No newline at end of file +.idea diff --git a/CHANGELOG.md b/CHANGELOG.md index 42091e33f..e5d857277 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,26 +1,31 @@ # Changelog ### AMI Release v20210208 +* 
amazon-eks-gpu-node-1.19-v20210208 * amazon-eks-gpu-node-1.18-v20210208 * amazon-eks-gpu-node-1.17-v20210208 * amazon-eks-gpu-node-1.16-v20210208 * amazon-eks-gpu-node-1.15-v20210208 +* amazon-eks-arm64-node-1.19-v20210208 * amazon-eks-arm64-node-1.18-v20210208 * amazon-eks-arm64-node-1.17-v20210208 * amazon-eks-arm64-node-1.16-v20210208 * amazon-eks-arm64-node-1.15-v20210208 +* amazon-eks-node-1.19-v20210208 * amazon-eks-node-1.18-v20210208 * amazon-eks-node-1.17-v20210208 * amazon-eks-node-1.16-v20210208 * amazon-eks-node-1.15-v20210208 Binaries used to build these AMIs are published : +* s3://amazon-eks/1.19.6/2021-01-05/ * s3://amazon-eks/1.18.9/2020-11-02/ * s3://amazon-eks/1.17.12/2020-11-02/ * s3://amazon-eks/1.16.15/2020-11-02/ * s3://amazon-eks/1.15.12/2020-11-02/ Notable changes : +* Kubernetes versions 1.19+ will now use the 5.4 Linux kernel * Patch for [ALAS-2021-1588](https://alas.aws.amazon.com/AL2/ALAS-2021-1588.html) ### AMI Release v20210125 diff --git a/Makefile b/Makefile index f042f7de1..dcd780beb 100644 --- a/Makefile +++ b/Makefile @@ -58,3 +58,7 @@ k8s: validate .PHONY: 1.18 1.18: $(MAKE) k8s kubernetes_version=1.18.9 kubernetes_build_date=2020-11-02 pull_cni_from_github=true + +.PHONY: 1.19 + 1.19: + $(MAKE) k8s kubernetes_version=1.19.6 kubernetes_build_date=2021-01-05 pull_cni_from_github=true From eb9459ba970c28d661741a8cad115e267871d3b8 Mon Sep 17 00:00:00 2001 From: Bronson Mirafuentes Date: Wed, 17 Feb 2021 11:30:41 -0800 Subject: [PATCH 139/621] check that nvidia-smi is configured correctly before updating GPU clocks (#613) --- files/bootstrap.sh | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index ca787717e..696f32431 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -417,25 +417,30 @@ systemctl start kubelet if command -v nvidia-smi &>/dev/null ; then echo "nvidia-smi found" - sudo nvidia-smi -pm 1 # set persistence mode 
- sudo nvidia-smi --auto-boost-default=0 - - GPUNAME=$(nvidia-smi -L | head -n1) - echo $GPUNAME - - # set application clock to maximum - if [[ $GPUNAME == *"A100"* ]]; then - nvidia-smi -ac 1215,1410 - elif [[ $GPUNAME == *"V100"* ]]; then - nvidia-smi -ac 877,1530 - elif [[ $GPUNAME == *"K80"* ]]; then - nvidia-smi -ac 2505,875 - elif [[ $GPUNAME == *"T4"* ]]; then - nvidia-smi -ac 5001,1590 - elif [[ $GPUNAME == *"M60"* ]]; then - nvidia-smi -ac 2505,1177 + nvidia-smi -q > /tmp/nvidia-smi-check + if [[ "$?" == "0" ]]; then + sudo nvidia-smi -pm 1 # set persistence mode + sudo nvidia-smi --auto-boost-default=0 + + GPUNAME=$(nvidia-smi -L | head -n1) + echo $GPUNAME + + # set application clock to maximum + if [[ $GPUNAME == *"A100"* ]]; then + nvidia-smi -ac 1215,1410 + elif [[ $GPUNAME == *"V100"* ]]; then + nvidia-smi -ac 877,1530 + elif [[ $GPUNAME == *"K80"* ]]; then + nvidia-smi -ac 2505,875 + elif [[ $GPUNAME == *"T4"* ]]; then + nvidia-smi -ac 5001,1590 + elif [[ $GPUNAME == *"M60"* ]]; then + nvidia-smi -ac 2505,1177 + else + echo "unsupported gpu" + fi else - echo "unsupported gpu" + cat /tmp/nvidia-smi-check fi else echo "nvidia-smi not found" From 2cbd368ac99080a07641b47c439cbc210b2a2a17 Mon Sep 17 00:00:00 2001 From: Justin Plock Date: Fri, 19 Feb 2021 19:48:40 +0000 Subject: [PATCH 140/621] Fix Makefile indentation for 1.19 (#616) * Fix Makefile indentation * Added 1.19 to all target --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index dcd780beb..6b0b9ed66 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.15 1.16 1.17 1.18 +all: 1.15 1.16 1.17 1.18 1.19 .PHONY: validate validate: @@ -60,5 +60,5 @@ k8s: validate $(MAKE) k8s kubernetes_version=1.18.9 kubernetes_build_date=2020-11-02 pull_cni_from_github=true .PHONY: 1.19 - 1.19: - $(MAKE) k8s kubernetes_version=1.19.6 kubernetes_build_date=2021-01-05 pull_cni_from_github=true +1.19: + 
$(MAKE) k8s kubernetes_version=1.19.6 kubernetes_build_date=2021-01-05 pull_cni_from_github=true From c72748a017f70b3ec0d9f2571b8705f0b098e7b2 Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Fri, 19 Feb 2021 12:08:33 -0800 Subject: [PATCH 141/621] Increase fs.inotify.max_user_instances to 8192 from the default of 128 (#614) We have customers running out of watchers. Increase this limit to complement the earlier increase in fs.inotify.max_user_watches limit. --- scripts/install-worker.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 503f7835c..a12a33930 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -264,6 +264,7 @@ EOF ################################################################################ echo fs.inotify.max_user_watches=524288 | sudo tee -a /etc/sysctl.conf +echo fs.inotify.max_user_instances=8192 | sudo tee -a /etc/sysctl.conf echo vm.max_map_count=524288 | sudo tee -a /etc/sysctl.conf From 745b333e2d3c07d8ad7a1147df85acd543ad213b Mon Sep 17 00:00:00 2001 From: Bronson Mirafuentes Date: Mon, 22 Feb 2021 15:00:16 -0800 Subject: [PATCH 142/621] use dynamic lookup of docker gid (#622) * remove hardcoded gid for docker * keep static docker gid, dynamically find docker gid in useradd command --- scripts/install-worker.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index a12a33930..286021233 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -111,7 +111,8 @@ sudo yum install -y yum-utils device-mapper-persistent-data lvm2 INSTALL_DOCKER="${INSTALL_DOCKER:-true}" if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo amazon-linux-extras enable docker - sudo groupadd -fog 1950 docker && sudo useradd --gid 1950 docker + sudo groupadd -fog 1950 docker + sudo useradd --gid $(getent group docker | cut -d: -f3) docker sudo yum install -y docker-${DOCKER_VERSION}* sudo usermod 
-aG docker $USER From 953aeb5ea53d4535639e7b15b393fa5f6ba5a729 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 23 Feb 2021 09:26:01 -0800 Subject: [PATCH 143/621] Resolves #607 by adding more information to log-collector-script README (#623) --- log-collector-script/linux/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/log-collector-script/linux/README.md b/log-collector-script/linux/README.md index e544db9f3..ee08bfd72 100644 --- a/log-collector-script/linux/README.md +++ b/log-collector-script/linux/README.md @@ -3,6 +3,9 @@ This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. #### Usage + +At a high level, you run this script on your Kubernetes node, and it will collect system information, configuration and logs that will assist in troubleshooting issues with your node. AWS support and service team engineers can use this information once provided via a customer support case. + * Collect EKS logs using SSM agent, jump to below [section](#collect-eks-logs-using-ssm-agent) _(or)_ * Run this project as the root user: From 850e0029c34c0dfcb8788a1433759d1cc8cef2b2 Mon Sep 17 00:00:00 2001 From: Bronson Mirafuentes Date: Tue, 23 Feb 2021 16:16:43 -0800 Subject: [PATCH 144/621] bump docker version to 19.03.13ce-1 (#624) --- eks-worker-al2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 33df3f79b..20fc62aba 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -13,7 +13,7 @@ "kubernetes_version": null, "kubernetes_build_date": null, "kernel_version": "", - "docker_version": "19.03.6ce-4.amzn2", + "docker_version": "19.03.13ce-1.amzn2", "containerd_version": "1.4.1-2.amzn2", "cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", From 59932442c80886db4127e1c8e037b41c0ea7f7cd Mon Sep 17 00:00:00 2001 From: Mateusz Gozdek Date: Thu, 25 Feb 2021 18:51:22 +0100 Subject: [PATCH 145/621] files/bootstrap.sh: 
ensure /etc/docker exists before writing to it (#611) On distros like Flatcar Container Linux, /etc/docker may not exist, so additional step is required apart from running /etc/eks/bootstrap.sh script to get things running. This will be fixed in next release of Flatcar, but I think it's still valuable to include for possibly other distros. Signed-off-by: Mateusz Gozdek --- files/bootstrap.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 696f32431..a0943ff6a 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -398,11 +398,15 @@ fi # Replace with custom docker config contents. if [[ -n "$DOCKER_CONFIG_JSON" ]]; then + mkdir -p /etc/docker + echo "$DOCKER_CONFIG_JSON" > /etc/docker/daemon.json systemctl restart docker fi if [[ "$ENABLE_DOCKER_BRIDGE" = "true" ]]; then + mkdir -p /etc/docker + # Enabling the docker bridge network. We have to disable live-restore as it # prevents docker from recreating the default bridge network on restart echo "$(jq '.bridge="docker0" | ."live-restore"=false' /etc/docker/daemon.json)" > /etc/docker/daemon.json From db28da15d2b696bc08ac3aacc9675694f4a69933 Mon Sep 17 00:00:00 2001 From: Naeil Ezzoueidi Date: Thu, 25 Feb 2021 18:00:21 +0000 Subject: [PATCH 146/621] Add Docker tracing based on its socket and containerd logging (#619) * Add Docker tracing based on its socket and containerd logging * Fixing a typo jouralctl Co-authored-by: Naeil Ezzoueidi --- .../linux/eks-log-collector.sh | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 66f73974e..aa6681ef9 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -54,6 +54,7 @@ COMMON_DIRECTORIES=( kernel system docker + containerd storage var_log networking @@ -190,7 +191,7 @@ create_directories() { get_instance_id() { 
INSTANCE_ID_FILE="/var/lib/cloud/data/instance-id" - + if grep -q '^i-' "$INSTANCE_ID_FILE"; then cp ${INSTANCE_ID_FILE} "${COLLECT_DIR}"/system/instance-id.txt readonly INSTANCE_ID=$(cat "${COLLECT_DIR}"/system/instance-id.txt) @@ -249,6 +250,7 @@ collect() { get_iptables_info get_pkglist get_system_services + get_containerd_info get_docker_info get_k8s_info get_ipamd_info @@ -533,6 +535,19 @@ get_system_services() { ok } +get_containerd_info() { + try "Collect Containerd daemon information" + + if [[ "$(pgrep -o containerd)" -ne 0 ]]; then + timeout 75 containerd config dump > "${COLLECT_DIR}"/containerd/containerd-config.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 journalctl -u containerd > "${COLLECT_DIR}"/containerd/containerd-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + else + warning "The Containerd daemon is not running." + fi + + ok +} + get_docker_info() { try "collect Docker daemon information" @@ -541,6 +556,7 @@ get_docker_info() { timeout 75 docker ps --all --no-trunc > "${COLLECT_DIR}"/docker/docker-ps.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker ps --all --no-truc output \" " timeout 75 docker images > "${COLLECT_DIR}"/docker/docker-images.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker images output \" " timeout 75 docker version > "${COLLECT_DIR}"/docker/docker-version.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker version output \" " + timeout 75 curl --unix-socket /var/run/docker.sock http://./debug/pprof/goroutine\?debug\=2 > "${COLLECT_DIR}"/docker/docker-trace.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker version output \" " else warning "The Docker daemon is not running." 
fi From 0ccb71878f92f808dcc80d38e9cf8a296bb72b33 Mon Sep 17 00:00:00 2001 From: Bronson Mirafuentes Date: Thu, 4 Mar 2021 09:56:43 -0800 Subject: [PATCH 147/621] set kubelet klog verbosity to 2 (#629) --- files/bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index a0943ff6a..f90a99ea4 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -386,7 +386,7 @@ mkdir -p /etc/systemd/system/kubelet.service.d cat < /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf [Service] -Environment='KUBELET_ARGS=--node-ip=$INTERNAL_IP --pod-infra-container-image=$PAUSE_CONTAINER' +Environment='KUBELET_ARGS=--node-ip=$INTERNAL_IP --pod-infra-container-image=$PAUSE_CONTAINER --v=2' EOF if [[ -n "$KUBELET_EXTRA_ARGS" ]]; then From 90cadfce2724a425e2aff1d787c05e025be9170a Mon Sep 17 00:00:00 2001 From: Bronson Mirafuentes Date: Thu, 4 Mar 2021 16:21:03 -0800 Subject: [PATCH 148/621] Create CHANGELOG.md --- CHANGELOG.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5d857277..8ff375a39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,39 @@ # Changelog +### AMI Release v20210302 +* amazon-eks-gpu-node-1.19-v20210302 +* amazon-eks-gpu-node-1.18-v20210302 +* amazon-eks-gpu-node-1.17-v20210302 +* amazon-eks-gpu-node-1.16-v20210302 +* amazon-eks-gpu-node-1.15-v20210302 +* amazon-eks-arm64-node-1.19-v20210302 +* amazon-eks-arm64-node-1.18-v20210302 +* amazon-eks-arm64-node-1.17-v20210302 +* amazon-eks-arm64-node-1.16-v20210302 +* amazon-eks-arm64-node-1.15-v20210302 +* amazon-eks-node-1.19-v20210302 +* amazon-eks-node-1.18-v20210302 +* amazon-eks-node-1.17-v20210302 +* amazon-eks-node-1.16-v20210302 +* amazon-eks-node-1.15-v20210302 + +Binaries used to build these AMIs are published: +- s3://amazon-eks/1.19.6/2021-01-05/ +- s3://amazon-eks/1.18.9/2020-11-02/ +- s3://amazon-eks/1.17.12/2020-11-02/ +- 
s3://amazon-eks/1.16.15/2020-11-02/ +- s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes: +- files/bootstrap.sh: ensure /etc/docker exists before writing to it (#611) +- GPU AMIs now use docker `daemon.json` defined in https://github.com/awslabs/amazon-eks-ami/blob/master/files/docker-daemon.json +- Patch for CVE-2021-3177 +- check that nvidia-smi is configured correctly before updating GPU clocks (#613) +- Fix Makefile indentation for 1.19 (#616) +- Increase fs.inotify.max_user_instances to 8192 from the default of 128 (#614) +- use dynamic lookup of docker gid (#622) +- bump docker version to 19.03.13ce-1 (#624) + ### AMI Release v20210208 * amazon-eks-gpu-node-1.19-v20210208 * amazon-eks-gpu-node-1.18-v20210208 From eedd798451d9aa45b41e10bb330c11de27659f29 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 5 Mar 2021 14:59:33 -0800 Subject: [PATCH 149/621] Adds PLAN.md to describe steps to move from AWS labs to AWS (#625) --- PLAN.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 PLAN.md diff --git a/PLAN.md b/PLAN.md new file mode 100644 index 000000000..c60bb07d7 --- /dev/null +++ b/PLAN.md @@ -0,0 +1,30 @@ +### Goal + +The goal of this plan is to move the `amazon-eks-ami` package from Amazon Web Services - Labs to Amazon Web Services. EKS and EKS customers depend on this package to build and vend AMIs used in production, and while EKS does test the AMIs before releasing, we'd like to enable more rigorous testing and provide customers more visibility into the process AMIs go through before releasing. + +To achieve the higher level goal of moving the project to an AWS project, here are the following goals: + +1. As much as possible, move all scripts, processes, etc. to the open by including maintaining all related scripts in a GitHub repo and using common tools for testing and releasing OSS software. +1. Implement processes that enable timely support for issues and PRs +1. 
Improve the safety and reliability of releases by improving testing + +### Stage 1: Improve GitHub Repo Hygiene + +1. Create GitHub project board for tracking progress on current stage +1. Create GitHub project roadmap, similar to [this one](https://github.com/aws/aws-controllers-k8s/projects/1) +1. Triage 100% of current GitHub issues and set SLA to 3 days going forward +1. Review 100% of current PRs and set SLA to 3 days going forward for initial review +1. Update README.md so that customers are comfortable building AMIs, understand how it works and know how to test custom AMIs manually + +### Stage 2: Improve Safety and Reliability + +1. Build AMIs as part of PR process +1. Enable running Kubernetes conformance tests (or similar) with built AMIs +1. Enable adding additional tests to validate built AMIs +1. Run end-to-end tests as part of the PR process +1. All EKS Linux AMIs can be built from GitHub repo, including ARM, GPU, Bottlerocket, etc. + +### Stage 3: Productionalize Release Process + +1. Customers have some visibility into releases and the release process +1.
New AMIs are built and released from the GitHub repo automatically, either on a schedule or after PRs are merged From 7afac070c2e4b95cd13de67742890bb9b8fa6dbd Mon Sep 17 00:00:00 2001 From: ajoux-accor <60878319+ajoux-accor@users.noreply.github.com> Date: Tue, 9 Mar 2021 21:05:36 +0100 Subject: [PATCH 150/621] Fix 'Unknown options:' when multiple /var/log/eks_i* files exists (#620) * Fix 'Unknown options:' when multiple /var/log/eks_i* files exists * Resolve PR comment Co-authored-by: ajoux --- log-collector-script/linux/eks-ssm-content.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/log-collector-script/linux/eks-ssm-content.json b/log-collector-script/linux/eks-ssm-content.json index 5484b224a..a830f5068 100644 --- a/log-collector-script/linux/eks-ssm-content.json +++ b/log-collector-script/linux/eks-ssm-content.json @@ -20,6 +20,8 @@ "inputs": { "runCommand": [ "curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-log-collector.sh", + "echo \"Cleaning old eks-log-collector files\"", + "rm /var/log/eks_i*", "bash ./eks-log-collector.sh >/dev/null 2>&1", "echo \"EKS logs collected\"", "if [ -f /usr/local/bin/aws ]; then", From 3a2160d65a712281c6b8cbeacb1ab322e1ebaf66 Mon Sep 17 00:00:00 2001 From: Bronson Mirafuentes Date: Thu, 11 Mar 2021 08:48:26 -0800 Subject: [PATCH 151/621] Update CHANGELOG.md (#633) --- CHANGELOG.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ff375a39..ade417289 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,33 @@ # Changelog +### AMI Release v20210310 +* amazon-eks-gpu-node-1.19-v20210310 +* amazon-eks-gpu-node-1.18-v20210310 +* amazon-eks-gpu-node-1.17-v20210310 +* amazon-eks-gpu-node-1.16-v20210310 +* amazon-eks-gpu-node-1.15-v20210310 +* amazon-eks-arm64-node-1.19-v20210310 +* amazon-eks-arm64-node-1.18-v20210310 +* amazon-eks-arm64-node-1.17-v20210310 +* amazon-eks-arm64-node-1.16-v20210310 +* 
amazon-eks-arm64-node-1.15-v20210310 +* amazon-eks-node-1.19-v20210310 +* amazon-eks-node-1.18-v20210309 +* amazon-eks-node-1.17-v20210309 +* amazon-eks-node-1.16-v20210309 +* amazon-eks-node-1.15-v20210309 + +Binaries used to build these AMIs are published : +s3://amazon-eks/1.19.6/2021-01-05/ +s3://amazon-eks/1.18.9/2020-11-02/ +s3://amazon-eks/1.17.12/2020-11-02/ +s3://amazon-eks/1.16.15/2020-11-02/ +s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : +- Updates Nvidia drivers to version `460.27.04` +- GPU AMIs no longer uses `daemon.json` defined in https://github.com/awslabs/amazon-eks-ami/blob/master/files/docker-daemon.json + ### AMI Release v20210302 * amazon-eks-gpu-node-1.19-v20210302 * amazon-eks-gpu-node-1.18-v20210302 From bd903ad0e8b4d7c8967bc87bac995c66730dc997 Mon Sep 17 00:00:00 2001 From: Bronson Mirafuentes Date: Thu, 11 Mar 2021 09:47:59 -0800 Subject: [PATCH 152/621] update log-collector-script to pull configure-multicard-interfaces logs (#631) --- log-collector-script/linux/eks-log-collector.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index aa6681ef9..fe45cd4d2 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -470,6 +470,9 @@ get_networking_info() { timeout 75 ip rule show > "${COLLECT_DIR}"/networking/iprule.txt timeout 75 ip route show table all >> "${COLLECT_DIR}"/networking/iproute.txt + # configure-multicard-interfaces + timeout 75 journalctl -u configure-multicard-interfaces > "${COLLECT_DIR}"/networking/configure-multicard-interfaces.txt || echo -e "\tTimed out, ignoring \"configure-multicard-interfaces unit output \" " + ok } From a1d8ded1dad9b1f657b05b391e08e4dcd9de5c9c Mon Sep 17 00:00:00 2001 From: shun Date: Fri, 12 Mar 2021 03:34:59 +0900 Subject: [PATCH 153/621] Remove the redundant conditional statement (#604) --- files/bootstrap.sh | 6 +----- 1 
file changed, 1 insertion(+), 5 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index f90a99ea4..b2a3c1698 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -375,11 +375,7 @@ echo "$(jq --arg mebibytes_to_reserve "${mebibytes_to_reserve}Mi" --arg cpu_mill '. += {kubeReserved: {"cpu": $cpu_millicores_to_reserve, "ephemeral-storage": "1Gi", "memory": $mebibytes_to_reserve}}' $KUBELET_CONFIG)" > $KUBELET_CONFIG if [[ "$USE_MAX_PODS" = "true" ]]; then - if [[ -n "$MAX_PODS" ]]; then - echo "$(jq ".maxPods=$MAX_PODS" $KUBELET_CONFIG)" > $KUBELET_CONFIG - else - echo "No entry for $INSTANCE_TYPE in $MAX_PODS_FILE. Not setting max pods for kubelet" - fi + echo "$(jq ".maxPods=$MAX_PODS" $KUBELET_CONFIG)" > $KUBELET_CONFIG fi mkdir -p /etc/systemd/system/kubelet.service.d From 21b5af7502698759ae2ee42c968be3307a6fc138 Mon Sep 17 00:00:00 2001 From: Graham Rounds Date: Fri, 12 Mar 2021 12:19:49 -0700 Subject: [PATCH 154/621] add source_ami_filter_name to makefile PACKER_VARIABLES (#540) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6b0b9ed66..b65f86497 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version cni_plugin_version source_ami_id source_ami_owners arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry +PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version cni_plugin_version source_ami_id source_ami_owners source_ami_filter_name arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 
1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) From 092e1651f7d3a1372d294d15414c7ecf4d8d60e0 Mon Sep 17 00:00:00 2001 From: Bronson Mirafuentes Date: Fri, 12 Mar 2021 15:33:01 -0800 Subject: [PATCH 155/621] Update CHANGELOG.md (#635) * Update CHANGELOG.md --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ade417289..ec329ebb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,9 @@ Notable changes : - GPU AMIs no longer uses `daemon.json` defined in https://github.com/awslabs/amazon-eks-ami/blob/master/files/docker-daemon.json ### AMI Release v20210302 + +**GPU AMIs in this release are not compatible with any eksctl version after [eksctl 0.34.0](https://github.com/weaveworks/eksctl/releases/tag/0.34.0)** + * amazon-eks-gpu-node-1.19-v20210302 * amazon-eks-gpu-node-1.18-v20210302 * amazon-eks-gpu-node-1.17-v20210302 From 732b6b2f4bee5b92297c416de1f8cba30a70b364 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 18 Mar 2021 09:50:53 -0700 Subject: [PATCH 156/621] Adds info on adding instance types to eni-max-pods.txt (#637) --- USER_GUIDE.md | 25 +++++++++++++++++++++++++ files/eni-max-pods.txt | 15 ++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 USER_GUIDE.md diff --git a/USER_GUIDE.md b/USER_GUIDE.md new file mode 100644 index 000000000..03f8c445d --- /dev/null +++ b/USER_GUIDE.md @@ -0,0 +1,25 @@ +## User Guide + +This guide will provide more detailed usage information on this repo. + +## Updating known instance types + +`files/bootstrap.sh` configures the maximum number of pods on a node based off of the number of ENIs available, which is determined by the instance type. Larger instances generally have more ENIs. The number of ENIs limits how many IPV4 addresses are available on an instance, and we need one IP address per pod. 
You can [see this file](https://github.com/aws/amazon-vpc-cni-k8s/blob/master/scripts/gen_vpc_ip_limits.go) for the code that calculates the max pods for more information. + +To add support for new instance types, at a minimum, we need to update `files/eni-max-pods.txt` using the [amazon-vpc-cni-k8s package](https://github.com/aws/amazon-vpc-cni-k8s) to set the number of max pods available for those instance types. If the instance type is not on the list, `bootstrap.sh` will fail when the node is started. + +``` +$ git clone git@github.com:aws/amazon-vpc-cni-k8s.git + +# AWS credentials required at this point +$ make generate-limits +# misc/eni-max-pods.txt should be generated + +# Copy the generated file to this repo, something like this: +$ cp misc/eni-max-pods.txt ../amazon-eks-ami/files/ + +# Verify that expected types were added +$ git diff +``` + +At this point, you can build an AMI and it will include the updated list of instance types. diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 99e61b9ec..8a7877a9b 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License.
# -# This file was generated at 2021-01-13T12:54:18-08:00 +# This file was generated at 2021-03-16T15:26:13-07:00 # # Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods @@ -229,6 +229,7 @@ m5dn.2xlarge 58 m5dn.4xlarge 234 m5dn.8xlarge 234 m5dn.large 29 +m5dn.metal 737 m5dn.xlarge 58 m5n.12xlarge 234 m5n.16xlarge 737 @@ -237,6 +238,7 @@ m5n.2xlarge 58 m5n.4xlarge 234 m5n.8xlarge 234 m5n.large 29 +m5n.metal 737 m5n.xlarge 58 m5zn.12xlarge 737 m5zn.2xlarge 58 @@ -333,6 +335,7 @@ r5dn.2xlarge 58 r5dn.4xlarge 234 r5dn.8xlarge 234 r5dn.large 29 +r5dn.metal 737 r5dn.xlarge 58 r5n.12xlarge 234 r5n.16xlarge 737 @@ -341,6 +344,7 @@ r5n.2xlarge 58 r5n.4xlarge 234 r5n.8xlarge 234 r5n.large 29 +r5n.metal 737 r5n.xlarge 58 r6g.12xlarge 234 r6g.16xlarge 737 @@ -402,6 +406,15 @@ x1e.32xlarge 234 x1e.4xlarge 58 x1e.8xlarge 58 x1e.xlarge 29 +x2gd.12xlarge 234 +x2gd.16xlarge 737 +x2gd.2xlarge 58 +x2gd.4xlarge 234 +x2gd.8xlarge 234 +x2gd.large 29 +x2gd.medium 8 +x2gd.metal 737 +x2gd.xlarge 58 z1d.12xlarge 737 z1d.2xlarge 58 z1d.3xlarge 234 From b402ccd6a2f0cc33607473bd1c535e84eadb150e Mon Sep 17 00:00:00 2001 From: Visuna Date: Thu, 25 Mar 2021 22:35:12 -0700 Subject: [PATCH 157/621] Update CHANGELOG.md (#641) * Update CHANGELOG.md Update CHANGELOG.md for v20210322 release --- CHANGELOG.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ec329ebb6..25687afb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,35 @@ # Changelog +### AMI Release v20210322 + +* amazon-eks-gpu-node-1.19-v20210322 +* amazon-eks-gpu-node-1.18-v20210322 +* amazon-eks-gpu-node-1.17-v20210322 +* amazon-eks-gpu-node-1.16-v20210322 +* amazon-eks-gpu-node-1.15-v20210322 +* amazon-eks-arm64-node-1.19-v20210322 +* amazon-eks-arm64-node-1.18-v20210322 +* amazon-eks-arm64-node-1.17-v20210322 +* amazon-eks-arm64-node-1.16-v20210322 +* amazon-eks-arm64-node-1.15-v20210322 +* 
amazon-eks-node-1.19-v20210322 +* amazon-eks-node-1.18-v20210322 +* amazon-eks-node-1.17-v20210322 +* amazon-eks-node-1.16-v20210322 +* amazon-eks-node-1.15-v20210322 + +Binaries used to build these AMIs are published : +s3://amazon-eks/1.19.6/2021-01-05/ +s3://amazon-eks/1.18.9/2020-11-02/ +s3://amazon-eks/1.17.12/2020-11-02/ +s3://amazon-eks/1.16.15/2020-11-02/ +s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : +- Updates Nvidia drivers to version `460.32.03` +- patch for CVE-2021-27363, CVE-2021-27364, CVE-2021-27365 +- set kubelet log verbosity to 2 + ### AMI Release v20210310 * amazon-eks-gpu-node-1.19-v20210310 * amazon-eks-gpu-node-1.18-v20210310 From 1b6b5cde690dce7653e36ae6c90d1a9741bd38d4 Mon Sep 17 00:00:00 2001 From: Visuna Date: Thu, 1 Apr 2021 10:55:57 -0700 Subject: [PATCH 158/621] Enable CSIServiceAccountToken feature gate for v1.20 (#644) --- scripts/install-worker.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 286021233..8850e8102 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -214,6 +214,12 @@ sudo mv $TEMPLATE_DIR/kubelet-kubeconfig /var/lib/kubelet/kubeconfig sudo chown root:root /var/lib/kubelet/kubeconfig sudo mv $TEMPLATE_DIR/kubelet.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service +# Inject CSIServiceAccountToken feature gate to kubelet config if kubernetes version starts with 1.20. +# This is only injected for 1.20 since CSIServiceAccountToken will be moved to beta starting 1.21. 
+if [[ $KUBERNETES_VERSION == "1.20"* ]]; then + KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED=$(cat $TEMPLATE_DIR/kubelet-config.json | jq '.featureGates += {CSIServiceAccountToken: true}') + echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $TEMPLATE_DIR/kubelet-config.json +fi sudo mv $TEMPLATE_DIR/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json sudo chown root:root /etc/kubernetes/kubelet/kubelet-config.json From e5d837b4f90e237311e08988360d7ca900854840 Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Fri, 2 Apr 2021 15:40:43 -0700 Subject: [PATCH 159/621] Update CHANGELOG.md for v20210329 release (#646) --- CHANGELOG.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25687afb7..f326a3531 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,36 @@ # Changelog +### AMI Release v20210329 + +* amazon-eks-gpu-node-1.19-v20210329 +* amazon-eks-gpu-node-1.18-v20210329 +* amazon-eks-gpu-node-1.17-v20210329 +* amazon-eks-gpu-node-1.16-v20210329 +* amazon-eks-gpu-node-1.15-v20210329 +* amazon-eks-arm64-node-1.19-v20210329 +* amazon-eks-arm64-node-1.18-v20210329 +* amazon-eks-arm64-node-1.17-v20210329 +* amazon-eks-arm64-node-1.16-v20210329 +* amazon-eks-arm64-node-1.15-v20210329 +* amazon-eks-node-1.19-v20210329 +* amazon-eks-node-1.18-v20210329 +* amazon-eks-node-1.17-v20210329 +* amazon-eks-node-1.16-v20210329 +* amazon-eks-node-1.15-v20210329 + +Binaries used to build these AMIs are published: +s3://amazon-eks/1.19.6/2021-01-05/ +s3://amazon-eks/1.18.9/2020-11-02/ +s3://amazon-eks/1.17.12/2020-11-02/ +s3://amazon-eks/1.16.15/2020-11-02/ +s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes: +A regression was introduced to the 4.14 Amazon Linux Kernel where I/O could slow significantly after running some workloads for a long period of time (observations point to between 4 hours and several days). 
This release contains the Kernel patch which fixes the above issue. + + + + ### AMI Release v20210322 * amazon-eks-gpu-node-1.19-v20210322 From 8eb9287d3755a1a3e93f63246014b1db54e83bc2 Mon Sep 17 00:00:00 2001 From: Vishal Gupta Date: Mon, 12 Apr 2021 14:03:57 -0700 Subject: [PATCH 160/621] pinning runc version to 1.0.0-rc92 (#650) pinning runc version to 1.0.0-rc92 Co-authored-by: Vishal Gupta --- scripts/install-worker.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 8850e8102..d4aecf62a 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -125,6 +125,10 @@ if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo yum downgrade -y containerd-${CONTAINERD_VERSION} + # runc `1.0.0-rc93` resulted in a regression: https://github.com/awslabs/amazon-eks-ami/issues/648 + # pinning it to `1.0.0-rc92` + sudo yum downgrade -y runc.${MACHINE} 1.0.0-0.1.20200826.gitff819c7.amzn2 + # Enable docker daemon to start on boot. 
sudo systemctl daemon-reload sudo systemctl enable docker From 5a3df0fdb17e540f8d5a9b405096f32d6b9b0a3f Mon Sep 17 00:00:00 2001 From: Vishal Gupta Date: Thu, 15 Apr 2021 09:11:56 -0700 Subject: [PATCH 161/621] updating change log of v20210414 release (#651) Co-authored-by: Vishal Gupta --- CHANGELOG.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f326a3531..caaeab974 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,34 @@ # Changelog +### AMI Release v20210414 + +* amazon-eks-gpu-node-1.19-v20210414 +* amazon-eks-gpu-node-1.18-v20210414 +* amazon-eks-gpu-node-1.17-v20210414 +* amazon-eks-gpu-node-1.16-v20210414 +* amazon-eks-gpu-node-1.15-v20210414 +* amazon-eks-arm64-node-1.19-v20210414 +* amazon-eks-arm64-node-1.18-v20210414 +* amazon-eks-arm64-node-1.17-v20210414 +* amazon-eks-arm64-node-1.16-v20210414 +* amazon-eks-arm64-node-1.15-v20210414 +* amazon-eks-node-1.19-v20210414 +* amazon-eks-node-1.18-v20210414 +* amazon-eks-node-1.17-v20210414 +* amazon-eks-node-1.16-v20210414 +* amazon-eks-node-1.15-v20210414 + +Binaries used to build these AMIs are published: +s3://amazon-eks/1.19.6/2021-01-05/ +s3://amazon-eks/1.18.9/2020-11-02/ +s3://amazon-eks/1.17.12/2020-11-02/ +s3://amazon-eks/1.16.15/2020-11-02/ +s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes: +A regression was introduced for 1.19 AMI in the last release as a result of runc version update to `1.0.0-rc93` causing nodes to flap between `Ready` and `NotReady`, more details [#648](https://github.com/awslabs/amazon-eks-ami/issues/648). We are reverting the runc version back to 1.0.0-rc92. 
+ + ### AMI Release v20210329 * amazon-eks-gpu-node-1.19-v20210329 From 189baaa77c14120a1b62c42bacced17ba429466b Mon Sep 17 00:00:00 2001 From: Vishal Gupta Date: Thu, 22 Apr 2021 13:19:31 -0700 Subject: [PATCH 162/621] adding version lock on runc, containerd and docker (#654) * adding version lock on runc and containerd * moving runc version to packer variable and locking docker version also Co-authored-by: Vishal Gupta --- Makefile | 2 +- eks-worker-al2.json | 2 ++ scripts/install-worker.sh | 7 ++++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index b65f86497..6a38fe7fe 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version cni_plugin_version source_ami_id source_ami_owners source_ami_filter_name arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry +PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version runc_version cni_plugin_version source_ami_id source_ami_owners source_ami_filter_name arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 20fc62aba..9bc714cce 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -15,6 +15,7 @@ "kernel_version": "", "docker_version": "19.03.13ce-1.amzn2", "containerd_version": "1.4.1-2.amzn2", + "runc_version": "1.0.0-0.1.20200826.gitff819c7.amzn2", "cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", "source_ami_id": "", @@ -141,6 +142,7 @@ "BINARY_BUCKET_REGION={{user 
`binary_bucket_region`}}", "DOCKER_VERSION={{user `docker_version`}}", "CONTAINERD_VERSION={{user `containerd_version`}}", + "RUNC_VERSION={{user `runc_version`}}", "CNI_PLUGIN_VERSION={{user `cni_plugin_version`}}", "PULL_CNI_FROM_GITHUB={{user `pull_cni_from_github`}}", "AWS_ACCESS_KEY_ID={{user `aws_access_key_id`}}", diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index d4aecf62a..2d75e54f6 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -25,6 +25,7 @@ validate_env_set BINARY_BUCKET_NAME validate_env_set BINARY_BUCKET_REGION validate_env_set DOCKER_VERSION validate_env_set CONTAINERD_VERSION +validate_env_set RUNC_VERSION validate_env_set CNI_PLUGIN_VERSION validate_env_set KUBERNETES_VERSION validate_env_set KUBERNETES_BUILD_DATE @@ -127,7 +128,11 @@ if [[ "$INSTALL_DOCKER" == "true" ]]; then # runc `1.0.0-rc93` resulted in a regression: https://github.com/awslabs/amazon-eks-ami/issues/648 # pinning it to `1.0.0-rc92` - sudo yum downgrade -y runc.${MACHINE} 1.0.0-0.1.20200826.gitff819c7.amzn2 + sudo yum downgrade -y runc.${MACHINE} ${RUNC_VERSION} + + # install versionlock plugin and lock runc, containerd and docker versions + sudo yum install -y yum-plugin-versionlock + sudo yum versionlock runc-* containerd-* docker-* # Enable docker daemon to start on boot. 
sudo systemctl daemon-reload From 8f4e8da1274e4a908dc9631a6561f8b80b206ad9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=B6ran=20Damberg?= Date: Fri, 30 Apr 2021 19:36:58 +0200 Subject: [PATCH 163/621] Initialize SONOBUOY_E2E_REGISTRY variable (#655) --- scripts/install-worker.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 2d75e54f6..fe722f7f0 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -246,6 +246,7 @@ sudo mv $TEMPLATE_DIR/eni-max-pods.txt /etc/eks/eni-max-pods.txt sudo mv $TEMPLATE_DIR/bootstrap.sh /etc/eks/bootstrap.sh sudo chmod +x /etc/eks/bootstrap.sh +SONOBUOY_E2E_REGISTRY="${SONOBUOY_E2E_REGISTRY:-}" if [[ -n "$SONOBUOY_E2E_REGISTRY" ]]; then sudo mv $TEMPLATE_DIR/sonobuoy-e2e-registry-config /etc/eks/sonobuoy-e2e-registry-config sudo sed -i s,SONOBUOY_E2E_REGISTRY,$SONOBUOY_E2E_REGISTRY,g /etc/eks/sonobuoy-e2e-registry-config From 9c58dd3a8f56e5157e7db6c50f4975d37c037217 Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Mon, 3 May 2021 10:22:40 -0700 Subject: [PATCH 164/621] Adding release v20210501 to Changelog (#659) --- CHANGELOG.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index caaeab974..744a4d330 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,38 @@ # Changelog +## AMI Release v20210501 + +* amazon-eks-gpu-node-1.19-v20210501 +* amazon-eks-gpu-node-1.18-v20210501 +* amazon-eks-gpu-node-1.17-v20210501 +* amazon-eks-gpu-node-1.16-v20210501 +* amazon-eks-gpu-node-1.15-v20210501 +* amazon-eks-arm64-node-1.19-v20210501 +* amazon-eks-arm64-node-1.18-v20210501 +* amazon-eks-arm64-node-1.17-v20210501 +* amazon-eks-arm64-node-1.16-v20210501 +* amazon-eks-arm64-node-1.15-v20210501 +* amazon-eks-node-1.19-v20210501 +* amazon-eks-node-1.18-v20210501 +* amazon-eks-node-1.17-v20210501 +* amazon-eks-node-1.16-v20210501 +* 
amazon-eks-node-1.15-v20210501 + +Binaries used to build these AMIs are published: + +s3://amazon-eks/1.19.6/2021-01-05/ +s3://amazon-eks/1.18.9/2020-11-02/ +s3://amazon-eks/1.17.12/2020-11-02/ +s3://amazon-eks/1.16.15/2020-11-02/ +s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes: + +* Patches for Linux kernel 4.14, used by AMIs with Kubernetes v1.18 and below (CVE ALAS2-2021-1627) +* Patches for Linux kernel 5.4, used by AMIs with Kubernetes v1.19 to fix a race condition with Conntrack. + + + ### AMI Release v20210414 * amazon-eks-gpu-node-1.19-v20210414 From e18d406919f6f87214904e7560d36f1eb4f39220 Mon Sep 17 00:00:00 2001 From: Visuna Date: Fri, 14 May 2021 14:04:03 -0700 Subject: [PATCH 165/621] Release 1.20 AMIs (#664) * Release 1.20 AMIs Co-authored-by: Qingqing Li --- CHANGELOG.md | 30 ++++++++++++++++++++++++++++++ Makefile | 6 +++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 744a4d330..b80e57266 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,35 @@ # Changelog +## AMI Release v20210512 + +* amazon-eks-gpu-node-1.19-v20210512 +* amazon-eks-gpu-node-1.18-v20210512 +* amazon-eks-gpu-node-1.17-v20210512 +* amazon-eks-gpu-node-1.16-v20210512 +* amazon-eks-gpu-node-1.15-v20210512 +* amazon-eks-arm64-node-1.19-v20210512 +* amazon-eks-arm64-node-1.18-v20210512 +* amazon-eks-arm64-node-1.17-v20210512 +* amazon-eks-arm64-node-1.16-v20210512 +* amazon-eks-arm64-node-1.15-v20210512 +* amazon-eks-node-1.19-v20210512 +* amazon-eks-node-1.18-v20210512 +* amazon-eks-node-1.17-v20210512 +* amazon-eks-node-1.16-v20210512 +* amazon-eks-node-1.15-v20210512 + +Binaries used to build these AMIs are published: + +* s3://amazon-eks/1.20.4/2021-04-12/ +* s3://amazon-eks/1.19.6/2021-01-05/ +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes: +* Release 1.20 AMIs + ## AMI Release v20210501 * 
amazon-eks-gpu-node-1.19-v20210501 diff --git a/Makefile b/Makefile index 6a38fe7fe..b80e0ac34 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.15 1.16 1.17 1.18 1.19 +all: 1.15 1.16 1.17 1.18 1.19 1.20 .PHONY: validate validate: @@ -62,3 +62,7 @@ k8s: validate .PHONY: 1.19 1.19: $(MAKE) k8s kubernetes_version=1.19.6 kubernetes_build_date=2021-01-05 pull_cni_from_github=true + +.PHONY: 1.20 +1.20: + $(MAKE) k8s kubernetes_version=1.20.4 kubernetes_build_date=2021-04-12 pull_cni_from_github=true \ No newline at end of file From 8c99122b4d740981b40bbc523749a7c5ada2a208 Mon Sep 17 00:00:00 2001 From: Vishal Gupta Date: Wed, 19 May 2021 13:06:27 -0700 Subject: [PATCH 166/621] runc cve patch (#666) Co-authored-by: Vishal Gupta --- CHANGELOG.md | 32 ++++++++++++++++++++++++++++++++ eks-worker-al2.json | 2 +- scripts/install-worker.sh | 24 ++++++++++++++---------- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b80e57266..19040bab4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,37 @@ # Changelog +## AMI Release v20210518 + +* amazon-eks-gpu-node-1.19-v20210518 +* amazon-eks-gpu-node-1.18-v20210518 +* amazon-eks-gpu-node-1.17-v20210518 +* amazon-eks-gpu-node-1.16-v20210518 +* amazon-eks-gpu-node-1.15-v20210518 +* amazon-eks-arm64-node-1.19-v20210518 +* amazon-eks-arm64-node-1.18-v20210518 +* amazon-eks-arm64-node-1.17-v20210518 +* amazon-eks-arm64-node-1.16-v20210518 +* amazon-eks-arm64-node-1.15-v20210518 +* amazon-eks-node-1.19-v20210518 +* amazon-eks-node-1.18-v20210518 +* amazon-eks-node-1.17-v20210518 +* amazon-eks-node-1.16-v20210518 +* amazon-eks-node-1.15-v20210518 + +Binaries used to build these AMIs are published: + +* s3://amazon-eks/1.20.4/2021-04-12/ +* s3://amazon-eks/1.19.6/2021-01-05/ +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ 
+ +Notable changes: +* `runc` version upgrade to `rc93` +* [fix](https://github.com/opencontainers/runc/pull/2871) for [#2530](https://github.com/opencontainers/runc/issues/2530) backported to `rc93` +* [`runc` CVE 2021-30465](https://github.com/opencontainers/runc/security/advisories/GHSA-c3xm-pvg7-gh7r) patch backported to `rc93` + ## AMI Release v20210512 * amazon-eks-gpu-node-1.19-v20210512 diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 9bc714cce..faeb9e7bd 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -15,7 +15,7 @@ "kernel_version": "", "docker_version": "19.03.13ce-1.amzn2", "containerd_version": "1.4.1-2.amzn2", - "runc_version": "1.0.0-0.1.20200826.gitff819c7.amzn2", + "runc_version": "1.0.0-0.3.20210225.git12644e6.amzn2", "cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", "source_ami_id": "", diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index fe722f7f0..dd100e44e 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -114,7 +114,21 @@ if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo amazon-linux-extras enable docker sudo groupadd -fog 1950 docker sudo useradd --gid $(getent group docker | cut -d: -f3) docker + + # install version lock to put a lock on dependecies + sudo yum install -y yum-plugin-versionlock + + # install runc and lock version + sudo yum install -y runc-${RUNC_VERSION} + sudo yum versionlock runc-* + + # install containerd and lock version + sudo yum install -y containerd-${CONTAINERD_VERSION} + sudo yum versionlock containerd-* + + # install docker and lock version sudo yum install -y docker-${DOCKER_VERSION}* + sudo yum versionlock docker-* sudo usermod -aG docker $USER # Remove all options from sysconfig docker. 
@@ -124,16 +138,6 @@ if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo mv $TEMPLATE_DIR/docker-daemon.json /etc/docker/daemon.json sudo chown root:root /etc/docker/daemon.json - sudo yum downgrade -y containerd-${CONTAINERD_VERSION} - - # runc `1.0.0-rc93` resulted in a regression: https://github.com/awslabs/amazon-eks-ami/issues/648 - # pinning it to `1.0.0-rc92` - sudo yum downgrade -y runc.${MACHINE} ${RUNC_VERSION} - - # install versionlock plugin and lock runc, containerd and docker versions - sudo yum install -y yum-plugin-versionlock - sudo yum versionlock runc-* containerd-* docker-* - # Enable docker daemon to start on boot. sudo systemctl daemon-reload sudo systemctl enable docker From 1e0d0aa70ddca1f9a787822fa562f451ee015199 Mon Sep 17 00:00:00 2001 From: Vishal Gupta Date: Thu, 20 May 2021 14:00:11 -0700 Subject: [PATCH 167/621] runc cve patch for GPU AMIs (#668) Co-authored-by: Vishal Gupta --- CHANGELOG.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 19040bab4..44bcfe8ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,47 @@ # Changelog +## AMI Release v20210519 +* amazon-eks-gpu-node-1.20-v20210519 +* amazon-eks-gpu-node-1.19-v20210519 +* amazon-eks-gpu-node-1.18-v20210519 +* amazon-eks-gpu-node-1.17-v20210519 +* amazon-eks-gpu-node-1.16-v20210519 +* amazon-eks-gpu-node-1.15-v20210519 +* amazon-eks-arm64-node-1.20-v20210519 +* amazon-eks-arm64-node-1.19-v20210519 +* amazon-eks-arm64-node-1.18-v20210519 +* amazon-eks-arm64-node-1.17-v20210519 +* amazon-eks-arm64-node-1.16-v20210519 +* amazon-eks-arm64-node-1.15-v20210519 +* amazon-eks-node-1.20-v20210519 +* amazon-eks-node-1.19-v20210519 +* amazon-eks-node-1.18-v20210519 +* amazon-eks-node-1.17-v20210519 +* amazon-eks-node-1.16-v20210519 +* amazon-eks-node-1.15-v20210519 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.20.4/2021-04-12/ +* s3://amazon-eks/1.19.6/2021-01-05/ +* 
s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +AMI details: +* kernal: 5.4.110-54.189.amzn2.x86_64 (1.19 and above), 4.14.231-173.361.amzn2.x86_64 (1.18 and below) +* dockerd: 19.03.13-ce +* containerd: 1.4.1 +* runc: 1.0.0-rc93 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 1.4.0 + + +Notable changes: +* `runc` version upgrade to `rc93` for GPU AMIs +* [fix](https://github.com/opencontainers/runc/pull/2871) for [#2530](https://github.com/opencontainers/runc/issues/2530) backported to `rc93` for GPU AMIs +* [`runc` CVE 2021-30465](https://github.com/opencontainers/runc/security/advisories/GHSA-c3xm-pvg7-gh7r) patch backported to `rc93` for GPU AMIs + ## AMI Release v20210518 * amazon-eks-gpu-node-1.19-v20210518 From 396355d3e77353694d5e53bfef87baf44533a989 Mon Sep 17 00:00:00 2001 From: Vishal Gupta Date: Fri, 28 May 2021 12:42:57 -0700 Subject: [PATCH 168/621] release bind CVE patch (#675) * release bind CVE patch * fixed typo Co-authored-by: Vishal Gupta --- CHANGELOG.md | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44bcfe8ac..7f0d976d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,46 @@ # Changelog +## AMI Release v20210526 +* amazon-eks-gpu-node-1.20-v20210526 +* amazon-eks-gpu-node-1.19-v20210526 +* amazon-eks-gpu-node-1.18-v20210526 +* amazon-eks-gpu-node-1.17-v20210526 +* amazon-eks-gpu-node-1.16-v20210526 +* amazon-eks-gpu-node-1.15-v20210526 +* amazon-eks-arm64-node-1.20-v20210526 +* amazon-eks-arm64-node-1.19-v20210526 +* amazon-eks-arm64-node-1.18-v20210526 +* amazon-eks-arm64-node-1.17-v20210526 +* amazon-eks-arm64-node-1.16-v20210526 +* amazon-eks-arm64-node-1.15-v20210526 +* amazon-eks-node-1.20-v20210526 +* amazon-eks-node-1.19-v20210526 +* amazon-eks-node-1.18-v20210526 +* amazon-eks-node-1.17-v20210526 +* 
amazon-eks-node-1.16-v20210526 +* amazon-eks-node-1.15-v20210526 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.20.4/2021-04-12/ +* s3://amazon-eks/1.19.6/2021-01-05/ +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +AMI details: +* kernel: 5.4.117-58.216.amzn2.x86_64 (1.19 and above), 4.14.232-176.381.amzn2.x86_64 (1.18 and below) +* dockerd: 19.03.13-ce +* containerd: 1.4.1 +* runc: 1.0.0-rc93 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 1.4.0 + + +Notable changes: +* [CVE-2021-25215](https://access.redhat.com/security/cve/CVE-2021-25215) patch +* kenel patch for following CVEs: [CVE-2021-31829](https://access.redhat.com/security/cve/CVE-2021-31829), [CVE-2021-23133](https://access.redhat.com/security/cve/CVE-2021-23133), [CVE-2020-29374](https://access.redhat.com/security/cve/CVE-2020-29374) + ## AMI Release v20210519 * amazon-eks-gpu-node-1.20-v20210519 * amazon-eks-gpu-node-1.19-v20210519 @@ -29,7 +70,7 @@ Binaries used to build these AMIs are published: * s3://amazon-eks/1.15.12/2020-11-02/ AMI details: -* kernal: 5.4.110-54.189.amzn2.x86_64 (1.19 and above), 4.14.231-173.361.amzn2.x86_64 (1.18 and below) +* kernel: 5.4.110-54.189.amzn2.x86_64 (1.19 and above), 4.14.231-173.361.amzn2.x86_64 (1.18 and below) * dockerd: 19.03.13-ce * containerd: 1.4.1 * runc: 1.0.0-rc93 From c3de675fd71fe39354bb02e5bdfb94aa09086dab Mon Sep 17 00:00:00 2001 From: Sylvain Rabot Date: Thu, 3 Jun 2021 18:47:18 +0200 Subject: [PATCH 169/621] Add ipvsadm (#670) Signed-off-by: Sylvain Rabot --- scripts/install-worker.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index dd100e44e..46cb9d9b4 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -64,7 +64,8 @@ sudo yum install -y \ nfs-utils \ socat \ unzip \ - wget + wget \ + ipvsadm # Remove the 
ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi From 3616f1da5dab0cc6256ee60df4a0e5f2d8a42684 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 8 Jun 2021 15:23:59 -0700 Subject: [PATCH 170/621] Resolves #673 by updating eni-max-pods.txt (#676) --- amazon-eks-nodegroup.yaml | 17 +++++++++++++++++ files/eni-max-pods.txt | 6 +++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index 3ae645796..630551546 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -296,6 +296,7 @@ Parameters: - m5dn.4xlarge - m5dn.8xlarge - m5dn.large + - m5dn.metal - m5dn.xlarge - m5n.12xlarge - m5n.16xlarge @@ -304,6 +305,7 @@ Parameters: - m5n.4xlarge - m5n.8xlarge - m5n.large + - m5n.metal - m5n.xlarge - m5zn.12xlarge - m5zn.2xlarge @@ -400,6 +402,7 @@ Parameters: - r5dn.4xlarge - r5dn.8xlarge - r5dn.large + - r5dn.metal - r5dn.xlarge - r5n.12xlarge - r5n.16xlarge @@ -408,6 +411,7 @@ Parameters: - r5n.4xlarge - r5n.8xlarge - r5n.large + - r5n.metal - r5n.xlarge - r6g.12xlarge - r6g.16xlarge @@ -456,10 +460,14 @@ Parameters: - t4g.nano - t4g.small - t4g.xlarge + - u-12tb1.112xlarge - u-12tb1.metal - u-18tb1.metal - u-24tb1.metal + - u-6tb1.112xlarge + - u-6tb1.56xlarge - u-6tb1.metal + - u-9tb1.112xlarge - u-9tb1.metal - x1.16xlarge - x1.32xlarge @@ -469,6 +477,15 @@ Parameters: - x1e.4xlarge - x1e.8xlarge - x1e.xlarge + - x2gd.12xlarge + - x2gd.16xlarge + - x2gd.2xlarge + - x2gd.4xlarge + - x2gd.8xlarge + - x2gd.large + - x2gd.medium + - x2gd.metal + - x2gd.xlarge - z1d.12xlarge - z1d.2xlarge - z1d.3xlarge diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 8a7877a9b..ac83f32cd 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. 
See the License for the specific language governing # permissions and limitations under the License. # -# This file was generated at 2021-03-16T15:26:13-07:00 +# This file was generated at 2021-06-08T13:46:17-07:00 # # Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods @@ -393,10 +393,14 @@ t4g.micro 4 t4g.nano 4 t4g.small 11 t4g.xlarge 58 +u-12tb1.112xlarge 737 u-12tb1.metal 147 u-18tb1.metal 737 u-24tb1.metal 737 +u-6tb1.112xlarge 737 +u-6tb1.56xlarge 737 u-6tb1.metal 147 +u-9tb1.112xlarge 737 u-9tb1.metal 147 x1.16xlarge 234 x1.32xlarge 234 From f522c2e38e8de70d7fc4a2c9dfe91429e65ceacd Mon Sep 17 00:00:00 2001 From: MyannaHarris Date: Thu, 17 Jun 2021 12:15:57 -0700 Subject: [PATCH 171/621] [SSM Agent] Install the SSM Agent in the EKS worker AMI This change installs the SSM Agent in the EKS worker AMI by default. The nodeRole used for the worker nodes will need to have the SSM permissions added to it for the SSM agent to be able to report back to SSM. If the SSM permissions aren't present the agent will be active but unable to report to SSM. 
--- scripts/install-worker.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 46cb9d9b4..e474bd11a 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -257,6 +257,21 @@ if [[ -n "$SONOBUOY_E2E_REGISTRY" ]]; then sudo sed -i s,SONOBUOY_E2E_REGISTRY,$SONOBUOY_E2E_REGISTRY,g /etc/eks/sonobuoy-e2e-registry-config fi +################################################################################ +### SSM Agent ################################################################## +################################################################################ + +if [ "$BINARY_BUCKET_REGION" != "us-iso-east-1" ] && [ "$BINARY_BUCKET_REGION" != "us-isob-east-1" ]; then + if [ "$BINARY_BUCKET_REGION" = "cn-north-1" ] || [ "$BINARY_BUCKET_REGION" = "cn-northwest-1" ]; then + sudo yum install -y https://s3.cn-north-1.amazonaws.com.cn/amazon-ssm-cn-north-1/latest/linux_$ARCH/amazon-ssm-agent.rpm + else + sudo yum install -y https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/linux_$ARCH/amazon-ssm-agent.rpm + fi + + sudo systemctl enable amazon-ssm-agent + sudo systemctl start amazon-ssm-agent +fi + ################################################################################ ### AMI Metadata ############################################################### ################################################################################ From 81629ce0e214708a71b0a07e7c486706b652e99f Mon Sep 17 00:00:00 2001 From: MyannaHarris Date: Thu, 24 Jun 2021 12:04:02 -0700 Subject: [PATCH 172/621] [SSM Agent] Release AMIs with SSM agent installed * SSM agent automatically installed --- CHANGELOG.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f0d976d4..0bc05d00e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,45 @@ # Changelog +## AMI Release v20210621 +* 
amazon-eks-gpu-node-1.20-v20210621 +* amazon-eks-gpu-node-1.19-v20210621 +* amazon-eks-gpu-node-1.18-v20210621 +* amazon-eks-gpu-node-1.17-v20210621 +* amazon-eks-gpu-node-1.16-v20210621 +* amazon-eks-gpu-node-1.15-v20210621 +* amazon-eks-arm64-node-1.20-v20210621 +* amazon-eks-arm64-node-1.19-v20210621 +* amazon-eks-arm64-node-1.18-v20210621 +* amazon-eks-arm64-node-1.17-v20210621 +* amazon-eks-arm64-node-1.16-v20210621 +* amazon-eks-arm64-node-1.15-v20210621 +* amazon-eks-node-1.20-v20210621 +* amazon-eks-node-1.19-v20210621 +* amazon-eks-node-1.18-v20210621 +* amazon-eks-node-1.17-v20210621 +* amazon-eks-node-1.16-v20210621 +* amazon-eks-node-1.15-v20210621 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.20.4/2021-04-12/ +* s3://amazon-eks/1.19.6/2021-01-05/ +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +AMI details: +* kernel: 5.4.117-58.216.amzn2.x86_64 (1.19 and above), 4.14.232-176.381.amzn2.x86_64 (1.18 and below) +* dockerd: 19.03.13-ce +* containerd: 1.4.1 +* runc: 1.0.0-rc93 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 1.4.0 +* SSM agent: 3.0.1295.0 + +Notable changes: +* The SSM Agent will now be automatically installed + ## AMI Release v20210526 * amazon-eks-gpu-node-1.20-v20210526 * amazon-eks-gpu-node-1.19-v20210526 From 87ad861831c4f743d78d4e5a8cdae4aa17e48c31 Mon Sep 17 00:00:00 2001 From: MyannaHarris Date: Tue, 29 Jun 2021 10:21:56 -0700 Subject: [PATCH 173/621] [SSM Agent] Just enable the SSM agent and don't start it --- scripts/install-worker.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index e474bd11a..b432911d4 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -269,7 +269,6 @@ if [ "$BINARY_BUCKET_REGION" != "us-iso-east-1" ] && [ "$BINARY_BUCKET_REGION" ! 
fi sudo systemctl enable amazon-ssm-agent - sudo systemctl start amazon-ssm-agent fi ################################################################################ From 8ffac8ffadf0dfa7e07e86a642339bf5960dce9c Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 30 Jun 2021 08:24:52 -0700 Subject: [PATCH 174/621] Adds release v20210628 to the CHANGELOG (#688) --- CHANGELOG.md | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0bc05d00e..cd50ffec1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,46 @@ # Changelog +## AMI Release v20210628 +* amazon-eks-gpu-node-1.20-v20210628 +* amazon-eks-gpu-node-1.19-v20210628 +* amazon-eks-gpu-node-1.18-v20210628 +* amazon-eks-gpu-node-1.17-v20210628 +* amazon-eks-gpu-node-1.16-v20210628 +* amazon-eks-gpu-node-1.15-v20210628 +* amazon-eks-arm64-node-1.20-v20210628 +* amazon-eks-arm64-node-1.19-v20210628 +* amazon-eks-arm64-node-1.18-v20210628 +* amazon-eks-arm64-node-1.17-v20210628 +* amazon-eks-arm64-node-1.16-v20210628 +* amazon-eks-arm64-node-1.15-v20210628 +* amazon-eks-node-1.20-v20210628 +* amazon-eks-node-1.19-v20210628 +* amazon-eks-node-1.18-v20210628 +* amazon-eks-node-1.17-v20210628 +* amazon-eks-node-1.16-v20210628 +* amazon-eks-node-1.15-v20210628 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.20.4/2021-04-12/ +* s3://amazon-eks/1.19.6/2021-01-05/ +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +AMI details: +* kernel: 5.4.117-58.216.amzn2 (1.19 and above), 4.14.232-177.418.amzn2 (1.18 and below) +* dockerd: 19.03.13ce +* containerd: 1.4.1 +* runc: 1.0.0-rc93 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 460.73.01 +* SSM agent: 3.0.1295.0 + +Notable changes: + +Includes the latest security patches for [systemd](https://alas.aws.amazon.com/AL2/ALAS-2021-1647.html), 
[python3](https://alas.aws.amazon.com/AL2/ALAS-2021-1670.html) and others. + ## AMI Release v20210621 * amazon-eks-gpu-node-1.20-v20210621 * amazon-eks-gpu-node-1.19-v20210621 From ff309f197b79110cae6d8140d0f875338c0c2c5d Mon Sep 17 00:00:00 2001 From: MyannaHarris Date: Thu, 8 Jul 2021 12:18:00 -0700 Subject: [PATCH 175/621] [SSM Agent] Update the SSM agent install command to work for all regions --- scripts/install-worker.sh | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index b432911d4..d6f6787f4 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -261,15 +261,7 @@ fi ### SSM Agent ################################################################## ################################################################################ -if [ "$BINARY_BUCKET_REGION" != "us-iso-east-1" ] && [ "$BINARY_BUCKET_REGION" != "us-isob-east-1" ]; then - if [ "$BINARY_BUCKET_REGION" = "cn-north-1" ] || [ "$BINARY_BUCKET_REGION" = "cn-northwest-1" ]; then - sudo yum install -y https://s3.cn-north-1.amazonaws.com.cn/amazon-ssm-cn-north-1/latest/linux_$ARCH/amazon-ssm-agent.rpm - else - sudo yum install -y https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/linux_$ARCH/amazon-ssm-agent.rpm - fi - - sudo systemctl enable amazon-ssm-agent -fi +sudo yum install -y amazon-ssm-agent ################################################################################ ### AMI Metadata ############################################################### From af6a02dec0171bae7a20605e1427ba4d9e051bc2 Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Tue, 13 Jul 2021 14:51:14 -0700 Subject: [PATCH 176/621] Containerd runtime support (#698) * adding support for containerd runtime support in eks worker ami Co-authored-by: Sinha --- files/bootstrap.sh | 51 +++++++++++++++++++++++--------- files/containerd-config.toml | 16 ++++++++++ files/kubelet-containerd.service | 22 ++++++++++++++ 
scripts/install-worker.sh | 37 +++++++++++++++++------ 4 files changed, 103 insertions(+), 23 deletions(-) create mode 100644 files/containerd-config.toml create mode 100644 files/kubelet-containerd.service diff --git a/files/bootstrap.sh b/files/bootstrap.sh index b2a3c1698..c59855a0c 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -26,6 +26,7 @@ function print_help { echo "--dns-cluster-ip Overrides the IP address to use for DNS queries within the cluster. Defaults to 10.100.0.10 or 172.20.0.10 based on the IP address of the primary interface" echo "--pause-container-account The AWS account (number) to pull the pause container from" echo "--pause-container-version The tag of the pause container" + echo "--container-runtime Specify a container runtime (default: dockerd)" } POSITIONAL=() @@ -87,6 +88,11 @@ while [[ $# -gt 0 ]]; do shift shift ;; + --container-runtime) + CONTAINER_RUNTIME=$2 + shift + shift + ;; *) # unknown option POSITIONAL+=("$1") # save it in an array for later shift # past argument @@ -109,6 +115,7 @@ ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" API_RETRY_ATTEMPTS="${API_RETRY_ATTEMPTS:-3}" DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.1-eksbuild.1}" +CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-dockerd}" function get_pause_container_account_for_region () { local region="$1" @@ -392,24 +399,40 @@ Environment='KUBELET_EXTRA_ARGS=$KUBELET_EXTRA_ARGS' EOF fi -# Replace with custom docker config contents. 
-if [[ -n "$DOCKER_CONFIG_JSON" ]]; then +if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then + sudo mkdir -p /etc/containerd + sudo mkdir -p /etc/cni/net.d + sudo mv /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml + sudo mv /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service + sudo chown root:root /etc/systemd/system/kubelet.service + systemctl daemon-reload + systemctl enable containerd + systemctl start containerd +elif [[ "$CONTAINER_RUNTIME" = "dockerd" ]]; then mkdir -p /etc/docker - - echo "$DOCKER_CONFIG_JSON" > /etc/docker/daemon.json - systemctl restart docker + bash -c "/sbin/iptables-save > /etc/sysconfig/iptables" + mv /etc/eks/iptables-restore.service /etc/systemd/system/iptables-restore.service + sudo chown root:root /etc/systemd/system/iptables-restore.service + systemctl daemon-reload + systemctl enable iptables-restore + + if [[ -n "$DOCKER_CONFIG_JSON" ]]; then + echo "$DOCKER_CONFIG_JSON" > /etc/docker/daemon.json + fi + if [[ "$ENABLE_DOCKER_BRIDGE" = "true" ]]; then + # Enabling the docker bridge network. We have to disable live-restore as it + # prevents docker from recreating the default bridge network on restart + echo "$(jq '.bridge="docker0" | ."live-restore"=false' /etc/docker/daemon.json)" > /etc/docker/daemon.json + fi + systemctl daemon-reload + systemctl enable docker + systemctl start docker +else + echo "Container runtime ${CONTAINER_RUNTIME} is not supported." + exit 1 fi -if [[ "$ENABLE_DOCKER_BRIDGE" = "true" ]]; then - mkdir -p /etc/docker - - # Enabling the docker bridge network. 
We have to disable live-restore as it - # prevents docker from recreating the default bridge network on restart - echo "$(jq '.bridge="docker0" | ."live-restore"=false' /etc/docker/daemon.json)" > /etc/docker/daemon.json - systemctl restart docker -fi -systemctl daemon-reload systemctl enable kubelet systemctl start kubelet diff --git a/files/containerd-config.toml b/files/containerd-config.toml new file mode 100644 index 000000000..79e53f1ec --- /dev/null +++ b/files/containerd-config.toml @@ -0,0 +1,16 @@ +version = 2 +root = "/var/lib/containerd" +state = "/run/containerd" + +[grpc] +address = "/run/dockershim.sock" + +[plugins."io.containerd.grpc.v1.cri".containerd] +default_runtime_name = "runc" + +[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] +runtime_type = "io.containerd.runc.v2" + +[plugins."io.containerd.grpc.v1.cri".cni] +bin_dir = "/opt/cni/bin" +conf_dir = "/etc/cni/net.d" diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service new file mode 100644 index 000000000..ed08ed97a --- /dev/null +++ b/files/kubelet-containerd.service @@ -0,0 +1,22 @@ +[Unit] +Description=Kubernetes Kubelet +Documentation=https://github.com/kubernetes/kubernetes +After=containerd.service +Requires=containerd.service + +[Service] +ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 +ExecStart=/usr/bin/kubelet --cloud-provider aws \ + --config /etc/kubernetes/kubelet/kubelet-config.json \ + --kubeconfig /var/lib/kubelet/kubeconfig \ + --container-runtime remote \ + --container-runtime-endpoint unix:///run/dockershim.sock \ + --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS + +Restart=on-failure +RestartForceExitStatus=SIGPIPE +RestartSec=5 +KillMode=process + +[Install] +WantedBy=multi-user.target diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index d6f6787f4..cb464a31a 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -95,14 +95,8 @@ fi 
################################################################################ ### iptables ################################################################### ################################################################################ - -# Enable forwarding via iptables -sudo bash -c "/sbin/iptables-save > /etc/sysconfig/iptables" - -sudo mv $TEMPLATE_DIR/iptables-restore.service /etc/systemd/system/iptables-restore.service - -sudo systemctl daemon-reload -sudo systemctl enable iptables-restore +sudo mkdir -p /etc/eks +sudo mv $TEMPLATE_DIR/iptables-restore.service /etc/eks/iptables-restore.service ################################################################################ ### Docker ##################################################################### @@ -141,9 +135,34 @@ if [[ "$INSTALL_DOCKER" == "true" ]]; then # Enable docker daemon to start on boot. sudo systemctl daemon-reload - sudo systemctl enable docker fi +############################################################################### +### Containerd setup ########################################################## +############################################################################### + +sudo mkdir -p /etc/eks/containerd +if [ -f "/etc/eks/containerd/containerd-config.toml" ]; then + ## this means we are building a gpu ami and have already placed a containerd configuration file in /etc/eks + echo "containerd config is already present" +else + sudo mv $TEMPLATE_DIR/containerd-config.toml /etc/eks/containerd/containerd-config.toml +fi + +sudo mv $TEMPLATE_DIR/kubelet-containerd.service /etc/eks/containerd/kubelet-containerd.service + +cat < Date: Mon, 19 Jul 2021 16:29:25 -0700 Subject: [PATCH 177/621] Changelog v20210716 (#700) * adding support for containerd runtime support in eks worker ami * msg * adding support for containerd runtime support in eks worker ami * adds release v20210716 to the CHANGELOG Co-authored-by: Sinha --- CHANGELOG.md | 31 +++++++++++++++++++++++++++++++ 1 
file changed, 31 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd50ffec1..8417d3589 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,36 @@ # Changelog +## AMI Release v20210716 + +* amazon-eks-gpu-node-1.21-v20210716 +* amazon-eks-gpu-node-1.20-v20210716 +* amazon-eks-gpu-node-1.19-v20210716 +* amazon-eks-gpu-node-1.18-v20210716 +* amazon-eks-gpu-node-1.17-v20210716 +* amazon-eks-gpu-node-1.16-v20210716 +* amazon-eks-gpu-node-1.15-v20210716 +* amazon-eks-arm64-node-1.21-v20210716 +* amazon-eks-arm64-node-1.20-v20210716 +* amazon-eks-arm64-node-1.19-v20210716 +* amazon-eks-arm64-node-1.18-v20210716 +* amazon-eks-arm64-node-1.17-v20210716 +* amazon-eks-arm64-node-1.16-v20210716 +* amazon-eks-arm64-node-1.15-v20210716 +* amazon-eks-node-1.21-v20210716 +* amazon-eks-node-1.20-v20210716 +* amazon-eks-node-1.19-v20210716 +* amazon-eks-node-1.18-v20210716 +* amazon-eks-node-1.17-v20210716 +* amazon-eks-node-1.16-v20210716 +* amazon-eks-node-1.15-v20210716 + +EKS AMI release for Kubernetes version 1.21. +* Note: The containerd has patch for CVE-2021-32760 + +Containerd runtime support +The EKS Optimized Amazon Linux 2 AMI now contains a bootstrap (https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh) flag --container-runtime to optionally enable the containerd runtime. This flag is available in all supported Kubernetes versions of the AMI. This change is to get ahead of the removal of Docker as a supported runtime in Kubernetes (more details here (https://kubernetes.io/blog/2020/12/02/dockershim-faq/)). Feedback is appreciated.
+ + ## AMI Release v20210628 * amazon-eks-gpu-node-1.20-v20210628 * amazon-eks-gpu-node-1.19-v20210628 From 0a532cf9cc998a6d4779c9f9a8fc57b02ed5194f Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Mon, 19 Jul 2021 19:39:19 -0700 Subject: [PATCH 178/621] Adding that 1.21 GPU and ARM AMIs aren't in us-gov-west-1 and us-gov-east-1 (#702) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8417d3589..b39c06193 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,7 +24,7 @@ * amazon-eks-node-1.16-v20210716 * amazon-eks-node-1.15-v20210716 -EKS AMI release for Kubernetes version 1.21. +EKS AMI release for Kubernetes version 1.21 (1.21 AMIs for GPU and ARM in us-gov-west-1 and us-gov-east-1 aren't a part of this release) * Note: The containerd has patch for CVE-2021-32760 Containerd runtime support From 464a4b15a39db3417d328c5b23c9e52d1466ab30 Mon Sep 17 00:00:00 2001 From: MyannaHarris Date: Tue, 20 Jul 2021 10:57:22 -0700 Subject: [PATCH 179/621] [1.21] Release 1.21 for us-gov-west-1 and us-gov-east-1 --- CHANGELOG.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b39c06193..93c9b416e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,35 @@ # Changelog +## AMI Release v20210720 + +* amazon-eks-gpu-node-1.21-v20210720 +* amazon-eks-gpu-node-1.20-v20210720 +* amazon-eks-gpu-node-1.19-v20210720 +* amazon-eks-gpu-node-1.18-v20210720 +* amazon-eks-gpu-node-1.17-v20210720 +* amazon-eks-gpu-node-1.16-v20210720 +* amazon-eks-gpu-node-1.15-v20210720 +* amazon-eks-arm64-node-1.21-v20210720 +* amazon-eks-arm64-node-1.20-v20210720 +* amazon-eks-arm64-node-1.19-v20210720 +* amazon-eks-arm64-node-1.18-v20210720 +* amazon-eks-arm64-node-1.17-v20210720 +* amazon-eks-arm64-node-1.16-v20210720 +* amazon-eks-arm64-node-1.15-v20210720 +* amazon-eks-node-1.21-v20210720 +* amazon-eks-node-1.20-v20210720 +*
amazon-eks-node-1.19-v20210720 +* amazon-eks-node-1.18-v20210720 +* amazon-eks-node-1.17-v20210720 +* amazon-eks-node-1.16-v20210720 +* amazon-eks-node-1.15-v20210720 + +EKS AMI release for Kubernetes version 1.21 (1.21 AMIs for GPU and ARM in us-gov-west-1 and us-gov-east-1 are included in this release) +* Note: The containerd has patch for CVE-2021-32760 + +Containerd runtime support +The EKS Optimized Amazon Linux 2 AMI now contains a bootstrap (https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh) flag --container-runtime to optionally enable the containerd runtime. This flag is available in all supported Kubernetes versions of the AMI. This change is to get ahead of the removal of Docker as a supported runtime in Kubernetes (more details here (https://kubernetes.io/blog/2020/12/02/dockershim-faq/)). Feedback is appreciated. + ## AMI Release v20210716 * amazon-eks-gpu-node-1.21-v20210716 From 3b732a4a564acabe1ea262f04555d92766d794db Mon Sep 17 00:00:00 2001 From: Vishal Gupta Date: Tue, 20 Jul 2021 13:48:27 -0700 Subject: [PATCH 180/621] pulling recently release runc, containerd from yum repo (#704) Co-authored-by: Vishal Gupta --- eks-worker-al2.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index faeb9e7bd..73810367a 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -14,8 +14,8 @@ "kubernetes_build_date": null, "kernel_version": "", "docker_version": "19.03.13ce-1.amzn2", - "containerd_version": "1.4.1-2.amzn2", - "runc_version": "1.0.0-0.3.20210225.git12644e6.amzn2", + "containerd_version": "1.4.6-2.amzn2", + "runc_version": "1.0.0-1.amzn2", "cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", "source_ami_id": "", From 62379e8c4bbbe676efe0791ff1ec83948ff292c9 Mon Sep 17 00:00:00 2001 From: MyannaHarris Date: Tue, 20 Jul 2021 14:28:35 -0700 Subject: [PATCH 181/621] [FIPS] Update CHANGELOG to include FIPS fix information --- CHANGELOG.md | 3 +++ 1
file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93c9b416e..d14312296 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,8 @@ EKS AMI release for Kubernetes version 1.21 (1.21 AMIs for GPU and ARM in us-gov Containerd runtime support The EKS Optimized Amazon Linux 2 AMI now contains a bootstrap (https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh) flag --container-runtime to optionally enable the containerd runtime. This flag is available in all supported Kubernetes versions of the AMI. This change is to get ahead of the removal of Docker as a supported runtime in Kubernetes (more details here (https://kubernetes.io/blog/2020/12/02/dockershim-faq/)). Feedback is appreciated. +FIPS Kernel Panic issue on 5.4.X is fixed - https://github.com/awslabs/amazon-eks-ami/issues/632 + ## AMI Release v20210716 * amazon-eks-gpu-node-1.21-v20210716 @@ -60,6 +62,7 @@ EKS AMI release for Kubernetes version 1.21 (1.21 AMIs for GPU and ARM in us-gov Containerd runtime support The EKS Optimized Amazon Linux 2 AMI now contains a bootstrap (https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh) flag --container-runtime to optionally enable the containerd runtime. This flag is available in all supported Kubernetes versions of the AMI. This change is to get ahead of the removal of Docker as a supported runtime in Kubernetes (more details here (https://kubernetes.io/blog/2020/12/02/dockershim-faq/)). Feedback is appreciated. 
+FIPS Kernel Panic issue on 5.4.X is fixed - https://github.com/awslabs/amazon-eks-ami/issues/632 ## AMI Release v20210628 * amazon-eks-gpu-node-1.20-v20210628 From 2bea6dd2092b52e98eece9e1885300caf1e1ba4c Mon Sep 17 00:00:00 2001 From: suket22 Date: Tue, 20 Jul 2021 14:54:25 -0700 Subject: [PATCH 182/621] Adding utility to calculate max-pods for kubelet (#706) --- files/max-pods-calculator.sh | 151 +++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 files/max-pods-calculator.sh diff --git a/files/max-pods-calculator.sh b/files/max-pods-calculator.sh new file mode 100644 index 000000000..0b9f8e653 --- /dev/null +++ b/files/max-pods-calculator.sh @@ -0,0 +1,151 @@ +#!/bin/bash + +set -o pipefail +set -o nounset +set -o errexit + +err_report() { + echo "Exited with error on line $1" +} +trap 'err_report $LINENO' ERR + +function print_help { + echo "usage: $0 [options]" + echo "Calculates maxPods value to be used when starting up the kubelet." + echo "-h,--help print this help." + echo "--instance-type Specify the instance type to calculate max pods value." + echo "--instance-type-from-imds Use this flag if the instance type should be fetched from IMDS." + echo "--cni-version Specify the version of the CNI (example - 1.7.5)." + echo "--cni-custom-networking-enabled Use this flag to indicate if CNI custom networking mode has been enabled." + echo "--cni-prefix-delegation-enabled Use this flag to indicate if CNI prefix delegation has been enabled." + echo "--cni-max-eni specify how many ENIs should be used for prefix delegation. Defaults to using all ENIs per instance." 
+} + +POSITIONAL=() + +while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -h|--help) + print_help + exit 1 + ;; + --instance-type) + INSTANCE_TYPE=$2 + shift + shift + ;; + --instance-type-from-imds) + INSTANCE_TYPE_FROM_IMDS=true + shift + ;; + --cni-version) + CNI_VERSION=$2 + shift + shift + ;; + --cni-custom-networking-enabled) + CNI_CUSTOM_NETWORKING_ENABLED=true + shift + ;; + --cni-prefix-delegation-enabled) + CNI_PREFIX_DELEGATION_ENABLED=true + shift + ;; + --cni-max-eni) + CNI_MAX_ENI=$2 + shift + shift + ;; + *) # unknown option + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; + esac +done + +CNI_VERSION="${CNI_VERSION:-}" +CNI_CUSTOM_NETWORKING_ENABLED="${CNI_CUSTOM_NETWORKING_ENABLED:-false}" +CNI_PREFIX_DELEGATION_ENABLED="${CNI_PREFIX_DELEGATION_ENABLED:-false}" +CNI_MAX_ENI="${CNI_MAX_ENI:-}" +INSTANCE_TYPE="${INSTANCE_TYPE:-}" +INSTANCE_TYPE_FROM_IMDS="${INSTANCE_TYPE_FROM_IMDS:-false}" + +PREFIX_DELEGATION_SUPPORTED=false +IPS_PER_PREFIX=16 + +if [ "$INSTANCE_TYPE_FROM_IMDS" = true ]; then + TOKEN=$(curl -m 10 -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" -s "http://169.254.169.254/latest/api/token") + export AWS_DEFAULT_REGION=$(curl -s --retry 5 -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/dynamic/instance-identity/document | jq .region -r) + INSTANCE_TYPE=$(curl -m 10 -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/instance-type) +elif [ -z "$INSTANCE_TYPE" ]; + # There's no reasonable default for an instanceType so force one to be provided to the script. + then echo "You must specify an instance type to calculate max pods value." + exit 1 +fi + +if [ -z "$CNI_VERSION" ]; + then echo "You must specify a CNI Version to use. 
Example - 1.7.5" + exit 1 +fi + +calculate_max_ip_addresses_prefix_delegation() { + enis=$1 + instance_max_eni_ips=$2 + echo $(($enis * (($instance_max_eni_ips - 1) * $IPS_PER_PREFIX ) + 2)) +} + +calculate_max_ip_addresses_secondary_ips() { + enis=$1 + instance_max_eni_ips=$2 + echo $(($enis * ($instance_max_eni_ips - 1) + 2)) +} + +min_number() { + printf "%s\n" "$@" | sort -g | head -n1 +} + + +VERSION_SPLIT=(${CNI_VERSION//./ }) +CNI_MAJOR_VERSION="${VERSION_SPLIT[0]}" +CNI_MINOR_VERSION="${VERSION_SPLIT[1]}" +if [[ "$CNI_MAJOR_VERSION" -gt 1 ]] || ([[ "$CNI_MAJOR_VERSION" = 1 ]] && [[ "$CNI_MINOR_VERSION" -gt 8 ]]); then + PREFIX_DELEGATION_SUPPORTED=true +fi + +DESCRIBE_INSTANCES_RESULT=$(aws ec2 describe-instance-types --instance-type $INSTANCE_TYPE --query 'InstanceTypes[0].{Hypervisor: Hypervisor, EniCount: NetworkInfo.MaximumNetworkInterfaces, PodsPerEniCount: NetworkInfo.Ipv4AddressesPerInterface, CpuCount: VCpuInfo.DefaultVCpus'}) + +HYPERVISOR_TYPE=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.Hypervisor' ) +IS_NITRO=false +if [[ "$HYPERVISOR_TYPE" == "nitro" ]]; then + IS_NITRO=true +fi +INSTANCE_MAX_ENIS=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.EniCount' ) +INSTANCE_MAX_ENIS_IPS=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.PodsPerEniCount' ) + +if [ -z "$CNI_MAX_ENI" ] ; then + enis_for_pods=$INSTANCE_MAX_ENIS +else + enis_for_pods="$(min_number $CNI_MAX_ENI $INSTANCE_MAX_ENIS)" +fi + +if [ "$CNI_CUSTOM_NETWORKING_ENABLED" = true ] ; then + enis_for_pods=$((enis_for_pods-1)) +fi + + +if [ "$IS_NITRO" = true ] && [ "$CNI_PREFIX_DELEGATION_ENABLED" = true ] && [ "$PREFIX_DELEGATION_SUPPORTED" = true ]; then + max_pods=$(calculate_max_ip_addresses_prefix_delegation $enis_for_pods $INSTANCE_MAX_ENIS_IPS) +else + max_pods=$(calculate_max_ip_addresses_secondary_ips $enis_for_pods $INSTANCE_MAX_ENIS_IPS) +fi + +# Limit the total number of pods that can be launched on any instance type based on the vCPUs on that instance type. 
+MAX_POD_CEILING_FOR_LOW_CPU=110 +MAX_POD_CEILING_FOR_HIGH_CPU=250 +CPU_COUNT=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.CpuCount' ) +if [ "$CPU_COUNT" -gt 30 ] ; then + echo $(min_number $MAX_POD_CEILING_FOR_HIGH_CPU $max_pods) +else + echo $(min_number $MAX_POD_CEILING_FOR_LOW_CPU $max_pods) +fi From 5bbb8821e4acb3e2a66536d483b840d02a5244d6 Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Wed, 21 Jul 2021 12:17:53 -0700 Subject: [PATCH 183/621] adding 1.21 to the Makefile (#708) Co-authored-by: Sinha --- Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index b80e0ac34..bdc9513bc 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.15 1.16 1.17 1.18 1.19 1.20 +all: 1.15 1.16 1.17 1.18 1.19 1.20 1.21 .PHONY: validate validate: @@ -65,4 +65,8 @@ k8s: validate .PHONY: 1.20 1.20: - $(MAKE) k8s kubernetes_version=1.20.4 kubernetes_build_date=2021-04-12 pull_cni_from_github=true \ No newline at end of file + $(MAKE) k8s kubernetes_version=1.20.4 kubernetes_build_date=2021-04-12 pull_cni_from_github=true + +.PHONY: 1.21 +1.21: + $(MAKE) k8s kubernetes_version=1.21.2 kubernetes_build_date=2021-07-05 pull_cni_from_github=true \ No newline at end of file From 474de1b68f2df3d5dcb7e3eb4942e5d0e9ca910f Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Fri, 23 Jul 2021 10:15:51 -0700 Subject: [PATCH 184/621] Adding Pause Container Account for us-iso-east-1 and us-isob-east-1 (#712) Co-authored-by: Laxmi Soumya Josyula --- files/bootstrap.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index c59855a0c..df9b80ded 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -132,6 +132,10 @@ function get_pause_container_account_for_region () { echo "${PAUSE_CONTAINER_ACCOUNT:-013241004608}";; us-gov-east-1) echo "${PAUSE_CONTAINER_ACCOUNT:-151742754352}";; + 
us-iso-east-1) + echo "${PAUSE_CONTAINER_ACCOUNT:-725322719131}";; + us-isob-east-1) + echo "${PAUSE_CONTAINER_ACCOUNT:-187977181151}";; af-south-1) echo "${PAUSE_CONTAINER_ACCOUNT:-877085696533}";; eu-south-1) From b70e66f14791ffc2c563391698ad71e1cd7d1c05 Mon Sep 17 00:00:00 2001 From: suket22 Date: Fri, 23 Jul 2021 16:30:47 -0700 Subject: [PATCH 185/621] Update CHANGELOG.md (#713) Adding information about the v20210722 AMI release. --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d14312296..26ce1031f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ # Changelog +## AMI Release v20210722 + +* amazon-eks-gpu-node-1.21-v20210722 +* amazon-eks-gpu-node-1.20-v20210722 +* amazon-eks-gpu-node-1.19-v20210722 +* amazon-eks-gpu-node-1.18-v20210722 +* amazon-eks-gpu-node-1.17-v20210722 +* amazon-eks-gpu-node-1.16-v20210722 +* amazon-eks-gpu-node-1.15-v20210722 +* amazon-eks-arm64-node-1.21-v20210722 +* amazon-eks-arm64-node-1.20-v20210722 +* amazon-eks-arm64-node-1.19-v20210722 +* amazon-eks-arm64-node-1.18-v20210722 +* amazon-eks-arm64-node-1.17-v20210722 +* amazon-eks-arm64-node-1.16-v20210722 +* amazon-eks-arm64-node-1.15-v20210722 +* amazon-eks-node-1.21-v20210722 +* amazon-eks-node-1.20-v20210722 +* amazon-eks-node-1.19-v20210722 +* amazon-eks-node-1.18-v20210722 +* amazon-eks-node-1.17-v20210722 +* amazon-eks-node-1.16-v20210722 +* amazon-eks-node-1.15-v20210722 + +Notable changes: +* This release includes the security patch for the [kernel](https://alas.aws.amazon.com/ALAS-2021-1524.html), for CVE-2021-33909. 
+ ## AMI Release v20210720 * amazon-eks-gpu-node-1.21-v20210720 From 73693cd549a830cf6c2b502b84ada18b537491b0 Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Thu, 29 Jul 2021 09:00:39 -0700 Subject: [PATCH 186/621] changes done to restart docker-daemon on config file update (#718) Co-authored-by: Sinha --- files/bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index df9b80ded..da833df2e 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -430,7 +430,7 @@ elif [[ "$CONTAINER_RUNTIME" = "dockerd" ]]; then fi systemctl daemon-reload systemctl enable docker - systemctl start docker + systemctl restart docker else echo "Container runtime ${CONTAINER_RUNTIME} is not supported." exit 1 From 5d109a125ef3876209850d0d4308d1a77ddf601c Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Tue, 10 Aug 2021 13:41:22 -0700 Subject: [PATCH 187/621] pre-fetching sandbox image during bootstrap in containerd runtime (#730) Co-authored-by: Sinha --- files/bootstrap.sh | 8 +++++- files/containerd-config.toml | 3 +++ files/kubelet-containerd.service | 4 +-- files/pull-sandbox-image.sh | 27 +++++++++++++++++++ files/sandbox-image.service | 12 +++++++++ .../linux/eks-log-collector.sh | 8 ++++++ scripts/install-worker.sh | 3 +++ 7 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 files/pull-sandbox-image.sh create mode 100644 files/sandbox-image.service diff --git a/files/bootstrap.sh b/files/bootstrap.sh index da833df2e..f298ac14e 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -406,12 +406,18 @@ fi if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then sudo mkdir -p /etc/containerd sudo mkdir -p /etc/cni/net.d + sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml sudo mv /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml + sudo mv /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service sudo mv 
/etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service + sudo chown root:root /etc/systemd/system/sandbox-image.service systemctl daemon-reload systemctl enable containerd - systemctl start containerd + systemctl restart containerd + systemctl enable sandbox-image + systemctl start sandbox-image + elif [[ "$CONTAINER_RUNTIME" = "dockerd" ]]; then mkdir -p /etc/docker bash -c "/sbin/iptables-save > /etc/sysconfig/iptables" diff --git a/files/containerd-config.toml b/files/containerd-config.toml index 79e53f1ec..d17d9096c 100644 --- a/files/containerd-config.toml +++ b/files/containerd-config.toml @@ -8,6 +8,9 @@ address = "/run/dockershim.sock" [plugins."io.containerd.grpc.v1.cri".containerd] default_runtime_name = "runc" +[plugins."io.containerd.grpc.v1.cri"] +sandbox_image = "SANDBOX_IMAGE" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] runtime_type = "io.containerd.runc.v2" diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index ed08ed97a..50698c5ca 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -1,8 +1,8 @@ [Unit] Description=Kubernetes Kubelet Documentation=https://github.com/kubernetes/kubernetes -After=containerd.service -Requires=containerd.service +After=containerd.service sandbox-image.service +Requires=containerd.service sandbox-image.service [Service] ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 diff --git a/files/pull-sandbox-image.sh b/files/pull-sandbox-image.sh new file mode 100644 index 000000000..02f9c7040 --- /dev/null +++ b/files/pull-sandbox-image.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +### fetching sandbox image from /etc/containerd/config.toml +sandbox_image=$(awk -F'[ ="]+' '$1 == "sandbox_image" { print $2 }' /etc/containerd/config.toml) +region=$(echo "$sandbox_image" | cut -f4 -d ".") +ecr_password=$(aws ecr get-login-password --region $region) 
+API_RETRY_ATTEMPTS=5 + +for attempt in `seq 0 $API_RETRY_ATTEMPTS`; do + rc=0 + if [[ $attempt -gt 0 ]]; then + echo "Attempt $attempt of $API_RETRY_ATTEMPTS" + fi + ### pull sandbox image from ecr + ### username will always be constant i.e; AWS + sudo ctr --address=/run/dockershim.sock --namespace k8s.io image pull $sandbox_image --user AWS:$ecr_password + rc=$?; + if [[ $rc -eq 0 ]]; then + break + fi + if [[ $attempt -eq $API_RETRY_ATTEMPTS ]]; then + exit $rc + fi + jitter=$((1 + RANDOM % 10)) + sleep_sec="$(( $(( 5 << $((1+$attempt)) )) + $jitter))" + sleep $sleep_sec +done diff --git a/files/sandbox-image.service b/files/sandbox-image.service new file mode 100644 index 000000000..6694e29ba --- /dev/null +++ b/files/sandbox-image.service @@ -0,0 +1,12 @@ +[Unit] +Description=pull sandbox image defined in containerd config.toml +# pulls sandbox image using ctr tool +After=containerd.service +Requires=containerd.service + +[Service] +Type=oneshot +ExecStart=/etc/eks/containerd/pull-sandbox-image.sh + +[Install] +WantedBy=multi-user.target diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index fe45cd4d2..af701a068 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -58,6 +58,7 @@ COMMON_DIRECTORIES=( storage var_log networking + sandbox-image # eks ipamd # eks sysctls # eks kubelet # eks @@ -258,6 +259,7 @@ collect() { get_networking_info get_cni_config get_docker_logs + get_sandboxImage_info } pack() { @@ -551,6 +553,12 @@ get_containerd_info() { ok } +get_sandboxImage_info() { + try "Collect sandbox-image daemon information" + timeout 75 journalctl -u sandbox-image > "${COLLECT_DIR}"/sandbox-image/sandbox-image-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"sandbox-image info output \" " + ok +} + get_docker_info() { try "collect Docker daemon information" diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 
cb464a31a..f735cc8f8 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -150,6 +150,9 @@ else fi sudo mv $TEMPLATE_DIR/kubelet-containerd.service /etc/eks/containerd/kubelet-containerd.service +sudo mv $TEMPLATE_DIR/sandbox-image.service /etc/eks/containerd/sandbox-image.service +sudo mv $TEMPLATE_DIR/pull-sandbox-image.sh /etc/eks/containerd/pull-sandbox-image.sh +sudo chmod 777 /etc/eks/containerd/pull-sandbox-image.sh cat < Date: Tue, 17 Aug 2021 17:00:48 -0700 Subject: [PATCH 188/621] Adding information about the v20210813 AMI release. (#734) Co-authored-by: Sinha --- CHANGELOG.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26ce1031f..7f0c4d01f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,33 @@ # Changelog +## AMI Release v20210813 + +* amazon-eks-gpu-node-1.21-v20210813 +* amazon-eks-gpu-node-1.20-v20210813 +* amazon-eks-gpu-node-1.19-v20210813 +* amazon-eks-gpu-node-1.18-v20210813 +* amazon-eks-gpu-node-1.17-v20210813 +* amazon-eks-gpu-node-1.16-v20210813 +* amazon-eks-gpu-node-1.15-v20210813 +* amazon-eks-arm64-node-1.21-v20210813 +* amazon-eks-arm64-node-1.20-v20210813 +* amazon-eks-arm64-node-1.19-v20210813 +* amazon-eks-arm64-node-1.18-v20210813 +* amazon-eks-arm64-node-1.17-v20210813 +* amazon-eks-arm64-node-1.16-v20210813 +* amazon-eks-arm64-node-1.15-v20210813 +* amazon-eks-node-1.21-v20210813 +* amazon-eks-node-1.20-v20210813 +* amazon-eks-node-1.19-v20210813 +* amazon-eks-node-1.18-v20210813 +* amazon-eks-node-1.17-v20210813 +* amazon-eks-node-1.16-v20210813 +* amazon-eks-node-1.15-v20210813 + +Notable changes: +* Contains fix for sandbox-image issue with containerd in Gov-cloud and CN regions. +* Updating to 1.18.20 and 1.19.13 kubernetes version.
+ ## AMI Release v20210722 * amazon-eks-gpu-node-1.21-v20210722 From 8450297eb2ef87fe5cbbce52a86ddcdc8b2e6716 Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Thu, 19 Aug 2021 13:50:20 -0700 Subject: [PATCH 189/621] changing build date and directory for 1.18 and 1.19 (#738) Co-authored-by: Sinha --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index bdc9513bc..327e021e8 100644 --- a/Makefile +++ b/Makefile @@ -57,11 +57,11 @@ k8s: validate .PHONY: 1.18 1.18: - $(MAKE) k8s kubernetes_version=1.18.9 kubernetes_build_date=2020-11-02 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.18.20 kubernetes_build_date=2021-08-12 pull_cni_from_github=true .PHONY: 1.19 1.19: - $(MAKE) k8s kubernetes_version=1.19.6 kubernetes_build_date=2021-01-05 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.19.13 kubernetes_build_date=2021-08-12 pull_cni_from_github=true .PHONY: 1.20 1.20: From c78bb6bac21e9323f1f9c57568ece93c1f1d507b Mon Sep 17 00:00:00 2001 From: Sinha Date: Wed, 25 Aug 2021 13:40:37 -0700 Subject: [PATCH 190/621] adjusting permission of pull-sandbox-image.sh --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index f735cc8f8..ccb4a6441 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -152,7 +152,7 @@ fi sudo mv $TEMPLATE_DIR/kubelet-containerd.service /etc/eks/containerd/kubelet-containerd.service sudo mv $TEMPLATE_DIR/sandbox-image.service /etc/eks/containerd/sandbox-image.service sudo mv $TEMPLATE_DIR/pull-sandbox-image.sh /etc/eks/containerd/pull-sandbox-image.sh -sudo chmod 777 /etc/eks/containerd/pull-sandbox-image.sh +sudo chmod +x /etc/eks/containerd/pull-sandbox-image.sh cat < Date: Fri, 27 Aug 2021 02:19:29 -0500 Subject: [PATCH 191/621] remove iptables as required (#744) --- .../linux/eks-log-collector.sh | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 
10 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index af701a068..51c720b8d 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -42,8 +42,6 @@ REQUIRED_UTILS=( tar date mkdir - iptables - iptables-save grep awk df @@ -154,7 +152,7 @@ check_required_utils() { for utils in ${REQUIRED_UTILS[*]}; do # If exit code of "command -v" not equal to 0, fail if ! command -v "${utils}" >/dev/null 2>&1; then - die "Application \"${utils}\" is missing, please install \"${utils}\" as this script requires it, and will not function without it." + echo -e "\nApplication \"${utils}\" is missing, please install \"${utils}\" as this script requires it." fi done } @@ -301,13 +299,16 @@ get_selinux_info() { } get_iptables_info() { - try "collect iptables information" - - iptables --wait 1 --numeric --verbose --list --table mangle | tee "${COLLECT_DIR}"/networking/iptables-mangle.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-mangle.txt - iptables --wait 1 --numeric --verbose --list --table filter | tee "${COLLECT_DIR}"/networking/iptables-filter.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-filter.txt - iptables --wait 1 --numeric --verbose --list --table nat | tee "${COLLECT_DIR}"/networking/iptables-nat.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-nat.txt - iptables --wait 1 --numeric --verbose --list | tee "${COLLECT_DIR}"/networking/iptables.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables.txt - iptables-save > 
"${COLLECT_DIR}"/networking/iptables-save.txt + if ! command -v iptables >/dev/null 2>&1; then + echo "IPtables not installed" |tee -a iptables.txt + else + try "collect iptables information" + iptables --wait 1 --numeric --verbose --list --table mangle | tee "${COLLECT_DIR}"/networking/iptables-mangle.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-mangle.txt + iptables --wait 1 --numeric --verbose --list --table filter | tee "${COLLECT_DIR}"/networking/iptables-filter.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-filter.txt + iptables --wait 1 --numeric --verbose --list --table nat | tee "${COLLECT_DIR}"/networking/iptables-nat.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-nat.txt + iptables --wait 1 --numeric --verbose --list | tee "${COLLECT_DIR}"/networking/iptables.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables.txt + iptables-save > "${COLLECT_DIR}"/networking/iptables-save.txt + fi ok } From a1be1f354ba91cae2e990415a997a739cd2b09ac Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Fri, 27 Aug 2021 11:50:03 -0700 Subject: [PATCH 192/621] Updating changelog for release v20210826 --- CHANGELOG.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f0c4d01f..089ccc9d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,34 @@ # Changelog +## AMI Release v20210826 + +* amazon-eks-gpu-node-1.21-v20210826 +* amazon-eks-gpu-node-1.20-v20210826 +* amazon-eks-gpu-node-1.19-v20210826 +* amazon-eks-gpu-node-1.18-v20210826 +* amazon-eks-gpu-node-1.17-v20210826 +* 
amazon-eks-gpu-node-1.16-v20210826 +* amazon-eks-gpu-node-1.15-v20210826 +* amazon-eks-arm64-node-1.21-v20210826 +* amazon-eks-arm64-node-1.20-v20210826 +* amazon-eks-arm64-node-1.19-v20210826 +* amazon-eks-arm64-node-1.18-v20210826 +* amazon-eks-arm64-node-1.17-v20210826 +* amazon-eks-arm64-node-1.16-v20210826 +* amazon-eks-arm64-node-1.15-v20210826 +* amazon-eks-node-1.21-v20210826 +* amazon-eks-node-1.20-v20210826 +* amazon-eks-node-1.19-v20210826 +* amazon-eks-node-1.18-v20210826 +* amazon-eks-node-1.17-v20210826 +* amazon-eks-node-1.16-v20210826 +* amazon-eks-node-1.15-v20210826 + +Notable changes: + +* Fix to reduce permissions of `pull-sandbox-image.sh` [c78bb6b](https://github.com/awslabs/amazon-eks-ami/commit/c78bb6bac21e9323f1f9c57568ece93c1f1d507b) + + ## AMI Release v20210813 * amazon-eks-gpu-node-1.21-v20210813 From 548b1650e63f3ab582d79418c7b3336943642fd1 Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Tue, 31 Aug 2021 11:24:50 -0700 Subject: [PATCH 193/621] Updating eni-max-pods.txt Generated using `make generate-limits` --- files/eni-max-pods.txt | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index ac83f32cd..f5d1b531f 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2021-06-08T13:46:17-07:00 +# This file was generated at 2021-08-31T18:22:52Z # # Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods @@ -134,8 +134,10 @@ g3.4xlarge 234 g3.8xlarge 234 g3s.xlarge 58 g4ad.16xlarge 234 +g4ad.2xlarge 8 g4ad.4xlarge 29 g4ad.8xlarge 58 +g4ad.xlarge 8 g4dn.12xlarge 234 g4dn.16xlarge 58 g4dn.2xlarge 29 @@ -265,6 +267,15 @@ m6gd.large 29 m6gd.medium 8 m6gd.metal 737 m6gd.xlarge 58 +m6i.12xlarge 234 +m6i.16xlarge 737 +m6i.24xlarge 737 +m6i.2xlarge 58 +m6i.32xlarge 737 +m6i.4xlarge 234 +m6i.8xlarge 234 +m6i.large 29 +m6i.xlarge 58 mac1.metal 234 p2.16xlarge 234 p2.8xlarge 234 From ac4fc45b109313bb838873480385a9df1715c952 Mon Sep 17 00:00:00 2001 From: Visuna Date: Wed, 8 Sep 2021 11:17:41 -0700 Subject: [PATCH 194/621] Updating changelog for release v20210830 (#751) --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 089ccc9d4..2e5ae4b96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ # Changelog +## AMI Release v20210830 + +* amazon-eks-gpu-node-1.21-v20210830 +* amazon-eks-gpu-node-1.20-v20210830 +* amazon-eks-gpu-node-1.19-v20210830 +* amazon-eks-gpu-node-1.18-v20210830 +* amazon-eks-gpu-node-1.17-v20210830 +* amazon-eks-gpu-node-1.16-v20210830 +* amazon-eks-arm64-node-1.21-v20210830 +* amazon-eks-arm64-node-1.20-v20210830 +* amazon-eks-arm64-node-1.19-v20210830 +* amazon-eks-arm64-node-1.18-v20210830 +* amazon-eks-arm64-node-1.17-v20210830 +* amazon-eks-arm64-node-1.16-v20210830 +* amazon-eks-node-1.21-v20210830 +* amazon-eks-node-1.20-v20210830 +* amazon-eks-node-1.19-v20210830 +* amazon-eks-node-1.18-v20210830 +* amazon-eks-node-1.17-v20210830 +* amazon-eks-node-1.16-v20210830 + +Notable changes: + +* Upgrade kubelet version for 1.17 and 1.20 + * 1.17.12 -> 1.17.17 + * 1.20.4 -> 1.20.7 + ## AMI Release v20210826 * amazon-eks-gpu-node-1.21-v20210826 From 
d03b2ac370f473ddfd1f2d11d5ba93ecb5c1ec19 Mon Sep 17 00:00:00 2001 From: Visuna Date: Thu, 9 Sep 2021 14:27:22 -0700 Subject: [PATCH 195/621] Update kubernetes build dates and remove 1.15 (#754) * Update kubernetes build dates and remove 1.15 ### Testing Verified AMI could be built with updated build dates * Update Makefile --- Makefile | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 327e021e8..5dfaea314 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.15 1.16 1.17 1.18 1.19 1.20 1.21 +all: 1.16 1.17 1.18 1.19 1.20 1.21 .PHONY: validate validate: @@ -43,30 +43,26 @@ k8s: validate # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html -.PHONY: 1.15 -1.15: - $(MAKE) k8s kubernetes_version=1.15.12 kubernetes_build_date=2020-11-02 pull_cni_from_github=true - .PHONY: 1.16 1.16: - $(MAKE) k8s kubernetes_version=1.16.15 kubernetes_build_date=2020-11-02 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.16.15 kubernetes_build_date=2021-09-02 pull_cni_from_github=true .PHONY: 1.17 1.17: - $(MAKE) k8s kubernetes_version=1.17.12 kubernetes_build_date=2020-11-02 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.17.17 kubernetes_build_date=2021-09-02 pull_cni_from_github=true .PHONY: 1.18 1.18: - $(MAKE) k8s kubernetes_version=1.18.20 kubernetes_build_date=2021-08-12 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.18.20 kubernetes_build_date=2021-09-02 pull_cni_from_github=true .PHONY: 1.19 1.19: - $(MAKE) k8s kubernetes_version=1.19.13 kubernetes_build_date=2021-08-12 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.19.13 kubernetes_build_date=2021-09-02 pull_cni_from_github=true .PHONY: 1.20 1.20: - $(MAKE) k8s kubernetes_version=1.20.4 kubernetes_build_date=2021-04-12 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.20.7 
kubernetes_build_date=2021-09-02 pull_cni_from_github=true .PHONY: 1.21 1.21: - $(MAKE) k8s kubernetes_version=1.21.2 kubernetes_build_date=2021-07-05 pull_cni_from_github=true \ No newline at end of file + $(MAKE) k8s kubernetes_version=1.21.2 kubernetes_build_date=2021-09-02 pull_cni_from_github=true From 9576786266df8bee08e97c1c7f2d0e2f85752092 Mon Sep 17 00:00:00 2001 From: Rafael Gaspar Date: Fri, 17 Sep 2021 18:21:35 +0200 Subject: [PATCH 196/621] Use default containerd socket path (#724) * Use default containerd socket path * Create symlink so aws-node can talk to containerd --- files/bootstrap.sh | 1 + files/containerd-config.toml | 2 +- files/kubelet-containerd.service | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index f298ac14e..041f7dcf6 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -412,6 +412,7 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then sudo mv /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/sandbox-image.service + ln -sf /run/containerd/containerd.sock /run/dockershim.sock systemctl daemon-reload systemctl enable containerd systemctl restart containerd diff --git a/files/containerd-config.toml b/files/containerd-config.toml index d17d9096c..31404027f 100644 --- a/files/containerd-config.toml +++ b/files/containerd-config.toml @@ -3,7 +3,7 @@ root = "/var/lib/containerd" state = "/run/containerd" [grpc] -address = "/run/dockershim.sock" +address = "/run/containerd/containerd.sock" [plugins."io.containerd.grpc.v1.cri".containerd] default_runtime_name = "runc" diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index 50698c5ca..35a6bf188 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -10,7 +10,7 @@ ExecStart=/usr/bin/kubelet --cloud-provider aws \ --config 
/etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime remote \ - --container-runtime-endpoint unix:///run/dockershim.sock \ + --container-runtime-endpoint unix:///run/containerd/containerd.sock \ --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS Restart=on-failure From 9ffe85683bdc0df8724ee6ca20ba465ec29a7a75 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Tue, 21 Sep 2021 13:58:14 -0700 Subject: [PATCH 197/621] Updating changelog for release v20210914 (#763) --- CHANGELOG.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e5ae4b96..d032398c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,27 @@ # Changelog +## AMI Release v20210914 + +* amazon-eks-gpu-node-1.21-v20210914 +* amazon-eks-gpu-node-1.20-v20210914 +* amazon-eks-gpu-node-1.19-v20210914 +* amazon-eks-gpu-node-1.18-v20210914 +* amazon-eks-gpu-node-1.17-v20210914 +* amazon-eks-gpu-node-1.16-v20210914 +* amazon-eks-arm64-node-1.21-v20210914 +* amazon-eks-arm64-node-1.20-v20210914 +* amazon-eks-arm64-node-1.19-v20210914 +* amazon-eks-arm64-node-1.18-v20210914 +* amazon-eks-arm64-node-1.17-v20210914 +* amazon-eks-arm64-node-1.16-v20210914 +* amazon-eks-node-1.21-v20210914 +* amazon-eks-node-1.20-v20210914 +* amazon-eks-node-1.19-v20210914 +* amazon-eks-node-1.18-v20210914 +* amazon-eks-node-1.17-v20210914 +* amazon-eks-node-1.16-v20210914 + +Notable changes: +Adding support for new ec2 instance types i.e. 
m6i ## AMI Release v20210830 From c4d08d2395987da3172c299fa77c52760e3e57e3 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 23 Sep 2021 13:55:40 -0700 Subject: [PATCH 198/621] Adds option to upgrade to 5.10 kernel (#765) --- scripts/upgrade_kernel.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 83376eedf..7a686604b 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -27,6 +27,8 @@ if [[ $KERNEL_VERSION == "4.14" ]]; then sudo yum update -y kernel elif [[ $KERNEL_VERSION == "5.4" ]]; then sudo amazon-linux-extras install -y kernel-5.4 +elif [[ $KERNEL_VERSION == "5.10" ]]; then + sudo amazon-linux-extras install -y kernel-5.10 else echo "$KERNEL_VERSION is not a valid kernel version" exit 1 From b70f15e3a7cd03a1a75fd913ea0e0631a8e1fee2 Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Mon, 4 Oct 2021 14:31:53 -0700 Subject: [PATCH 199/621] Address CVEs Docker (CVE-2021-41089, CVE-2021-41091, CVE-2021-41092) and containerd (CVE-2021-41103) (#770) --- CHANGELOG.md | 48 +++++++++++++++++++++++++++++++++++++++++++++ eks-worker-al2.json | 6 +++--- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d032398c6..ae1422143 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,52 @@ # Changelog + +## AMI Release v20211003 + +* amazon-eks-gpu-node-1.20-v20211003 +* amazon-eks-gpu-node-1.19-v20211003 +* amazon-eks-gpu-node-1.18-v20211003 +* amazon-eks-gpu-node-1.17-v20211003 +* amazon-eks-gpu-node-1.16-v20211003 +* amazon-eks-gpu-node-1.15-v20211003 +* amazon-eks-arm64-node-1.20-v20211003 +* amazon-eks-arm64-node-1.19-v20211003 +* amazon-eks-arm64-node-1.18-v20211003 +* amazon-eks-arm64-node-1.17-v20211003 +* amazon-eks-arm64-node-1.16-v20211003 +* amazon-eks-arm64-node-1.15-v20211003 +* amazon-eks-node-1.20-v20211003 +* amazon-eks-node-1.19-v20211003 +* amazon-eks-node-1.18-v20211003 +* 
amazon-eks-node-1.17-v20211003 +* amazon-eks-node-1.16-v20211003 +* amazon-eks-node-1.15-v20211003 + +Binaries used to build these AMIs are published: + +s3://amazon-eks/1.20.4/2021-04-12/ +s3://amazon-eks/1.19.6/2021-01-05/ +s3://amazon-eks/1.18.9/2020-11-02/ +s3://amazon-eks/1.17.12/2020-11-02/ +s3://amazon-eks/1.16.15/2020-11-02/ +s3://amazon-eks/1.15.12/2020-11-02/ + +AMI details: + +* kernel: 5.4.144-69.257.amzn2 (1.19 and above), (1.18 and below) +* dockerd: 20.10.7-3.amzn2 +* containerd: 1.4.6-3.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 460.73.01 +* SSM agent: 3.0.1124.0 + +Notable changes: + +* Updated version of RunC to 1.0.0-2.amzn2 +* Updated version of Docker to 20.10.7-3.amzn2 +* Updated version of Containerd to 1.4.6-3.amzn2 +* Following CVEs are addressed Docker (CVE-2021-41089, CVE-2021-41091, CVE-2021-41092) and containerd (CVE-2021-41103) + ## AMI Release v20210914 * amazon-eks-gpu-node-1.21-v20210914 diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 73810367a..848f1b00e 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -13,9 +13,9 @@ "kubernetes_version": null, "kubernetes_build_date": null, "kernel_version": "", - "docker_version": "19.03.13ce-1.amzn2", - "containerd_version": "1.4.6-2.amzn2", - "runc_version": "1.0.0-1.amzn2", + "docker_version": "20.10.7-3.amzn2", + "containerd_version": "1.4.6-3.amzn2", + "runc_version": "1.0.0-2.amzn2", "cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", "source_ami_id": "", From 14e870efa6ea737bdab1346e04c538656164111a Mon Sep 17 00:00:00 2001 From: Sinha Date: Tue, 5 Oct 2021 11:06:05 -0700 Subject: [PATCH 200/621] adding changelog for v20211001 --- CHANGELOG.md | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae1422143..67c01fc14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,50 @@ Notable changes: * Updated version of Containerd to 
1.4.6-3.amzn2 * Following CVEs are addressed Docker (CVE-2021-41089, CVE-2021-41091, CVE-2021-41092) and containerd (CVE-2021-41103) +## AMI Release v20211001 + +* amazon-eks-gpu-node-1.21-v20211001 +* amazon-eks-gpu-node-1.20-v20211001 +* amazon-eks-gpu-node-1.19-v20211001 +* amazon-eks-gpu-node-1.18-v20211001 +* amazon-eks-gpu-node-1.17-v20211001 +* amazon-eks-gpu-node-1.16-v20211001 +* amazon-eks-arm64-node-1.21-v20211001 +* amazon-eks-arm64-node-1.20-v20211001 +* amazon-eks-arm64-node-1.19-v20211001 +* amazon-eks-arm64-node-1.18-v20211001 +* amazon-eks-arm64-node-1.17-v20211001 +* amazon-eks-arm64-node-1.16-v20211001 +* amazon-eks-node-1.21-v20211001 +* amazon-eks-node-1.20-v20211001 +* amazon-eks-node-1.19-v20211001 +* amazon-eks-node-1.18-v20211001 +* amazon-eks-node-1.17-v20211001 +* amazon-eks-node-1.16-v20211001 + +Binaries used to build these AMIs are published: + +s3://amazon-eks/1.20.4/2021-04-12/ +s3://amazon-eks/1.19.6/2021-01-05/ +s3://amazon-eks/1.18.9/2020-11-02/ +s3://amazon-eks/1.17.12/2020-11-02/ +s3://amazon-eks/1.16.15/2020-11-02/ +s3://amazon-eks/1.15.12/2020-11-02/ + +AMI details: + +* kernel: 5.4.144-69.257.amzn2 (1.19 and above), (1.18 and below) +* dockerd: 19.03.13-ce +* containerd: 1.4.6 +* runc: 1.0.0.amzn2 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 460.73.01 +* SSM agent: 3.0.1124.0 + +Notable changes: +* This release includes the patch for the CA to handle Let's Encrypt Certificate Expiry +* Updating default [containerd socket path](https://github.com/awslabs/amazon-eks-ami/commit/9576786266df8bee08e97c1c7f2d0e2f85752092) + ## AMI Release v20210914 * amazon-eks-gpu-node-1.21-v20210914 From ecb59d0b458f0607b60b36ca324b96f3966e1484 Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Tue, 5 Oct 2021 13:57:44 -0700 Subject: [PATCH 201/621] Adding Changelog for release v20211004 (#774) --- CHANGELOG.md | 55 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 49 
insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 67c01fc14..3b891b862 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,48 @@ # Changelog +AMI Release v20211004 + +* amazon-eks-gpu-node-1.20-v20211004 +* amazon-eks-gpu-node-1.19-v20211004 +* amazon-eks-gpu-node-1.18-v20211004 +* amazon-eks-gpu-node-1.17-v20211004 +* amazon-eks-gpu-node-1.16-v20211004 +* amazon-eks-gpu-node-1.15-v20211004 +* amazon-eks-arm64-node-1.20-v20211004 +* amazon-eks-arm64-node-1.19-v20211004 +* amazon-eks-arm64-node-1.18-v20211004 +* amazon-eks-arm64-node-1.17-v20211004 +* amazon-eks-arm64-node-1.16-v20211004 +* amazon-eks-arm64-node-1.15-v20211004 +* amazon-eks-node-1.20-v20211004 +* amazon-eks-node-1.19-v20211004 +* amazon-eks-node-1.18-v20211004 +* amazon-eks-node-1.17-v20211004 +* amazon-eks-node-1.16-v20211004 +* amazon-eks-node-1.15-v20211004 + +Binaries used to build these AMIs are published: + +* s3://amazon-eks/1.21.2/2021-04-12/ +* s3://amazon-eks/1.20.7/2021-04-12/ +* s3://amazon-eks/1.19.13/2021-01-05/ +* s3://amazon-eks/1.18.20/2020-11-02/ +* s3://amazon-eks/1.17.17/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ + +AMI details: + +* kernel: 5.4.149-73.259.amzn2 (1.19 and above), 4.14.246-187.474.amzn2 (1.18 and below) +* dockerd: 20.10.7-3.amzn2 +* containerd: 1.4.6-3.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 460.73.01 +* SSM agent: 3.0.1124.0 + +Notable changes: +* Created AMI released on the latest commit + ## AMI Release v20211003 * amazon-eks-gpu-node-1.20-v20211003 @@ -23,12 +66,12 @@ Binaries used to build these AMIs are published: -s3://amazon-eks/1.20.4/2021-04-12/ -s3://amazon-eks/1.19.6/2021-01-05/ -s3://amazon-eks/1.18.9/2020-11-02/ -s3://amazon-eks/1.17.12/2020-11-02/ -s3://amazon-eks/1.16.15/2020-11-02/ -s3://amazon-eks/1.15.12/2020-11-02/ +* s3://amazon-eks/1.21.2/2021-04-12/ +* s3://amazon-eks/1.20.7/2021-04-12/ +* s3://amazon-eks/1.19.13/2021-01-05/ +* 
s3://amazon-eks/1.18.20/2020-11-02/ +* s3://amazon-eks/1.17.17/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ AMI details: From 9298dd9d7a5f255655dc01162167722b8213d68a Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Wed, 6 Oct 2021 09:43:12 -0700 Subject: [PATCH 202/621] adding support for IPv6 node group (#773) Co-authored-by: Sinha --- files/bootstrap.sh | 64 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 041f7dcf6..1099bb729 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -27,6 +27,8 @@ function print_help { echo "--pause-container-account The AWS account (number) to pull the pause container from" echo "--pause-container-version The tag of the pause container" echo "--container-runtime Specify a container runtime (default: dockerd)" + echo "--ip-family Specify ip family of the cluster" + echo "--service-ipv6-cidr ipv6 cidr range of the cluster" } POSITIONAL=() @@ -93,6 +95,16 @@ while [[ $# -gt 0 ]]; do shift shift ;; + --ip-family) + IP_FAMILY=$2 + shift + shift + ;; + --service-ipv6-cidr) + SERVICE_IPV6_CIDR=$2 + shift + shift + ;; *) # unknown option POSITIONAL+=("$1") # save it in an array for later shift # past argument @@ -116,6 +128,8 @@ API_RETRY_ATTEMPTS="${API_RETRY_ATTEMPTS:-3}" DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.1-eksbuild.1}" CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-dockerd}" +IP_FAMILY="${IP_FAMILY:-}" +SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" function get_pause_container_account_for_region () { local region="$1" @@ -279,6 +293,25 @@ if [ -z "$CLUSTER_NAME" ]; then exit 1 fi +if [[ ! -z "${IP_FAMILY}" ]]; then + if [[ "${IP_FAMILY}" != "ipv4" ]] && [[ "${IP_FAMILY}" != "ipv6" ]] ; then + echo "Invalid IpFamily. Only ipv4 or ipv6 are allowed" + exit 1 + fi + + if [[ "${IP_FAMILY}" == "ipv6" ]] && [[ ! -z "${B64_CLUSTER_CA}" ]] && [[ ! 
-z "${APISERVER_ENDPOINT}" ]] && [[ -z "${SERVICE_IPV6_CIDR}" ]]; then + echo "Service Ipv6 Cidr must be provided when ip-family is specified as IPV6" + exit 1 + fi +fi + +if [[ ! -z "${SERVICE_IPV6_CIDR}" ]]; then + if [[ "${IP_FAMILY}" == "ipv4" ]]; then + echo "ip-family should be ipv6 when service-ipv6-cidr is specified" + exit 1 + fi + IP_FAMILY="ipv6" +fi TOKEN=$(get_token) AWS_DEFAULT_REGION=$(get_meta_data 'latest/dynamic/instance-identity/document' | jq .region -r) @@ -317,7 +350,7 @@ if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then --region=${AWS_DEFAULT_REGION} \ --name=${CLUSTER_NAME} \ --output=text \ - --query 'cluster.{certificateAuthorityData: certificateAuthority.data, endpoint: endpoint, kubernetesNetworkConfig: kubernetesNetworkConfig.serviceIpv4Cidr}' > $DESCRIBE_CLUSTER_RESULT || rc=$? + --query 'cluster.{certificateAuthorityData: certificateAuthority.data, endpoint: endpoint, serviceIpv4Cidr: kubernetesNetworkConfig.serviceIpv4Cidr, serviceIpv6Cidr: kubernetesNetworkConfig.serviceIpv6Cidr, clusterIpFamily: kubernetesNetworkConfig.ipFamily}' > $DESCRIBE_CLUSTER_RESULT || rc=$? 
if [[ $rc -eq 0 ]]; then break fi @@ -329,8 +362,19 @@ if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then sleep $sleep_sec done B64_CLUSTER_CA=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $1}') - APISERVER_ENDPOINT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $2}') - SERVICE_IPV4_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $3}') + APISERVER_ENDPOINT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $3}') + SERVICE_IPV4_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $4}') + SERVICE_IPV6_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $5}') +fi + +if [[ -z "${IP_FAMILY}" ]]; then + IP_FAMILY=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $2}') +fi + +if [[ -z "${IP_FAMILY}" ]] || [[ "${IP_FAMILY}" == "None" ]]; then + ### this can happen when the ifFamily field is not found in describeCluster response + ### or B64_CLUSTER_CA and APISERVER_ENDPOINT are defined but IPFamily isn't + IP_FAMILY="ipv4" fi echo $B64_CLUSTER_CA | base64 -d > $CA_CERTIFICATE_FILE_PATH @@ -340,12 +384,17 @@ sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig sed -i s,AWS_REGION,$AWS_DEFAULT_REGION,g /var/lib/kubelet/kubeconfig ### kubelet.service configuration +if [[ "${IP_FAMILY}" == "ipv6" ]]; then + DNS_CLUSTER_IP=$(awk -F/ '{print $1}' <<< $SERVICE_IPV6_CIDR)a +fi + +MAC=$(get_meta_data 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$//') + if [[ -z "${DNS_CLUSTER_IP}" ]]; then if [[ ! -z "${SERVICE_IPV4_CIDR}" ]] && [[ "${SERVICE_IPV4_CIDR}" != "None" ]] ; then #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. 
(x.y.z.10) DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 else - MAC=$(get_meta_data 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$//') TEN_RANGE=$(get_meta_data "latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks" | grep -c '^10\..*' || true ) DNS_CLUSTER_IP=10.100.0.10 if [[ "$TEN_RANGE" != "0" ]]; then @@ -359,7 +408,12 @@ fi KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json echo "$(jq ".clusterDNS=[\"$DNS_CLUSTER_IP\"]" $KUBELET_CONFIG)" > $KUBELET_CONFIG -INTERNAL_IP=$(get_meta_data 'latest/meta-data/local-ipv4') +if [[ "${IP_FAMILY}" == "ipv4" ]]; then + INTERNAL_IP=$(get_meta_data 'latest/meta-data/local-ipv4') +else + INTERNAL_IP_URI=latest/meta-data/network/interfaces/macs/$MAC/ipv6s + INTERNAL_IP=$(get_meta_data $INTERNAL_IP_URI) +fi INSTANCE_TYPE=$(get_meta_data 'latest/meta-data/instance-type') # Sets kubeReserved and evictionHard in /etc/kubernetes/kubelet/kubelet-config.json for worker nodes. The following two function From ec53e9657510a7ffe57601f403f9df2616734094 Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Wed, 6 Oct 2021 15:35:03 -0700 Subject: [PATCH 203/621] Handling a failure scenario with ipv6 change (#779) * adding support for IPv6 node group * handling a failure scenario with ipv6 change Co-authored-by: Sinha --- files/bootstrap.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 1099bb729..0342e9081 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -365,10 +365,10 @@ if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then APISERVER_ENDPOINT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $3}') SERVICE_IPV4_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $4}') SERVICE_IPV6_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $5}') -fi -if [[ -z "${IP_FAMILY}" ]]; then + if [[ -z "${IP_FAMILY}" ]]; then IP_FAMILY=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $2}') + fi fi if [[ -z "${IP_FAMILY}" ]] || [[ 
"${IP_FAMILY}" == "None" ]]; then From dbba9499841d3936d285bd2427f90ef0cdd385b3 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 12 Oct 2021 10:58:52 -0700 Subject: [PATCH 204/621] Updates 1.19+ kubernetes build date in Makefile (#781) --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 5dfaea314..baf3f34f6 100644 --- a/Makefile +++ b/Makefile @@ -57,12 +57,12 @@ k8s: validate .PHONY: 1.19 1.19: - $(MAKE) k8s kubernetes_version=1.19.13 kubernetes_build_date=2021-09-02 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.19.14 kubernetes_build_date=2021-10-12 pull_cni_from_github=true .PHONY: 1.20 1.20: - $(MAKE) k8s kubernetes_version=1.20.7 kubernetes_build_date=2021-09-02 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.20.10 kubernetes_build_date=2021-10-12 pull_cni_from_github=true .PHONY: 1.21 1.21: - $(MAKE) k8s kubernetes_version=1.21.2 kubernetes_build_date=2021-09-02 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.21.4 kubernetes_build_date=2021-10-12 pull_cni_from_github=true From 47abf451de2a24bab7411015463c5f87671f5307 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 12 Oct 2021 16:50:10 -0700 Subject: [PATCH 205/621] Adds release v20211008 to the CHANGELOG (#783) --- CHANGELOG.md | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b891b862..b4eec0c80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,53 @@ # Changelog -AMI Release v20211004 +## AMI Release v20211008 + +* amazon-eks-gpu-node-1.21-v20211008 +* amazon-eks-gpu-node-1.20-v20211008 +* amazon-eks-gpu-node-1.19-v20211008 +* amazon-eks-gpu-node-1.18-v20211008 +* amazon-eks-gpu-node-1.17-v20211008 +* amazon-eks-gpu-node-1.16-v20211008 +* amazon-eks-gpu-node-1.15-v20211008 +* amazon-eks-arm64-node-1.21-v20211008 +* amazon-eks-arm64-node-1.20-v20211008 +* amazon-eks-arm64-node-1.19-v20211008 +* 
amazon-eks-arm64-node-1.18-v20211008 +* amazon-eks-arm64-node-1.17-v20211008 +* amazon-eks-arm64-node-1.16-v20211008 +* amazon-eks-arm64-node-1.15-v20211008 +* amazon-eks-node-1.21-v20211008 +* amazon-eks-node-1.20-v20211008 +* amazon-eks-node-1.19-v20211008 +* amazon-eks-node-1.18-v20211008 +* amazon-eks-node-1.17-v20211008 +* amazon-eks-node-1.16-v20211008 +* amazon-eks-node-1.15-v20211008 + +Binaries used to build these AMIs are published: + +* s3://amazon-eks/1.21.4/2021-10-12/ +* s3://amazon-eks/1.20.10/2021-10-12/ +* s3://amazon-eks/1.19.14/2021-10-12/ +* s3://amazon-eks/1.18.20/2021-09-02/ +* s3://amazon-eks/1.17.17/2021-09-02/ +* s3://amazon-eks/1.16.15/2021-09-02/ + +AMI details: + +* kernel: 5.4.149-73.259.amzn2 (1.19 and above), 4.14.248-189.473.amzn2 (1.18 and below) +* dockerd: 20.10.7-3.amzn2 +* containerd: 1.4.6-3.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: + +* kubelet binaries have been updated for Kubernetes versions 1.19, 1.20 and 1.21, which include [a patch to fix an issue where kubelet can fail to unmount volumes](https://github.com/kubernetes/kubernetes/pull/102576) + +## AMI Release v20211004 * amazon-eks-gpu-node-1.20-v20211004 * amazon-eks-gpu-node-1.19-v20211004 From 62894ce55683da84271ea89668265b87c5958d88 Mon Sep 17 00:00:00 2001 From: suket22 Date: Mon, 18 Oct 2021 16:33:52 -0700 Subject: [PATCH 206/621] Adding v20211013 AMIs to the changelog (#786) * Adding v20211013 AMIs to the changelog --- CHANGELOG.md | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4eec0c80..f2cf1302e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,49 @@ # Changelog +## AMI Release v20211013 + +* amazon-eks-gpu-node-1.21-v20211013 +* amazon-eks-gpu-node-1.20-v20211013 +* amazon-eks-gpu-node-1.19-v20211013 +* amazon-eks-gpu-node-1.18-v20211013 +* amazon-eks-gpu-node-1.17-v20211013 
+* amazon-eks-gpu-node-1.16-v20211013 +* amazon-eks-arm64-node-1.21-v20211013 +* amazon-eks-arm64-node-1.20-v20211013 +* amazon-eks-arm64-node-1.19-v20211013 +* amazon-eks-arm64-node-1.18-v20211013 +* amazon-eks-arm64-node-1.17-v20211013 +* amazon-eks-arm64-node-1.16-v20211013 +* amazon-eks-node-1.21-v20211013 +* amazon-eks-node-1.20-v20211013 +* amazon-eks-node-1.19-v20211013 +* amazon-eks-node-1.18-v20211013 +* amazon-eks-node-1.17-v20211013 +* amazon-eks-node-1.16-v20211013 + +Binaries used to build these AMIs are published: + +* s3://amazon-eks/1.21.4/2021-10-12/ +* s3://amazon-eks/1.20.10/2021-10-12/ +* s3://amazon-eks/1.19.14/2021-10-12/ +* s3://amazon-eks/1.18.20/2021-09-02/ +* s3://amazon-eks/1.17.17/2021-09-02/ +* s3://amazon-eks/1.16.15/2021-09-02/ + +AMI details: + +* kernel: 5.4.149-73.259.amzn2 (1.19 and above), 4.14.248-189.473.amzn2 (1.18 and below) +* dockerd: 20.10.7-3.amzn2 +* containerd: 1.4.6-3.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: + +* A fix has been made to the GPU AMIs to ensure they work correctly with containerd as the container runtime. 
+ ## AMI Release v20211008 * amazon-eks-gpu-node-1.21-v20211008 From f2e85111a5a89a3179625bdf202037c2449e8d21 Mon Sep 17 00:00:00 2001 From: Theodore Salvo Date: Wed, 20 Oct 2021 17:27:52 -0400 Subject: [PATCH 207/621] Change default output of AWS API call to JSON (#788) --- files/max-pods-calculator.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/max-pods-calculator.sh b/files/max-pods-calculator.sh index 0b9f8e653..bd314ec68 100644 --- a/files/max-pods-calculator.sh +++ b/files/max-pods-calculator.sh @@ -113,7 +113,7 @@ if [[ "$CNI_MAJOR_VERSION" -gt 1 ]] || ([[ "$CNI_MAJOR_VERSION" = 1 ]] && [[ "$C PREFIX_DELEGATION_SUPPORTED=true fi -DESCRIBE_INSTANCES_RESULT=$(aws ec2 describe-instance-types --instance-type $INSTANCE_TYPE --query 'InstanceTypes[0].{Hypervisor: Hypervisor, EniCount: NetworkInfo.MaximumNetworkInterfaces, PodsPerEniCount: NetworkInfo.Ipv4AddressesPerInterface, CpuCount: VCpuInfo.DefaultVCpus'}) +DESCRIBE_INSTANCES_RESULT=$(aws ec2 describe-instance-types --instance-type $INSTANCE_TYPE --query 'InstanceTypes[0].{Hypervisor: Hypervisor, EniCount: NetworkInfo.MaximumNetworkInterfaces, PodsPerEniCount: NetworkInfo.Ipv4AddressesPerInterface, CpuCount: VCpuInfo.DefaultVCpus'} --output json) HYPERVISOR_TYPE=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.Hypervisor' ) IS_NITRO=false From 0f0882bed357c98238f32f8511061458c6936b44 Mon Sep 17 00:00:00 2001 From: Kausheel Kumar Date: Sat, 23 Oct 2021 08:30:20 +1100 Subject: [PATCH 208/621] Include Multus CNI logs in EKS Logs Collector script (#721) * Include Multus CNI logs * Updated wording in script output --- log-collector-script/linux/eks-log-collector.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 51c720b8d..84af9bc17 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -253,6 +253,7 @@ collect() { 
get_docker_info get_k8s_info get_ipamd_info + get_multus_info get_sysctls_info get_networking_info get_cni_config @@ -449,6 +450,13 @@ get_ipamd_info() { ok } +get_multus_info() { + try "collect Multus logs if they exist" + cp --force --dereference --recursive /var/log/pods/kube-system_kube-multus* "${COLLECT_DIR}"/var_log/ 2>/dev/null + + ok +} + get_sysctls_info() { try "collect sysctls information" # dump all sysctls From e9141f390d7f5b4cb5397c8a1630dbd42ebedf2c Mon Sep 17 00:00:00 2001 From: Andy Spiers Date: Tue, 2 Nov 2021 21:04:58 +0000 Subject: [PATCH 209/621] Include max-pods-calculator.sh in image (#792) --- scripts/install-worker.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index ccb4a6441..4a4705281 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -272,6 +272,8 @@ sudo mkdir -p /etc/eks sudo mv $TEMPLATE_DIR/eni-max-pods.txt /etc/eks/eni-max-pods.txt sudo mv $TEMPLATE_DIR/bootstrap.sh /etc/eks/bootstrap.sh sudo chmod +x /etc/eks/bootstrap.sh +sudo mv $TEMPLATE_DIR/max-pods-calculator.sh /etc/eks/max-pods-calculator.sh +sudo chmod +x /etc/eks/max-pods-calculator.sh SONOBUOY_E2E_REGISTRY="${SONOBUOY_E2E_REGISTRY:-}" if [[ -n "$SONOBUOY_E2E_REGISTRY" ]]; then From ff11634b5c343be3de84f1d7a307c571fd1099b0 Mon Sep 17 00:00:00 2001 From: suket22 Date: Wed, 3 Nov 2021 10:17:30 -0700 Subject: [PATCH 210/621] Update maxPods for instanceTypes c6i, vt1 and dl1 (#796) --- files/eni-max-pods.txt | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index f5d1b531f..74ffa3ce3 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2021-08-31T18:22:52Z +# This file was generated at 2021-11-03T09:55:58-07:00 # # Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods @@ -108,6 +108,15 @@ c6gn.8xlarge 234 c6gn.large 29 c6gn.medium 8 c6gn.xlarge 58 +c6i.12xlarge 234 +c6i.16xlarge 737 +c6i.24xlarge 737 +c6i.2xlarge 58 +c6i.32xlarge 737 +c6i.4xlarge 234 +c6i.8xlarge 234 +c6i.large 29 +c6i.xlarge 58 cc2.8xlarge 234 cr1.8xlarge 234 d2.2xlarge 58 @@ -124,6 +133,7 @@ d3en.4xlarge 38 d3en.6xlarge 58 d3en.8xlarge 78 d3en.xlarge 10 +dl1.24xlarge 2942 f1.16xlarge 394 f1.2xlarge 58 f1.4xlarge 234 @@ -413,6 +423,9 @@ u-6tb1.56xlarge 737 u-6tb1.metal 147 u-9tb1.112xlarge 737 u-9tb1.metal 147 +vt1.24xlarge 737 +vt1.3xlarge 58 +vt1.6xlarge 234 x1.16xlarge 234 x1.32xlarge 234 x1e.16xlarge 234 From 216a53b7650313ec02e06bb42e9e4efc329396f6 Mon Sep 17 00:00:00 2001 From: Visuna Date: Fri, 5 Nov 2021 14:46:25 -0700 Subject: [PATCH 211/621] Disable weak ciphers (#798) Co-authored-by: Qingqing Li --- scripts/install-worker.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 4a4705281..a057d528c 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -92,6 +92,14 @@ else echo "tsc as a clock source is not applicable, skipping." 
fi +################################################################################ +### SSH ######################################################################## +################################################################################ + +# Disable weak ciphers +echo -e "\nCiphers chacha20-poly1305@openssh.com,aes128-ctr,aes256-ctr,aes128-gcm@openssh.com,aes256-gcm@openssh.com" | sudo tee -a /etc/ssh/sshd_config +sudo systemctl restart sshd.service + ################################################################################ ### iptables ################################################################### ################################################################################ From 830cd373a16f737e839aebf3bab8ff1094e200a1 Mon Sep 17 00:00:00 2001 From: Visuna Date: Wed, 10 Nov 2021 17:03:01 -0800 Subject: [PATCH 212/621] Update build directories (#801) ### Testing make 1.19 make 1.20 make 1.21 --- Makefile | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index baf3f34f6..ad756df6f 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.16 1.17 1.18 1.19 1.20 1.21 +all: 1.17 1.18 1.19 1.20 1.21 .PHONY: validate validate: @@ -42,11 +42,6 @@ k8s: validate # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html - -.PHONY: 1.16 -1.16: - $(MAKE) k8s kubernetes_version=1.16.15 kubernetes_build_date=2021-09-02 pull_cni_from_github=true - .PHONY: 1.17 1.17: $(MAKE) k8s kubernetes_version=1.17.17 kubernetes_build_date=2021-09-02 pull_cni_from_github=true @@ -57,12 +52,12 @@ k8s: validate .PHONY: 1.19 1.19: - $(MAKE) k8s kubernetes_version=1.19.14 kubernetes_build_date=2021-10-12 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.19.15 kubernetes_build_date=2021-11-10 pull_cni_from_github=true .PHONY: 1.20 1.20: - $(MAKE) k8s kubernetes_version=1.20.10 kubernetes_build_date=2021-10-12 
pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.20.11 kubernetes_build_date=2021-11-10 pull_cni_from_github=true .PHONY: 1.21 1.21: - $(MAKE) k8s kubernetes_version=1.21.4 kubernetes_build_date=2021-10-12 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.21.5 kubernetes_build_date=2021-11-10 pull_cni_from_github=true From e25679c227aa6f7ce2f410283f21725a5e8eb000 Mon Sep 17 00:00:00 2001 From: Visuna Date: Thu, 11 Nov 2021 16:22:20 -0800 Subject: [PATCH 213/621] Update CHANGELOG.md (#804) --- CHANGELOG.md | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2cf1302e..cd4a0d18b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,52 @@ # Changelog +### AMI Release v20211109 +* amazon-eks-gpu-node-1.21-v20211109 +* amazon-eks-gpu-node-1.20-v20211109 +* amazon-eks-gpu-node-1.19-v20211109 +* amazon-eks-gpu-node-1.18-v20211109 +* amazon-eks-gpu-node-1.17-v20211109 +* amazon-eks-arm64-node-1.21-v20211109 +* amazon-eks-arm64-node-1.20-v20211109 +* amazon-eks-arm64-node-1.19-v20211109 +* amazon-eks-arm64-node-1.18-v20211109 +* amazon-eks-arm64-node-1.17-v20211109 +* amazon-eks-node-1.21-v20211109 +* amazon-eks-node-1.20-v20211109 +* amazon-eks-node-1.19-v20211109 +* amazon-eks-node-1.18-v20211109 +* amazon-eks-node-1.17-v20211109 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2020-09-02/ +* s3://amazon-eks/1.17.17/2020-09-02/ + +AMI details: +* kernel: 5.4.149-73.259.amzn2 (1.19 and above), 4.14.252-195.483.amzn2 (1.18 and below) +* dockerd: 20.10.7-3.amzn2 +* containerd: 1.4.6-3.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +* Upgrade kernel version for 1.17 and 1.18 to 4.14.252-195.483.amzn2 +* Upgrade cuda version from 
460.73.01 to 470.57.02 +* Upgrade kubelet version + * 1.19.14 -> 1.19.15 + * 1.20.10 -> 1.20.11 + * 1.21.4 -> 1.21.5 +* Remove cbc ciphers and use following recommended ciphers + * chacha20-poly1305@openssh.com + * aes128-ctr + * aes256-ctr + * aes128-gcm@openssh.com + * aes256-gcm@openssh.com + ## AMI Release v20211013 * amazon-eks-gpu-node-1.21-v20211013 From 4540137497170115ce79652c45ac662049201720 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 17 Nov 2021 14:11:09 -0800 Subject: [PATCH 214/621] Adds g5 instances to max-pods file (#808) --- files/eni-max-pods.txt | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 74ffa3ce3..67006dd66 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. # -# This file was generated at 2021-11-03T09:55:58-07:00 +# This file was generated at 2021-11-17T11:55:40-08:00 # # Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods @@ -155,6 +155,14 @@ g4dn.4xlarge 29 g4dn.8xlarge 58 g4dn.metal 737 g4dn.xlarge 29 +g5.12xlarge 737 +g5.16xlarge 234 +g5.24xlarge 737 +g5.2xlarge 58 +g5.48xlarge 737 +g5.4xlarge 234 +g5.8xlarge 234 +g5.xlarge 58 h1.16xlarge 737 h1.2xlarge 58 h1.4xlarge 234 From 02a380b3a69eba9b798632d126c7e436bcb28b2a Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Fri, 19 Nov 2021 12:21:24 -0800 Subject: [PATCH 215/621] Update CHANGELOG for v20211117 release (#810) Update CHANGELOG for v20211117 release --- CHANGELOG.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd4a0d18b..9f4eb89e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,41 @@ # Changelog +### AMI Release v20211117 +* amazon-eks-gpu-node-1.21-v20211117 +* amazon-eks-gpu-node-1.20-v20211117 
+* amazon-eks-gpu-node-1.19-v20211117 +* amazon-eks-gpu-node-1.18-v20211117 +* amazon-eks-gpu-node-1.17-v20211117 +* amazon-eks-arm64-node-1.21-v20211117 +* amazon-eks-arm64-node-1.20-v20211117 +* amazon-eks-arm64-node-1.19-v20211117 +* amazon-eks-arm64-node-1.18-v20211117 +* amazon-eks-arm64-node-1.17-v20211117 +* amazon-eks-node-1.21-v20211117 +* amazon-eks-node-1.20-v20211117 +* amazon-eks-node-1.19-v20211117 +* amazon-eks-node-1.18-v20211117 +* amazon-eks-node-1.17-v20211117 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2020-09-02/ +* s3://amazon-eks/1.17.17/2020-09-02/ + +AMI details: +* kernel: 5.4.156-83.273.amzn2 (1.19 and above), 4.14.252-195.483.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-7.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0-1.amzn2 + +Notable changes: +Update `containerd` to `1.4.6-7.amzn2` and `docker` to `20.10.7-5.amzn2` to patch vulnerabilities in [CVE-2021-41190](https://alas.aws.amazon.com/ALAS-2021-1551.html) + ### AMI Release v20211109 * amazon-eks-gpu-node-1.21-v20211109 * amazon-eks-gpu-node-1.20-v20211109 From b209f51c82174ee9c3eee90a714276378f390db4 Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Fri, 19 Nov 2021 12:52:10 -0800 Subject: [PATCH 216/621] Update docker and containerd versions (#811) This is necessary to fix vulnerabilities in [CVE-2021-41190](https://alas.aws.amazon.com/ALAS-2021-1551.html) --- eks-worker-al2.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 848f1b00e..178f4941d 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -13,8 +13,8 @@ "kubernetes_version": null, "kubernetes_build_date": null, "kernel_version": "", - "docker_version": "20.10.7-3.amzn2", - 
"containerd_version": "1.4.6-3.amzn2", + "docker_version": "20.10.7-5.amzn2", + "containerd_version": "1.4.6-7.amzn2", "runc_version": "1.0.0-2.amzn2", "cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", From cf97438d38cc6d593eb298660793360de5aced65 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 22 Nov 2021 16:28:12 -0800 Subject: [PATCH 217/621] Removes instance type validation in CFN template (#815) --- amazon-eks-nodegroup.yaml | 407 -------------------------------------- 1 file changed, 407 deletions(-) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index 630551546..9b8cfbd65 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -87,413 +87,6 @@ Parameters: NodeInstanceType: Type: String Default: t3.medium - AllowedValues: - - a1.2xlarge - - a1.4xlarge - - a1.large - - a1.medium - - a1.metal - - a1.xlarge - - c1.medium - - c1.xlarge - - c3.2xlarge - - c3.4xlarge - - c3.8xlarge - - c3.large - - c3.xlarge - - c4.2xlarge - - c4.4xlarge - - c4.8xlarge - - c4.large - - c4.xlarge - - c5.12xlarge - - c5.18xlarge - - c5.24xlarge - - c5.2xlarge - - c5.4xlarge - - c5.9xlarge - - c5.large - - c5.metal - - c5.xlarge - - c5a.12xlarge - - c5a.16xlarge - - c5a.24xlarge - - c5a.2xlarge - - c5a.4xlarge - - c5a.8xlarge - - c5a.large - - c5a.metal - - c5a.xlarge - - c5ad.12xlarge - - c5ad.16xlarge - - c5ad.24xlarge - - c5ad.2xlarge - - c5ad.4xlarge - - c5ad.8xlarge - - c5ad.large - - c5ad.metal - - c5ad.xlarge - - c5d.12xlarge - - c5d.18xlarge - - c5d.24xlarge - - c5d.2xlarge - - c5d.4xlarge - - c5d.9xlarge - - c5d.large - - c5d.metal - - c5d.xlarge - - c5n.18xlarge - - c5n.2xlarge - - c5n.4xlarge - - c5n.9xlarge - - c5n.large - - c5n.metal - - c5n.xlarge - - c6g.12xlarge - - c6g.16xlarge - - c6g.2xlarge - - c6g.4xlarge - - c6g.8xlarge - - c6g.large - - c6g.medium - - c6g.metal - - c6g.xlarge - - c6gd.12xlarge - - c6gd.16xlarge - - c6gd.2xlarge - - c6gd.4xlarge - - c6gd.8xlarge - - c6gd.large - - c6gd.medium - - c6gd.metal - - 
c6gd.xlarge - - c6gn.12xlarge - - c6gn.16xlarge - - c6gn.2xlarge - - c6gn.4xlarge - - c6gn.8xlarge - - c6gn.large - - c6gn.medium - - c6gn.xlarge - - cc2.8xlarge - - cr1.8xlarge - - d2.2xlarge - - d2.4xlarge - - d2.8xlarge - - d2.xlarge - - d3.2xlarge - - d3.4xlarge - - d3.8xlarge - - d3.xlarge - - d3en.12xlarge - - d3en.2xlarge - - d3en.4xlarge - - d3en.6xlarge - - d3en.8xlarge - - d3en.xlarge - - f1.16xlarge - - f1.2xlarge - - f1.4xlarge - - g2.2xlarge - - g2.8xlarge - - g3.16xlarge - - g3.4xlarge - - g3.8xlarge - - g3s.xlarge - - g4ad.16xlarge - - g4ad.4xlarge - - g4ad.8xlarge - - g4dn.12xlarge - - g4dn.16xlarge - - g4dn.2xlarge - - g4dn.4xlarge - - g4dn.8xlarge - - g4dn.metal - - g4dn.xlarge - - h1.16xlarge - - h1.2xlarge - - h1.4xlarge - - h1.8xlarge - - hs1.8xlarge - - i2.2xlarge - - i2.4xlarge - - i2.8xlarge - - i2.xlarge - - i3.16xlarge - - i3.2xlarge - - i3.4xlarge - - i3.8xlarge - - i3.large - - i3.metal - - i3.xlarge - - i3en.12xlarge - - i3en.24xlarge - - i3en.2xlarge - - i3en.3xlarge - - i3en.6xlarge - - i3en.large - - i3en.metal - - i3en.xlarge - - inf1.24xlarge - - inf1.2xlarge - - inf1.6xlarge - - inf1.xlarge - - m1.large - - m1.medium - - m1.small - - m1.xlarge - - m2.2xlarge - - m2.4xlarge - - m2.xlarge - - m3.2xlarge - - m3.large - - m3.medium - - m3.xlarge - - m4.10xlarge - - m4.16xlarge - - m4.2xlarge - - m4.4xlarge - - m4.large - - m4.xlarge - - m5.12xlarge - - m5.16xlarge - - m5.24xlarge - - m5.2xlarge - - m5.4xlarge - - m5.8xlarge - - m5.large - - m5.metal - - m5.xlarge - - m5a.12xlarge - - m5a.16xlarge - - m5a.24xlarge - - m5a.2xlarge - - m5a.4xlarge - - m5a.8xlarge - - m5a.large - - m5a.xlarge - - m5ad.12xlarge - - m5ad.16xlarge - - m5ad.24xlarge - - m5ad.2xlarge - - m5ad.4xlarge - - m5ad.8xlarge - - m5ad.large - - m5ad.xlarge - - m5d.12xlarge - - m5d.16xlarge - - m5d.24xlarge - - m5d.2xlarge - - m5d.4xlarge - - m5d.8xlarge - - m5d.large - - m5d.metal - - m5d.xlarge - - m5dn.12xlarge - - m5dn.16xlarge - - m5dn.24xlarge - - m5dn.2xlarge - - 
m5dn.4xlarge - - m5dn.8xlarge - - m5dn.large - - m5dn.metal - - m5dn.xlarge - - m5n.12xlarge - - m5n.16xlarge - - m5n.24xlarge - - m5n.2xlarge - - m5n.4xlarge - - m5n.8xlarge - - m5n.large - - m5n.metal - - m5n.xlarge - - m5zn.12xlarge - - m5zn.2xlarge - - m5zn.3xlarge - - m5zn.6xlarge - - m5zn.large - - m5zn.metal - - m5zn.xlarge - - m6g.12xlarge - - m6g.16xlarge - - m6g.2xlarge - - m6g.4xlarge - - m6g.8xlarge - - m6g.large - - m6g.medium - - m6g.metal - - m6g.xlarge - - m6gd.12xlarge - - m6gd.16xlarge - - m6gd.2xlarge - - m6gd.4xlarge - - m6gd.8xlarge - - m6gd.large - - m6gd.medium - - m6gd.metal - - m6gd.xlarge - - mac1.metal - - p2.16xlarge - - p2.8xlarge - - p2.xlarge - - p3.16xlarge - - p3.2xlarge - - p3.8xlarge - - p3dn.24xlarge - - p4d.24xlarge - - r3.2xlarge - - r3.4xlarge - - r3.8xlarge - - r3.large - - r3.xlarge - - r4.16xlarge - - r4.2xlarge - - r4.4xlarge - - r4.8xlarge - - r4.large - - r4.xlarge - - r5.12xlarge - - r5.16xlarge - - r5.24xlarge - - r5.2xlarge - - r5.4xlarge - - r5.8xlarge - - r5.large - - r5.metal - - r5.xlarge - - r5a.12xlarge - - r5a.16xlarge - - r5a.24xlarge - - r5a.2xlarge - - r5a.4xlarge - - r5a.8xlarge - - r5a.large - - r5a.xlarge - - r5ad.12xlarge - - r5ad.16xlarge - - r5ad.24xlarge - - r5ad.2xlarge - - r5ad.4xlarge - - r5ad.8xlarge - - r5ad.large - - r5ad.xlarge - - r5b.12xlarge - - r5b.16xlarge - - r5b.24xlarge - - r5b.2xlarge - - r5b.4xlarge - - r5b.8xlarge - - r5b.large - - r5b.metal - - r5b.xlarge - - r5d.12xlarge - - r5d.16xlarge - - r5d.24xlarge - - r5d.2xlarge - - r5d.4xlarge - - r5d.8xlarge - - r5d.large - - r5d.metal - - r5d.xlarge - - r5dn.12xlarge - - r5dn.16xlarge - - r5dn.24xlarge - - r5dn.2xlarge - - r5dn.4xlarge - - r5dn.8xlarge - - r5dn.large - - r5dn.metal - - r5dn.xlarge - - r5n.12xlarge - - r5n.16xlarge - - r5n.24xlarge - - r5n.2xlarge - - r5n.4xlarge - - r5n.8xlarge - - r5n.large - - r5n.metal - - r5n.xlarge - - r6g.12xlarge - - r6g.16xlarge - - r6g.2xlarge - - r6g.4xlarge - - r6g.8xlarge - - r6g.large - - 
r6g.medium - - r6g.metal - - r6g.xlarge - - r6gd.12xlarge - - r6gd.16xlarge - - r6gd.2xlarge - - r6gd.4xlarge - - r6gd.8xlarge - - r6gd.large - - r6gd.medium - - r6gd.metal - - r6gd.xlarge - - t1.micro - - t2.2xlarge - - t2.large - - t2.medium - - t2.micro - - t2.nano - - t2.small - - t2.xlarge - - t3.2xlarge - - t3.large - - t3.medium - - t3.micro - - t3.nano - - t3.small - - t3.xlarge - - t3a.2xlarge - - t3a.large - - t3a.medium - - t3a.micro - - t3a.nano - - t3a.small - - t3a.xlarge - - t4g.2xlarge - - t4g.large - - t4g.medium - - t4g.micro - - t4g.nano - - t4g.small - - t4g.xlarge - - u-12tb1.112xlarge - - u-12tb1.metal - - u-18tb1.metal - - u-24tb1.metal - - u-6tb1.112xlarge - - u-6tb1.56xlarge - - u-6tb1.metal - - u-9tb1.112xlarge - - u-9tb1.metal - - x1.16xlarge - - x1.32xlarge - - x1e.16xlarge - - x1e.2xlarge - - x1e.32xlarge - - x1e.4xlarge - - x1e.8xlarge - - x1e.xlarge - - x2gd.12xlarge - - x2gd.16xlarge - - x2gd.2xlarge - - x2gd.4xlarge - - x2gd.8xlarge - - x2gd.large - - x2gd.medium - - x2gd.metal - - x2gd.xlarge - - z1d.12xlarge - - z1d.2xlarge - - z1d.3xlarge - - z1d.6xlarge - - z1d.large - - z1d.metal - - z1d.xlarge - ConstraintDescription: Must be a valid EC2 instance type Description: EC2 instance type for the node instances NodeVolumeSize: From ea46f6bb2604138b375ec53fb8d41c64e1d2de75 Mon Sep 17 00:00:00 2001 From: Carter McKinnon Date: Tue, 23 Nov 2021 13:03:57 -0800 Subject: [PATCH 218/621] Remove 1.17. 
Learn more about Kubernetes version support for EKS at https://docs.aws.amazon.com/eks/latest/userguide/kubernetes-versions.html (#818) --- Makefile | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Makefile b/Makefile index ad756df6f..127e1e205 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.17 1.18 1.19 1.20 1.21 +all: 1.18 1.19 1.20 1.21 .PHONY: validate validate: @@ -42,10 +42,6 @@ k8s: validate # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html -.PHONY: 1.17 -1.17: - $(MAKE) k8s kubernetes_version=1.17.17 kubernetes_build_date=2021-09-02 pull_cni_from_github=true - .PHONY: 1.18 1.18: $(MAKE) k8s kubernetes_version=1.18.20 kubernetes_build_date=2021-09-02 pull_cni_from_github=true From 015e9947782928fd3c89e1ea3d54f5f07f6b72cc Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 23 Nov 2021 16:52:57 -0800 Subject: [PATCH 219/621] Adds basic uname validation based on kernel version passed in (#819) --- eks-worker-al2.json | 5 ++++- scripts/validate.sh | 11 +++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 178f4941d..18d829ddb 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -163,7 +163,10 @@ { "type": "shell", "remote_folder": "{{ user `remote_folder`}}", - "script": "{{template_dir}}/scripts/validate.sh" + "script": "{{template_dir}}/scripts/validate.sh", + "environment_vars": [ + "KERNEL_VERSION={{user `kernel_version`}}" + ] } ], "post-processors": [ diff --git a/scripts/validate.sh b/scripts/validate.sh index cc6e22e8e..02ab54326 100644 --- a/scripts/validate.sh +++ b/scripts/validate.sh @@ -34,3 +34,14 @@ validate_file_nonexists '/var/log/cloud-init-output.log' validate_file_nonexists '/var/log/cloud-init.log' validate_file_nonexists '/var/log/secure' validate_file_nonexists '/var/log/wtmp' + +actual_kernel=$(uname -r) +echo "Verifying that 
kernel version $actual_kernel matches $KERNEL_VERSION" + +if [[ $actual_kernel == $KERNEL_VERSION* ]] +then + echo "Kernel matches expected version" +else + echo "Kernel does not match expected version." + exit 1 +fi From 6dd313c6c61e5b1ffbbf8f3c739466452026c615 Mon Sep 17 00:00:00 2001 From: Sergio Ballesteros Date: Mon, 6 Dec 2021 17:44:41 +0100 Subject: [PATCH 220/621] Update eni-max-pods.txt (#822) --- files/eni-max-pods.txt | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 67006dd66..fd1feda3b 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. # -# This file was generated at 2021-11-17T11:55:40-08:00 +# This file was generated at 2021-11-30T17:52:24+01:00 # # Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods @@ -116,6 +116,7 @@ c6i.32xlarge 737 c6i.4xlarge 234 c6i.8xlarge 234 c6i.large 29 +c6i.metal 737 c6i.xlarge 58 cc2.8xlarge 234 cr1.8xlarge 234 @@ -133,7 +134,6 @@ d3en.4xlarge 38 d3en.6xlarge 58 d3en.8xlarge 78 d3en.xlarge 10 -dl1.24xlarge 2942 f1.16xlarge 394 f1.2xlarge 58 f1.4xlarge 234 @@ -187,10 +187,22 @@ i3en.6xlarge 234 i3en.large 29 i3en.metal 737 i3en.xlarge 58 +im4gn.16xlarge 737 +im4gn.2xlarge 58 +im4gn.4xlarge 234 +im4gn.8xlarge 234 +im4gn.large 29 +im4gn.xlarge 58 inf1.24xlarge 321 inf1.2xlarge 38 inf1.6xlarge 234 inf1.xlarge 38 +is4gen.2xlarge 58 +is4gen.4xlarge 234 +is4gen.8xlarge 234 +is4gen.large 29 +is4gen.medium 8 +is4gen.xlarge 58 m1.large 29 m1.medium 12 m1.small 8 @@ -258,7 +270,6 @@ m5n.2xlarge 58 m5n.4xlarge 234 m5n.8xlarge 234 m5n.large 29 -m5n.metal 737 m5n.xlarge 58 m5zn.12xlarge 737 m5zn.2xlarge 58 @@ -267,6 +278,16 @@ m5zn.6xlarge 234 m5zn.large 29 m5zn.metal 737 m5zn.xlarge 58 +m6a.12xlarge 234 
+m6a.16xlarge 737 +m6a.24xlarge 737 +m6a.2xlarge 58 +m6a.32xlarge 737 +m6a.48xlarge 737 +m6a.4xlarge 234 +m6a.8xlarge 234 +m6a.large 29 +m6a.xlarge 58 m6g.12xlarge 234 m6g.16xlarge 737 m6g.2xlarge 58 @@ -293,6 +314,7 @@ m6i.32xlarge 737 m6i.4xlarge 234 m6i.8xlarge 234 m6i.large 29 +m6i.metal 737 m6i.xlarge 58 mac1.metal 234 p2.16xlarge 234 @@ -393,6 +415,16 @@ r6gd.large 29 r6gd.medium 8 r6gd.metal 737 r6gd.xlarge 58 +r6i.12xlarge 234 +r6i.16xlarge 737 +r6i.24xlarge 737 +r6i.2xlarge 58 +r6i.32xlarge 737 +r6i.4xlarge 234 +r6i.8xlarge 234 +r6i.large 29 +r6i.metal 737 +r6i.xlarge 58 t1.micro 4 t2.2xlarge 44 t2.large 35 From e68b46fabe46944cdb0c71c1834d46ffde1dfcd5 Mon Sep 17 00:00:00 2001 From: suket22 Date: Mon, 6 Dec 2021 10:38:38 -0800 Subject: [PATCH 221/621] Adds maxPods for g5g, m5n.metal and dl1.24xlarge (#826) * Adds maxPods for g5g, m5n.metal and dl1.24xlarge * Fix dl1.24xlarge max pods --- files/eni-max-pods.txt | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index fd1feda3b..93dbf6646 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2021-11-30T17:52:24+01:00 +# This file was generated at 2021-12-06T09:52:52-08:00 # # Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods @@ -134,6 +134,7 @@ d3en.4xlarge 38 d3en.6xlarge 58 d3en.8xlarge 78 d3en.xlarge 10 +dl1.24xlarge 737 f1.16xlarge 394 f1.2xlarge 58 f1.4xlarge 234 @@ -163,6 +164,12 @@ g5.48xlarge 737 g5.4xlarge 234 g5.8xlarge 234 g5.xlarge 58 +g5g.16xlarge 737 +g5g.2xlarge 58 +g5g.4xlarge 234 +g5g.8xlarge 234 +g5g.metal 737 +g5g.xlarge 58 h1.16xlarge 737 h1.2xlarge 58 h1.4xlarge 234 @@ -270,6 +277,7 @@ m5n.2xlarge 58 m5n.4xlarge 234 m5n.8xlarge 234 m5n.large 29 +m5n.metal 737 m5n.xlarge 58 m5zn.12xlarge 737 m5zn.2xlarge 58 From 077fb181f47347f58faacceabc47e105029c8fda Mon Sep 17 00:00:00 2001 From: Carter McKinnon Date: Mon, 6 Dec 2021 13:52:24 -0800 Subject: [PATCH 222/621] Use default containerd address. (#823) --- files/pull-sandbox-image.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/pull-sandbox-image.sh b/files/pull-sandbox-image.sh index 02f9c7040..588f155f2 100644 --- a/files/pull-sandbox-image.sh +++ b/files/pull-sandbox-image.sh @@ -13,7 +13,7 @@ for attempt in `seq 0 $API_RETRY_ATTEMPTS`; do fi ### pull sandbox image from ecr ### username will always be constant i.e; AWS - sudo ctr --address=/run/dockershim.sock --namespace k8s.io image pull $sandbox_image --user AWS:$ecr_password + sudo ctr --namespace k8s.io image pull $sandbox_image --user AWS:$ecr_password rc=$?; if [[ $rc -eq 0 ]]; then break From d2d40a9b891d056a719109bfbefa09fdd628f27e Mon Sep 17 00:00:00 2001 From: suket22 Date: Wed, 8 Dec 2021 15:13:06 -0800 Subject: [PATCH 223/621] Update Changelog for v20211206 AMIs (#827) --- CHANGELOG.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f4eb89e6..0cc2171f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,38 @@ # Changelog +### 
AMI Release v20211206 +* amazon-eks-gpu-node-1.21-v20211206 +* amazon-eks-gpu-node-1.20-v20211206 +* amazon-eks-gpu-node-1.19-v20211206 +* amazon-eks-gpu-node-1.18-v20211206 +* amazon-eks-arm64-node-1.21-v20211206 +* amazon-eks-arm64-node-1.20-v20211206 +* amazon-eks-arm64-node-1.19-v20211206 +* amazon-eks-arm64-node-1.18-v20211206 +* amazon-eks-node-1.21-v20211206 +* amazon-eks-node-1.20-v20211206 +* amazon-eks-node-1.19-v20211206 +* amazon-eks-node-1.18-v20211206 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2020-09-02/ + +AMI details: +* kernel: 5.4.156-83.273.amzn2 (1.19 and above), 4.14.252-195.483.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-7.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0-1.amzn2 + +Notable changes: +* Adds new instanceTypes to the eni-max-pods.txt file. +* Patch for [AL2/ALAS-2021-1722](https://alas.aws.amazon.com/AL2/ALAS-2021-1722.html). + ### AMI Release v20211117 * amazon-eks-gpu-node-1.21-v20211117 * amazon-eks-gpu-node-1.20-v20211117 From 2d3e6c1ba6c1afcc4dec22f8b71216b6dc2aa86c Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Tue, 11 Jan 2022 17:09:51 -0800 Subject: [PATCH 224/621] lowering ip-family case before comparison (#838) Co-authored-by: Sinha --- files/bootstrap.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 0342e9081..8a9391bf7 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -294,6 +294,7 @@ if [ -z "$CLUSTER_NAME" ]; then fi if [[ ! -z "${IP_FAMILY}" ]]; then + IP_FAMILY="$(tr [A-Z] [a-z] <<< "$IP_FAMILY")" if [[ "${IP_FAMILY}" != "ipv4" ]] && [[ "${IP_FAMILY}" != "ipv6" ]] ; then echo "Invalid IpFamily. 
Only ipv4 or ipv6 are allowed" exit 1 From 52c8b01ad7fe55b825a4072111eac5f1d918124c Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Wed, 12 Jan 2022 18:41:02 -0800 Subject: [PATCH 225/621] adding changelog for v20220112 (#843) Co-authored-by: Sinha --- CHANGELOG.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cc2171f3..3c4fa1cca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,39 @@ # Changelog +### AMI Release v20220112 +* amazon-eks-gpu-node-1.21-v20220112 +* amazon-eks-gpu-node-1.20-v20220112 +* amazon-eks-gpu-node-1.19-v20220112 +* amazon-eks-gpu-node-1.18-v20220112 +* amazon-eks-arm64-node-1.21-v20220112 +* amazon-eks-arm64-node-1.20-v20220112 +* amazon-eks-arm64-node-1.19-v20220112 +* amazon-eks-arm64-node-1.18-v20220112 +* amazon-eks-node-1.21-v20220112 +* amazon-eks-node-1.20-v20220112 +* amazon-eks-node-1.19-v20220112 +* amazon-eks-node-1.18-v20220112 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2020-09-02/ + +AMI details: +* kernel: 5.4.162-86.275.amzn2 (1.19 and above), 4.14.256-197.484.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-7.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0-1.amzn2 + +Notable changes: +* Updating aws-cli ( aws-cli/1.22.32 ). Latest CLI is installed using the recommended steps [here](https://docs.aws.amazon.com/cli/v1/userguide/install-linux.html#install-linux-bundled). This change is specific to this AMI release. +* Added fix to handle failures when serviceIpv6Cidr isn't provided. Related issue: https://github.com/awslabs/amazon-eks-ami/issues/839. 
+* Added fix to make ipFamily check case-insensitive + ### AMI Release v20211206 * amazon-eks-gpu-node-1.21-v20211206 * amazon-eks-gpu-node-1.20-v20211206 From cf6cf102c2e8ce5cc985d33ca27b67661e24f250 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Sat, 22 Jan 2022 09:39:54 -0800 Subject: [PATCH 226/621] Update build date for 1.21 (#850) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 127e1e205..6a74478d3 100644 --- a/Makefile +++ b/Makefile @@ -56,4 +56,4 @@ k8s: validate .PHONY: 1.21 1.21: - $(MAKE) k8s kubernetes_version=1.21.5 kubernetes_build_date=2021-11-10 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.21.5 kubernetes_build_date=2022-01-21 pull_cni_from_github=true From dae6222ca1216af4e6f276702458afc16ec9c3ef Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Sun, 23 Jan 2022 14:28:30 -0800 Subject: [PATCH 227/621] Adding changelog for v20220123 AMI release (#851) --- CHANGELOG.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c4fa1cca..a44df770c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,36 @@ # Changelog +### AMI Release v20220123 + - amazon-eks-node-1.18-v20220123 + - amazon-eks-arm64-node-1.18-v20220123 + - amazon-eks-gpu-node-1.18-v20220123 + - amazon-eks-node-1.19-v20220123 + - amazon-eks-arm64-node-1.19-v20220123 + - amazon-eks-gpu-node-1.19-v20220123 + - amazon-eks-node-1.20-v20220123 + - amazon-eks-arm64-node-1.20-v20220123 + - amazon-eks-gpu-node-1.20-v20220123 + - amazon-eks-node-1.21-v20220123 + - amazon-eks-arm64-node-1.21-v20220123 + - amazon-eks-gpu-node-1.21-v20220123 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2022-01-21/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2020-09-02/ + +AMI details: +* kernel: 5.4.172-90.336.amzn2 (1.19 and above), 4.14.256-197.484.amzn2 (1.18 and below) +* 
dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-7.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +* Upgrade kernel version for Kubernetes 1.19 and above to 5.4.172-90.336.amzn2.x86_64 for CVE-2022-0185 +* Bug fix in kubelet for 1.21 AMIs to handle compacted IPv6 addresses returned by EC2 API. New Kubelet version: `v1.21.5-eks-9017834` ### AMI Release v20220112 * amazon-eks-gpu-node-1.21-v20220112 From 5513072734102c99ea94ec426d8818faac6a3c67 Mon Sep 17 00:00:00 2001 From: Carter McKinnon Date: Mon, 7 Feb 2022 10:04:34 -0800 Subject: [PATCH 228/621] Update eni-max-pods.txt (#859) Add support for new instance types. --- files/eni-max-pods.txt | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 93dbf6646..ee57e94ed 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,31 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2021-12-06T09:52:52-08:00 +# This file was generated at 2022-02-03T16:29:41-08:00 +# +# The regions queried were: +# - af-south-1 +# - ap-east-1 +# - ap-northeast-1 +# - ap-northeast-2 +# - ap-northeast-3 +# - ap-south-1 +# - ap-southeast-1 +# - ap-southeast-2 +# - ap-southeast-3 +# - ca-central-1 +# - eu-central-1 +# - eu-north-1 +# - eu-south-1 +# - eu-west-1 +# - eu-west-2 +# - eu-west-3 +# - me-south-1 +# - sa-east-1 +# - us-east-1 +# - us-east-2 +# - us-west-1 +# - us-west-2 # # Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods @@ -174,6 +198,7 @@ h1.16xlarge 737 h1.2xlarge 58 h1.4xlarge 234 h1.8xlarge 234 +hpc6a.48xlarge 100 hs1.8xlarge 234 i2.2xlarge 58 i2.4xlarge 234 @@ -491,6 +516,12 @@ x2gd.large 29 x2gd.medium 8 x2gd.metal 737 x2gd.xlarge 58 +x2iezn.12xlarge 737 +x2iezn.2xlarge 58 +x2iezn.4xlarge 234 +x2iezn.6xlarge 234 +x2iezn.8xlarge 234 +x2iezn.metal 737 z1d.12xlarge 737 z1d.2xlarge 58 z1d.3xlarge 234 From 73a6c167af321a0c2504089553e7d905c269553f Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 10 Feb 2022 12:54:39 -0800 Subject: [PATCH 229/621] Remove chacha20 from cipher list, to support FIPS. 
(#861) --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index a057d528c..2130db472 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -97,7 +97,7 @@ fi ################################################################################ # Disable weak ciphers -echo -e "\nCiphers chacha20-poly1305@openssh.com,aes128-ctr,aes256-ctr,aes128-gcm@openssh.com,aes256-gcm@openssh.com" | sudo tee -a /etc/ssh/sshd_config +echo -e "\nCiphers aes128-ctr,aes256-ctr,aes128-gcm@openssh.com,aes256-gcm@openssh.com" | sudo tee -a /etc/ssh/sshd_config sudo systemctl restart sshd.service ################################################################################ From 1bd48bc31e325cce9ba2316a2c36322100074d1a Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 14 Feb 2022 11:44:35 -0800 Subject: [PATCH 230/621] Update CHANGELOG.md (#864) --- CHANGELOG.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a44df770c..d782f8d7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,38 @@ # Changelog +### AMI Release v20220210 +* amazon-eks-gpu-node-1.21-v20220210 +* amazon-eks-gpu-node-1.20-v20220210 +* amazon-eks-gpu-node-1.19-v20220210 +* amazon-eks-gpu-node-1.18-v20220210 +* amazon-eks-arm64-node-1.21-v20220210 +* amazon-eks-arm64-node-1.20-v20220210 +* amazon-eks-arm64-node-1.19-v20220210 +* amazon-eks-arm64-node-1.18-v20220210 +* amazon-eks-node-1.21-v20220210 +* amazon-eks-node-1.20-v20220210 +* amazon-eks-node-1.19-v20220210 +* amazon-eks-node-1.18-v20220210 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2022-01-21/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.176-91.338.amzn2 (1.19 and above), 4.14.262-200.489.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* 
containerd: 1.4.6-7.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +- Upgrade kernel version for Kubernetes 1.18 to `4.14.262-200.489.amzn2`, addressing several CVE's. More information available in [ALAS2-2022-1749](https://alas.aws.amazon.com/AL2/ALAS-2022-1749.html) +- Support for `hpc6a` instance types. +- Removes support for the `chacha20-poly1305@openssh.com` cipher, which is not FIPS-compliant. + ### AMI Release v20220123 - amazon-eks-node-1.18-v20220123 - amazon-eks-arm64-node-1.18-v20220123 From ff9c99cc438417a109ddb43fc2000da21c358f8a Mon Sep 17 00:00:00 2001 From: Jayanth Varavani <1111446+jayanthvn@users.noreply.github.com> Date: Tue, 15 Feb 2022 12:43:34 -0800 Subject: [PATCH 231/621] New instanc type support (#865) --- files/eni-max-pods.txt | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index ee57e94ed..203187e6f 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,26 +11,21 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2022-02-03T16:29:41-08:00 +# This file was generated at 2022-02-15T18:47:49Z # # The regions queried were: -# - af-south-1 -# - ap-east-1 # - ap-northeast-1 # - ap-northeast-2 # - ap-northeast-3 # - ap-south-1 # - ap-southeast-1 # - ap-southeast-2 -# - ap-southeast-3 # - ca-central-1 # - eu-central-1 # - eu-north-1 -# - eu-south-1 # - eu-west-1 # - eu-west-2 # - eu-west-3 -# - me-south-1 # - sa-east-1 # - us-east-1 # - us-east-2 @@ -106,6 +101,16 @@ c5n.9xlarge 234 c5n.large 29 c5n.metal 737 c5n.xlarge 58 +c6a.12xlarge 234 +c6a.16xlarge 737 +c6a.24xlarge 737 +c6a.2xlarge 58 +c6a.32xlarge 737 +c6a.48xlarge 737 +c6a.4xlarge 234 +c6a.8xlarge 234 +c6a.large 29 +c6a.xlarge 58 c6g.12xlarge 234 c6g.16xlarge 737 c6g.2xlarge 58 @@ -491,6 +496,7 @@ u-12tb1.112xlarge 737 u-12tb1.metal 147 u-18tb1.metal 737 u-24tb1.metal 737 +u-3tb1.56xlarge 234 u-6tb1.112xlarge 737 u-6tb1.56xlarge 737 u-6tb1.metal 147 From 7c70de84546a15062219e27ae6a2dfc4302856a6 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 17 Feb 2022 15:16:45 -0800 Subject: [PATCH 232/621] Correct kubelet S3 paths (#866) * Correct kubelet S3 paths This was a typo that carried over several releases: `2020-09-02` should be `2021-09-02` --- CHANGELOG.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d782f8d7d..d7509ee7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,7 +51,7 @@ Binaries used to build these AMIs are published: * s3://amazon-eks/1.21.5/2022-01-21/ * s3://amazon-eks/1.20.11/2021-11-10/ * s3://amazon-eks/1.19.15/2021-11-10/ -* s3://amazon-eks/1.18.20/2020-09-02/ +* s3://amazon-eks/1.18.20/2021-09-02/ AMI details: * kernel: 5.4.172-90.336.amzn2 (1.19 and above), 4.14.256-197.484.amzn2 (1.18 and below) @@ -84,7 +84,7 @@ Binaries used to build these AMIs are published: * s3://amazon-eks/1.21.5/2021-11-10/ * s3://amazon-eks/1.20.11/2021-11-10/ * s3://amazon-eks/1.19.15/2021-11-10/ -* 
s3://amazon-eks/1.18.20/2020-09-02/ +* s3://amazon-eks/1.18.20/2021-09-02/ AMI details: * kernel: 5.4.162-86.275.amzn2 (1.19 and above), 4.14.256-197.484.amzn2 (1.18 and below) @@ -118,7 +118,7 @@ Binaries used to build these AMIs are published: * s3://amazon-eks/1.21.5/2021-11-10/ * s3://amazon-eks/1.20.11/2021-11-10/ * s3://amazon-eks/1.19.15/2021-11-10/ -* s3://amazon-eks/1.18.20/2020-09-02/ +* s3://amazon-eks/1.18.20/2021-09-02/ AMI details: * kernel: 5.4.156-83.273.amzn2 (1.19 and above), 4.14.252-195.483.amzn2 (1.18 and below) @@ -154,8 +154,8 @@ Binaries used to build these AMIs are published: * s3://amazon-eks/1.21.5/2021-11-10/ * s3://amazon-eks/1.20.11/2021-11-10/ * s3://amazon-eks/1.19.15/2021-11-10/ -* s3://amazon-eks/1.18.20/2020-09-02/ -* s3://amazon-eks/1.17.17/2020-09-02/ +* s3://amazon-eks/1.18.20/2021-09-02/ +* s3://amazon-eks/1.17.17/2021-09-02/ AMI details: * kernel: 5.4.156-83.273.amzn2 (1.19 and above), 4.14.252-195.483.amzn2 (1.18 and below) @@ -190,8 +190,8 @@ Binaries used to build these AMIs are published: * s3://amazon-eks/1.21.5/2021-11-10/ * s3://amazon-eks/1.20.11/2021-11-10/ * s3://amazon-eks/1.19.15/2021-11-10/ -* s3://amazon-eks/1.18.20/2020-09-02/ -* s3://amazon-eks/1.17.17/2020-09-02/ +* s3://amazon-eks/1.18.20/2021-09-02/ +* s3://amazon-eks/1.17.17/2021-09-02/ AMI details: * kernel: 5.4.149-73.259.amzn2 (1.19 and above), 4.14.252-195.483.amzn2 (1.18 and below) From ecca47c113fbb10a84b1fe283b85cb2e67471020 Mon Sep 17 00:00:00 2001 From: Suket Sharma Date: Sat, 19 Feb 2022 02:14:27 +0530 Subject: [PATCH 233/621] Update CHANGELOG.md (#868) --- CHANGELOG.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7509ee7d..295ac7428 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,37 @@ # Changelog + +### AMI Release v20220216 +* amazon-eks-gpu-node-1.21-v20220216 +* amazon-eks-gpu-node-1.20-v20220216 +* amazon-eks-gpu-node-1.19-v20220216 +* 
amazon-eks-gpu-node-1.18-v20220216 +* amazon-eks-arm64-node-1.21-v20220216 +* amazon-eks-arm64-node-1.20-v20220216 +* amazon-eks-arm64-node-1.19-v20220216 +* amazon-eks-arm64-node-1.18-v20220216 +* amazon-eks-node-1.21-v20220216 +* amazon-eks-node-1.20-v20220216 +* amazon-eks-node-1.19-v20220216 +* amazon-eks-node-1.18-v20220216 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2022-01-21/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.176-91.338.amzn2 (1.19 and above), 4.14.262-200.489.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-7.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +- Support for `c6a` instance types. + ### AMI Release v20220210 * amazon-eks-gpu-node-1.21-v20220210 * amazon-eks-gpu-node-1.20-v20220210 From f43f5b3d88da64af8eb97336ae07e08b3ff25093 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 18 Feb 2022 16:28:35 -0800 Subject: [PATCH 234/621] Downgrade and lock ec2-utils at 1.2-45 until 1.2-47 is available. (#867) --- scripts/install-worker.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 2130db472..677e6208c 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -59,13 +59,18 @@ sudo yum install -y \ chrony \ conntrack \ curl \ - jq \ ec2-instance-connect \ + ipvsadm \ + jq \ nfs-utils \ socat \ unzip \ wget \ - ipvsadm + yum-plugin-versionlock + +# Downgrade and lock ec2-utils until 1.2-47 is available: https://github.com/aws/amazon-ec2-utils/issues/22 +sudo yum downgrade -y ec2-utils-1.2-45.amzn2.noarch +sudo yum versionlock ec2-utils-* # Remove the ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. 
if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi @@ -118,9 +123,6 @@ if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo groupadd -fog 1950 docker sudo useradd --gid $(getent group docker | cut -d: -f3) docker - # install version lock to put a lock on dependecies - sudo yum install -y yum-plugin-versionlock - # install runc and lock version sudo yum install -y runc-${RUNC_VERSION} sudo yum versionlock runc-* From 2289937fe9fb3933d066b6ae72a4299b2624e154 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 22 Feb 2022 12:42:15 -0800 Subject: [PATCH 235/621] Remove ec2-utils versionlock. (#869) The 1.2-47 package is now rolling out. --- scripts/install-worker.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 677e6208c..b552ec56a 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -68,10 +68,6 @@ sudo yum install -y \ wget \ yum-plugin-versionlock -# Downgrade and lock ec2-utils until 1.2-47 is available: https://github.com/aws/amazon-ec2-utils/issues/22 -sudo yum downgrade -y ec2-utils-1.2-45.amzn2.noarch -sudo yum versionlock ec2-utils-* - # Remove the ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. 
if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi From 3fab3b79aac18c6dffb246273876f38bd36ae486 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 1 Mar 2022 11:26:30 -0800 Subject: [PATCH 236/621] 20220226 details (#874) --- CHANGELOG.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 295ac7428..97174b03d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,37 @@ # Changelog +### AMI Release v20220226 +* amazon-eks-gpu-node-1.21-v20220226 +* amazon-eks-gpu-node-1.20-v20220226 +* amazon-eks-gpu-node-1.19-v20220226 +* amazon-eks-gpu-node-1.18-v20220226 +* amazon-eks-arm64-node-1.21-v20220226 +* amazon-eks-arm64-node-1.20-v20220226 +* amazon-eks-arm64-node-1.19-v20220226 +* amazon-eks-arm64-node-1.18-v20220226 +* amazon-eks-node-1.21-v20220226 +* amazon-eks-node-1.20-v20220226 +* amazon-eks-node-1.19-v20220226 +* amazon-eks-node-1.18-v20220226 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2022-01-21/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.176-91.338.amzn2 (1.19 and above), 4.14.262-200.489.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-7.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +- Upgrade `ec2-utils` version to `1.2-47`, addressing an issue with device symbolic links. More information is available [here](https://github.com/aws/amazon-ec2-utils/issues/22). 
+ ### AMI Release v20220216 * amazon-eks-gpu-node-1.21-v20220216 * amazon-eks-gpu-node-1.20-v20220216 From b9fff39589a1f9b22bfaad07b853fd90cbfdc667 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 3 Mar 2022 17:11:13 -0800 Subject: [PATCH 237/621] Update containerd to 1.4.6-8 for CVE-2022-23648 (#877) --- eks-worker-al2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 18d829ddb..ffcf644f3 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -14,7 +14,7 @@ "kubernetes_build_date": null, "kernel_version": "", "docker_version": "20.10.7-5.amzn2", - "containerd_version": "1.4.6-7.amzn2", + "containerd_version": "1.4.6-8.amzn2", "runc_version": "1.0.0-2.amzn2", "cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", From 66418ad736032e13133e8e378714d6e523b9cec4 Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Mon, 7 Mar 2022 12:27:32 -0800 Subject: [PATCH 238/621] Add pause container account for ap-southeast-3 (#878) --- files/bootstrap.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 8a9391bf7..d3410b2b2 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -154,6 +154,8 @@ function get_pause_container_account_for_region () { echo "${PAUSE_CONTAINER_ACCOUNT:-877085696533}";; eu-south-1) echo "${PAUSE_CONTAINER_ACCOUNT:-590381155156}";; + ap-southeast-3) + echo "${PAUSE_CONTAINER_ACCOUNT:-296578399912}";; *) echo "${PAUSE_CONTAINER_ACCOUNT:-602401143452}";; esac From a419e33ec8623b19179f38f841bc52d48816148d Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 7 Mar 2022 17:48:53 -0800 Subject: [PATCH 239/621] Add changelog for v20220303 (#879) --- CHANGELOG.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 97174b03d..ae0714ddd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,37 @@ # Changelog +### AMI Release v20220303 +* 
amazon-eks-gpu-node-1.21-v20220303 +* amazon-eks-gpu-node-1.20-v20220303 +* amazon-eks-gpu-node-1.19-v20220303 +* amazon-eks-gpu-node-1.18-v20220303 +* amazon-eks-arm64-node-1.21-v20220303 +* amazon-eks-arm64-node-1.20-v20220303 +* amazon-eks-arm64-node-1.19-v20220303 +* amazon-eks-arm64-node-1.18-v20220303 +* amazon-eks-node-1.21-v20220303 +* amazon-eks-node-1.20-v20220303 +* amazon-eks-node-1.19-v20220303 +* amazon-eks-node-1.18-v20220303 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.176-91.338.amzn2 (1.19 and above), 4.14.262-200.489.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-8.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +- Update `containerd` to `1.4.6-8.amzn2` for CVE-2022-23648. 
+ ### AMI Release v20220226 * amazon-eks-gpu-node-1.21-v20220226 * amazon-eks-gpu-node-1.20-v20220226 From bc1d8a16155a0c4a9fef98859ef82fccc80bf01e Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Thu, 10 Mar 2022 16:32:58 -0800 Subject: [PATCH 240/621] Added changelog for v20220309 (#884) --- CHANGELOG.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae0714ddd..809c311ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,37 @@ # Changelog +### AMI Release v20220309 +* amazon-eks-gpu-node-1.21-v20220309 +* amazon-eks-gpu-node-1.20-v20220309 +* amazon-eks-gpu-node-1.19-v20220309 +* amazon-eks-gpu-node-1.18-v20220309 +* amazon-eks-arm64-node-1.21-v20220309 +* amazon-eks-arm64-node-1.20-v20220309 +* amazon-eks-arm64-node-1.19-v20220309 +* amazon-eks-arm64-node-1.18-v20220309 +* amazon-eks-node-1.21-v20220309 +* amazon-eks-node-1.20-v20220309 +* amazon-eks-node-1.19-v20220309 +* amazon-eks-node-1.18-v20220309 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.181-99.354.amzn2 (1.19 and above), 4.14.268-205.500.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-8.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +- Update kernel version to 4.14.268-205.500.amzn2 for 1.18 and below, 5.4.181-99.354.amzn2 for 1.19 and above. For more information, see [ALAS-2022-1761](https://alas.aws.amazon.com/AL2/ALAS-2022-1761.html) and [ALASKERNEL-5.4-2022-023](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2022-023.html). 
+ ### AMI Release v20220303 * amazon-eks-gpu-node-1.21-v20220303 * amazon-eks-gpu-node-1.20-v20220303 From da570d5d2474f847d1f24bbf36fedbf437c1de65 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Mon, 14 Mar 2022 10:23:35 -0700 Subject: [PATCH 241/621] Adding support for new k8s version 1.22 and credential provider support (#881) * Adding support for new kubernetes version 1.22 and credential provider support --- Makefile | 6 +++- files/ecr-credential-provider-config | 14 ++++++++++ scripts/install-worker.sh | 41 ++++++++++++++++++++++++++-- 3 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 files/ecr-credential-provider-config diff --git a/Makefile b/Makefile index 6a74478d3..8939a69bd 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.18 1.19 1.20 1.21 +all: 1.18 1.19 1.20 1.21 1.22 .PHONY: validate validate: @@ -57,3 +57,7 @@ k8s: validate .PHONY: 1.21 1.21: $(MAKE) k8s kubernetes_version=1.21.5 kubernetes_build_date=2022-01-21 pull_cni_from_github=true + +.PHONY: 1.22 +1.22: + $(MAKE) k8s kubernetes_version=1.22.6 kubernetes_build_date=2022-03-09 pull_cni_from_github=true diff --git a/files/ecr-credential-provider-config b/files/ecr-credential-provider-config new file mode 100644 index 000000000..d6117ceb4 --- /dev/null +++ b/files/ecr-credential-provider-config @@ -0,0 +1,14 @@ +apiVersion: kubelet.config.k8s.io/v1alpha1 +kind: CredentialProviderConfig +providers: + - name: ecr-credential-provider + matchImages: + - "*.dkr.ecr.*.amazonaws.com" + - "*.dkr.ecr.*.amazonaws.cn" + - "*.dkr.ecr-fips.*.amazonaws.com" + - "*.dkr.ecr.us-iso-east-1.c2s.ic.gov" + - "*.dkr.ecr.us-isob-east-1.sc2s.sgov.gov" + defaultCacheDuration: "12h" + apiVersion: credentialprovider.kubelet.k8s.io/v1alpha1 + args: + - get-credentials \ No newline at end of file diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index b552ec56a..24a8c5dbf 100644 --- a/scripts/install-worker.sh +++ 
b/scripts/install-worker.sh @@ -155,6 +155,12 @@ else sudo mv $TEMPLATE_DIR/containerd-config.toml /etc/eks/containerd/containerd-config.toml fi +if [[ $KUBERNETES_VERSION == "1.22"* ]]; then + # enable CredentialProviders features in kubelet-containerd service file + IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' + sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet-containerd.service +fi + sudo mv $TEMPLATE_DIR/kubelet-containerd.service /etc/eks/containerd/kubelet-containerd.service sudo mv $TEMPLATE_DIR/sandbox-image.service /etc/eks/containerd/sandbox-image.service sudo mv $TEMPLATE_DIR/pull-sandbox-image.sh /etc/eks/containerd/pull-sandbox-image.sh @@ -254,14 +260,25 @@ sudo mkdir -p /etc/kubernetes/kubelet sudo mkdir -p /etc/systemd/system/kubelet.service.d sudo mv $TEMPLATE_DIR/kubelet-kubeconfig /var/lib/kubelet/kubeconfig sudo chown root:root /var/lib/kubelet/kubeconfig -sudo mv $TEMPLATE_DIR/kubelet.service /etc/systemd/system/kubelet.service -sudo chown root:root /etc/systemd/system/kubelet.service + # Inject CSIServiceAccountToken feature gate to kubelet config if kubernetes version starts with 1.20. # This is only injected for 1.20 since CSIServiceAccountToken will be moved to beta starting 1.21. 
if [[ $KUBERNETES_VERSION == "1.20"* ]]; then KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED=$(cat $TEMPLATE_DIR/kubelet-config.json | jq '.featureGates += {CSIServiceAccountToken: true}') echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $TEMPLATE_DIR/kubelet-config.json fi + +if [[ $KUBERNETES_VERSION == "1.22"* ]]; then + # enable CredentialProviders feature flags in kubelet service file + IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' + sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet.service + # enable KubeletCredentialProviders features in kubelet configuration + KUBELET_CREDENTIAL_PROVIDERS_FEATURES=$(cat $TEMPLATE_DIR/kubelet-config.json | jq '.featureGates += {KubeletCredentialProviders: true}') + printf "%s" "$KUBELET_CREDENTIAL_PROVIDERS_FEATURES" > "$TEMPLATE_DIR/kubelet-config.json" +fi + +sudo mv $TEMPLATE_DIR/kubelet.service /etc/systemd/system/kubelet.service +sudo chown root:root /etc/systemd/system/kubelet.service sudo mv $TEMPLATE_DIR/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json sudo chown root:root /etc/kubernetes/kubelet/kubelet-config.json @@ -287,6 +304,26 @@ if [[ -n "$SONOBUOY_E2E_REGISTRY" ]]; then sudo sed -i s,SONOBUOY_E2E_REGISTRY,$SONOBUOY_E2E_REGISTRY,g /etc/eks/sonobuoy-e2e-registry-config fi +################################################################################ +### ECR CREDENTIAL PROVIDER #################################################### +################################################################################ +if [[ $KUBERNETES_VERSION == "1.22"* ]]; then + ECR_BINARY="ecr-credential-provider" + if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then + echo "AWS cli present - using it to copy ecr-credential-provider binaries from s3." 
+ aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$ECR_BINARY . + else + echo "AWS cli missing - using wget to fetch ecr-credential-provider binaries from s3. Note: This won't work for private bucket." + sudo wget "$S3_URL_BASE/$ECR_BINARY" + fi + sudo chmod +x $ECR_BINARY + sudo mkdir -p /etc/eks/ecr-credential-provider + sudo mv $ECR_BINARY /etc/eks/ecr-credential-provider + + # copying credential provider config file to eks folder + sudo mv $TEMPLATE_DIR/ecr-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config +fi + ################################################################################ ### SSM Agent ################################################################## ################################################################################ From 60550f3eeed450d5f57b591f6c6ea76d1c494439 Mon Sep 17 00:00:00 2001 From: Juan Manuel Mesa Date: Thu, 24 Mar 2022 00:22:07 +0100 Subject: [PATCH 242/621] Add show-max-allowed flag to max pods calculator script (#766) --- files/max-pods-calculator.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/files/max-pods-calculator.sh b/files/max-pods-calculator.sh index bd314ec68..643a9a22b 100644 --- a/files/max-pods-calculator.sh +++ b/files/max-pods-calculator.sh @@ -19,6 +19,7 @@ function print_help { echo "--cni-custom-networking-enabled Use this flag to indicate if CNI custom networking mode has been enabled." echo "--cni-prefix-delegation-enabled Use this flag to indicate if CNI prefix delegation has been enabled." echo "--cni-max-eni specify how many ENIs should be used for prefix delegation. Defaults to using all ENIs per instance." + echo "--show-max-allowed Use this flag to show max number of Pods allowed to run in Worker Node. 
Otherwise the script will show the recommended value" } POSITIONAL=() @@ -57,6 +58,10 @@ while [[ $# -gt 0 ]]; do shift shift ;; + --show-max-allowed) + SHOW_MAX_ALLOWED=true + shift + ;; *) # unknown option POSITIONAL+=("$1") # save it in an array for later shift # past argument @@ -70,6 +75,7 @@ CNI_PREFIX_DELEGATION_ENABLED="${CNI_PREFIX_DELEGATION_ENABLED:-false}" CNI_MAX_ENI="${CNI_MAX_ENI:-}" INSTANCE_TYPE="${INSTANCE_TYPE:-}" INSTANCE_TYPE_FROM_IMDS="${INSTANCE_TYPE_FROM_IMDS:-false}" +SHOW_MAX_ALLOWED="${SHOW_MAX_ALLOWED:-false}" PREFIX_DELEGATION_SUPPORTED=false IPS_PER_PREFIX=16 @@ -144,8 +150,15 @@ fi MAX_POD_CEILING_FOR_LOW_CPU=110 MAX_POD_CEILING_FOR_HIGH_CPU=250 CPU_COUNT=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.CpuCount' ) + +if [ "$SHOW_MAX_ALLOWED" = true ] ; then + echo $max_pods + exit 0 +fi + if [ "$CPU_COUNT" -gt 30 ] ; then echo $(min_number $MAX_POD_CEILING_FOR_HIGH_CPU $max_pods) else echo $(min_number $MAX_POD_CEILING_FOR_LOW_CPU $max_pods) fi + From 917c62133800999ae4ae49c1df6415261bd645bd Mon Sep 17 00:00:00 2001 From: Suket Sharma Date: Tue, 29 Mar 2022 09:31:54 -0700 Subject: [PATCH 243/621] Calculate MaxPods when not present in eni-max-pods.txt (#888) * Calculate MaxPods when not present in eni-max-pods.txt * Fixing path --- files/bootstrap.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index d3410b2b2..dfeb42a92 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -430,8 +430,11 @@ set +o pipefail MAX_PODS=$(cat $MAX_PODS_FILE | awk "/^${INSTANCE_TYPE:-unset}/"' { print $2 }') set -o pipefail if [ -z "$MAX_PODS" ] || [ -z "$INSTANCE_TYPE" ]; then - echo "No entry for type '$INSTANCE_TYPE' in $MAX_PODS_FILE" - exit 1 + echo "No entry for type '$INSTANCE_TYPE' in $MAX_PODS_FILE. Will attempt to auto-discover value." 
+ # When determining the value of maxPods, we're using the legacy calculation by default since it's more restrictive than + # the PrefixDelegation based alternative and is likely to be in-use by more customers. + # The legacy numbers also maintain backwards compatibility when used to calculate `kubeReserved.memory` + MAX_PODS=$(/etc/eks/max-pods-calculator.sh --instance-type-from-imds --cni-version 1.10.0 --show-max-allowed) fi # calculates the amount of each resource to reserve @@ -463,7 +466,7 @@ fi if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then sudo mkdir -p /etc/containerd sudo mkdir -p /etc/cni/net.d - sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml + sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml sudo mv /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml sudo mv /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service sudo mv /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service @@ -475,7 +478,7 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then systemctl restart containerd systemctl enable sandbox-image systemctl start sandbox-image - + elif [[ "$CONTAINER_RUNTIME" = "dockerd" ]]; then mkdir -p /etc/docker bash -c "/sbin/iptables-save > /etc/sysconfig/iptables" From 6f43f2c505338d8b69aa20a196a19d6646a2f215 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Wed, 30 Mar 2022 12:52:33 -0700 Subject: [PATCH 244/621] Added Changelog for v20220317 (#889) --- CHANGELOG.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 809c311ea..51d9e44f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,41 @@ # Changelog +### AMI Release v20220317 +* amazon-eks-gpu-node-1.22-v20220317 +* amazon-eks-gpu-node-1.21-v20220317 +* amazon-eks-gpu-node-1.20-v20220317 +* amazon-eks-gpu-node-1.19-v20220317 +* amazon-eks-gpu-node-1.18-v20220317 +* 
amazon-eks-arm64-node-1.22-v20220317 +* amazon-eks-arm64-node-1.21-v20220317 +* amazon-eks-arm64-node-1.20-v20220317 +* amazon-eks-arm64-node-1.19-v20220317 +* amazon-eks-arm64-node-1.18-v20220317 +* amazon-eks-node-1.22-v20220317 +* amazon-eks-node-1.21-v20220317 +* amazon-eks-node-1.20-v20220317 +* amazon-eks-node-1.19-v20220317 +* amazon-eks-node-1.18-v20220317 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.181-99.354.amzn2 (1.19 and above), 4.14.268-205.500.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-8.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +- Adding support for new k8s version 1.22 + ### AMI Release v20220309 * amazon-eks-gpu-node-1.21-v20220309 * amazon-eks-gpu-node-1.20-v20220309 From 74a638a9ce2aea734aa0f8a0ceb416b7e1f2e4ee Mon Sep 17 00:00:00 2001 From: Abeer Sethi <38733827+abeer91@users.noreply.github.com> Date: Thu, 7 Apr 2022 15:03:14 -0700 Subject: [PATCH 245/621] Update CHANGELOG.md (#894) Adding changelog entry for v20220406 --- CHANGELOG.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51d9e44f9..b7ccc84da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,42 @@ # Changelog +### AMI Release v20220406 +* amazon-eks-gpu-node-1.22-v20220406 +* amazon-eks-gpu-node-1.21-v20220406 +* amazon-eks-gpu-node-1.20-v20220406 +* amazon-eks-gpu-node-1.19-v20220406 +* amazon-eks-gpu-node-1.18-v20220406 +* amazon-eks-arm64-node-1.22-v20220406 +* amazon-eks-arm64-node-1.21-v20220406 +* amazon-eks-arm64-node-1.20-v20220406 +* amazon-eks-arm64-node-1.19-v20220406 +* amazon-eks-arm64-node-1.18-v20220406 +* 
amazon-eks-node-1.22-v20220406 +* amazon-eks-node-1.21-v20220406 +* amazon-eks-node-1.20-v20220406 +* amazon-eks-node-1.19-v20220406 +* amazon-eks-node-1.18-v20220406 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.181-99.354.amzn2 (1.19 and above), 4.14.268-205.500.amzn2 (1.18 and below) +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +- Patches for [CVE-2022-24769](https://nvd.nist.gov/vuln/detail/CVE-2022-24769) have been included. +- The bootstrap script will auto-discover maxPods values when instanceType is missing in eni-max-pods.txt + ### AMI Release v20220317 * amazon-eks-gpu-node-1.22-v20220317 * amazon-eks-gpu-node-1.21-v20220317 From 8c28c17e9dc36e37fa8964dc47d56ff2239f2a6e Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Fri, 8 Apr 2022 13:50:22 -0700 Subject: [PATCH 246/621] Deprecate 1.18 (#892) --- Makefile | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 8939a69bd..544be20cc 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.18 1.19 1.20 1.21 1.22 +all: 1.19 1.20 1.21 1.22 .PHONY: validate validate: @@ -42,10 +42,6 @@ k8s: validate # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html -.PHONY: 1.18 -1.18: - $(MAKE) k8s kubernetes_version=1.18.20 kubernetes_build_date=2021-09-02 pull_cni_from_github=true - .PHONY: 1.19 1.19: $(MAKE) k8s kubernetes_version=1.19.15 kubernetes_build_date=2021-11-10 pull_cni_from_github=true From 02823975992a9291609498b00c00b6434e0e4d19 Mon Sep 17 00:00:00 2001 From: Masatoshi 
Hayashi Date: Sat, 9 Apr 2022 08:09:38 +0900 Subject: [PATCH 247/621] DNS_CLUSTER_IP cannot be specified in IPv6 Cluster (#860) * DNS_CLUSTER_IP cannot be specified on IPv6 Cluster * Update the error message to include related bootstrap options Co-authored-by: Brandon Wagner Co-authored-by: Brandon Wagner --- files/bootstrap.sh | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index dfeb42a92..da7d8af97 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -301,11 +301,6 @@ if [[ ! -z "${IP_FAMILY}" ]]; then echo "Invalid IpFamily. Only ipv4 or ipv6 are allowed" exit 1 fi - - if [[ "${IP_FAMILY}" == "ipv6" ]] && [[ ! -z "${B64_CLUSTER_CA}" ]] && [[ ! -z "${APISERVER_ENDPOINT}" ]] && [[ -z "${SERVICE_IPV6_CIDR}" ]]; then - echo "Service Ipv6 Cidr must be provided when ip-family is specified as IPV6" - exit 1 - fi fi if [[ ! -z "${SERVICE_IPV6_CIDR}" ]]; then @@ -387,13 +382,17 @@ sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig sed -i s,AWS_REGION,$AWS_DEFAULT_REGION,g /var/lib/kubelet/kubeconfig ### kubelet.service configuration -if [[ "${IP_FAMILY}" == "ipv6" ]]; then - DNS_CLUSTER_IP=$(awk -F/ '{print $1}' <<< $SERVICE_IPV6_CIDR)a -fi - MAC=$(get_meta_data 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$//') if [[ -z "${DNS_CLUSTER_IP}" ]]; then + if [[ "${IP_FAMILY}" == "ipv6" ]]; then + if [[ -z "${SERVICE_IPV6_CIDR}" ]]; then + echo "Either --service-ipv6-cidr or --cluster-dns-ip must be provided when --ip-family is set to ipv6" + exit 1 + fi + DNS_CLUSTER_IP=$(awk -F/ '{print $1}' <<< $SERVICE_IPV6_CIDR)a + fi + if [[ ! -z "${SERVICE_IPV4_CIDR}" ]] && [[ "${SERVICE_IPV4_CIDR}" != "None" ]] ; then #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. 
(x.y.z.10) DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 From aed6c88d7783464edb443d3e1c796ddce2a30375 Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Fri, 15 Apr 2022 14:08:43 -0700 Subject: [PATCH 248/621] Revert "DNS_CLUSTER_IP cannot be specified in IPv6 Cluster (#860)" (#900) This reverts commit 02823975992a9291609498b00c00b6434e0e4d19. Co-authored-by: Ravi Sinha --- files/bootstrap.sh | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index da7d8af97..dfeb42a92 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -301,6 +301,11 @@ if [[ ! -z "${IP_FAMILY}" ]]; then echo "Invalid IpFamily. Only ipv4 or ipv6 are allowed" exit 1 fi + + if [[ "${IP_FAMILY}" == "ipv6" ]] && [[ ! -z "${B64_CLUSTER_CA}" ]] && [[ ! -z "${APISERVER_ENDPOINT}" ]] && [[ -z "${SERVICE_IPV6_CIDR}" ]]; then + echo "Service Ipv6 Cidr must be provided when ip-family is specified as IPV6" + exit 1 + fi fi if [[ ! -z "${SERVICE_IPV6_CIDR}" ]]; then @@ -382,17 +387,13 @@ sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig sed -i s,AWS_REGION,$AWS_DEFAULT_REGION,g /var/lib/kubelet/kubeconfig ### kubelet.service configuration +if [[ "${IP_FAMILY}" == "ipv6" ]]; then + DNS_CLUSTER_IP=$(awk -F/ '{print $1}' <<< $SERVICE_IPV6_CIDR)a +fi + MAC=$(get_meta_data 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$//') if [[ -z "${DNS_CLUSTER_IP}" ]]; then - if [[ "${IP_FAMILY}" == "ipv6" ]]; then - if [[ -z "${SERVICE_IPV6_CIDR}" ]]; then - echo "Either --service-ipv6-cidr or --cluster-dns-ip must be provided when --ip-family is set to ipv6" - exit 1 - fi - DNS_CLUSTER_IP=$(awk -F/ '{print $1}' <<< $SERVICE_IPV6_CIDR)a - fi - if [[ ! -z "${SERVICE_IPV4_CIDR}" ]] && [[ "${SERVICE_IPV4_CIDR}" != "None" ]] ; then #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. 
(x.y.z.10) DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 From 806b3336a54457da739b5e2058c363a8d6017cd7 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 19 Apr 2022 11:38:21 -0700 Subject: [PATCH 249/621] Update CHANGELOG.md (#901) --- CHANGELOG.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7ccc84da..4a58188e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1068,6 +1068,37 @@ Binaries used to build these AMIs are published: Notable changes: * Release 1.20 AMIs +## AMI Release v20210504 + +* amazon-eks-gpu-node-1.19-v20210504 +* amazon-eks-gpu-node-1.18-v20210504 +* amazon-eks-gpu-node-1.17-v20210504 +* amazon-eks-gpu-node-1.16-v20210504 +* amazon-eks-gpu-node-1.15-v20210504 +* amazon-eks-arm64-node-1.19-v20210504 +* amazon-eks-arm64-node-1.18-v20210504 +* amazon-eks-arm64-node-1.17-v20210504 +* amazon-eks-arm64-node-1.16-v20210504 +* amazon-eks-arm64-node-1.15-v20210504 +* amazon-eks-node-1.19-v20210504 +* amazon-eks-node-1.18-v20210504 +* amazon-eks-node-1.17-v20210504 +* amazon-eks-node-1.16-v20210504 +* amazon-eks-node-1.15-v20210504 + +Binaries used to build these AMIs are published: + +s3://amazon-eks/1.19.6/2021-01-05/ +s3://amazon-eks/1.18.9/2020-11-02/ +s3://amazon-eks/1.17.12/2020-11-02/ +s3://amazon-eks/1.16.15/2020-11-02/ +s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes: + +* Update Kernel (1.19: 5.4.110-54.189.amzn2.x86_64, 1.18 and below: 4.14.231-173.361.amzn2.x86_64) to address a vulnerability. 
More information available in [ALAS-2021-1634](https://alas.aws.amazon.com/AL2/ALAS-2021-1634.html) +* Update Nvidia and Cuda drivers to v460.73.01 + ## AMI Release v20210501 * amazon-eks-gpu-node-1.19-v20210501 From a24cda40666712eca0c4c1544ad11f021b4790d0 Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Fri, 22 Apr 2022 14:25:26 -0700 Subject: [PATCH 250/621] Update CHANGELOG.md (#902) Co-authored-by: ljosyula --- CHANGELOG.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a58188e8..701e2b9a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,38 @@ # Changelog +### AMI Release v20220420 +* amazon-eks-gpu-node-1.22-v20220420 +* amazon-eks-gpu-node-1.21-v20220420 +* amazon-eks-gpu-node-1.20-v20220420 +* amazon-eks-gpu-node-1.19-v20220420 +* amazon-eks-arm64-node-1.22-v20220420 +* amazon-eks-arm64-node-1.21-v20220420 +* amazon-eks-arm64-node-1.20-v20220420 +* amazon-eks-arm64-node-1.19-v20220420 +* amazon-eks-node-1.22-v20220420 +* amazon-eks-node-1.21-v20220420 +* amazon-eks-node-1.20-v20220420 +* amazon-eks-node-1.19-v20220420 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.188-104.359.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Notable changes: +- Patches for [CVE-2022-0778](https://nvd.nist.gov/vuln/detail/CVE-2022-0778), [CVE-2022-23218](https://nvd.nist.gov/vuln/detail/CVE-2022-23218) and [CVE-2022-23219](https://nvd.nist.gov/vuln/detail/CVE-2022-23219) have been included. 
+- Deprecating 1.18 k8s Version + ### AMI Release v20220406 * amazon-eks-gpu-node-1.22-v20220406 * amazon-eks-gpu-node-1.21-v20220406 From 9bf7ef2e2e7f0d1e9fe142a60ab0b4afebc5a7f7 Mon Sep 17 00:00:00 2001 From: Liz Fong-Jones Date: Tue, 26 Apr 2022 15:50:33 -0700 Subject: [PATCH 251/621] add c7g support (#903) See https://github.com/aws/amazon-vpc-cni-k8s/pull/1940 --- files/eni-max-pods.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 203187e6f..0731af808 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -147,6 +147,15 @@ c6i.8xlarge 234 c6i.large 29 c6i.metal 737 c6i.xlarge 58 +c7g.12xlarge 234 +c7g.16xlarge 737 +c7g.2xlarge 58 +c7g.4xlarge 234 +c7g.8xlarge 234 +c7g.large 29 +c7g.medium 8 +c7g.metal 737 +c7g.xlarge 58 cc2.8xlarge 234 cr1.8xlarge 234 d2.2xlarge 58 From 17e89fd482c74d8d3d6ccd42e1d90052b93fbc9c Mon Sep 17 00:00:00 2001 From: Andrew Johnstone Date: Tue, 26 Apr 2022 23:51:49 +0100 Subject: [PATCH 252/621] Update bootstrap.sh (#893) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When replaying user-data in testing will bail user-data when strict... 
``` mv: cannot stat ‘/etc/eks/iptables-restore.service’: No such file or directory Exited with error on line 424 ``` --- files/bootstrap.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index dfeb42a92..bac6969be 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -467,9 +467,9 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then sudo mkdir -p /etc/containerd sudo mkdir -p /etc/cni/net.d sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml - sudo mv /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml - sudo mv /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service - sudo mv /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service + sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml + sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service + sudo cp -v /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/sandbox-image.service ln -sf /run/containerd/containerd.sock /run/dockershim.sock @@ -482,7 +482,7 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then elif [[ "$CONTAINER_RUNTIME" = "dockerd" ]]; then mkdir -p /etc/docker bash -c "/sbin/iptables-save > /etc/sysconfig/iptables" - mv /etc/eks/iptables-restore.service /etc/systemd/system/iptables-restore.service + cp -v /etc/eks/iptables-restore.service /etc/systemd/system/iptables-restore.service sudo chown root:root /etc/systemd/system/iptables-restore.service systemctl daemon-reload systemctl enable iptables-restore From 08b37aa0805fd3c9697a26503e11ff1c414cd5c1 Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Wed, 27 Apr 2022 18:38:42 -0400 Subject: [PATCH 253/621] Update CHANGELOG for v20220421 release (#906) --- CHANGELOG.md | 
33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 701e2b9a5..aa2d98fd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,38 @@ # Changelog +### AMI Release v20220421 +* amazon-eks-gpu-node-1.22-v20220421 +* amazon-eks-gpu-node-1.21-v20220421 +* amazon-eks-gpu-node-1.20-v20220421 +* amazon-eks-gpu-node-1.19-v20220421 +* amazon-eks-arm64-node-1.22-v20220421 +* amazon-eks-arm64-node-1.21-v20220421 +* amazon-eks-arm64-node-1.20-v20220421 +* amazon-eks-arm64-node-1.19-v20220421 +* amazon-eks-node-1.22-v20220421 +* amazon-eks-node-1.21-v20220421 +* amazon-eks-node-1.20-v20220421 +* amazon-eks-node-1.19-v20220421 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.188-104.359.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0-1.amzn2 + +Notable changes: +* Includes patched Kernel for [CVE-2022-26490](https://alas.aws.amazon.com/cve/html/CVE-2022-26490.html), [CVE-2022-27666](https://alas.aws.amazon.com/cve/html/CVE-2022-27666.html) and [CVE-2022-28356](https://alas.aws.amazon.com/cve/html/CVE-2022-28356.html) +* New release with AMIs now available in ap-southeast-3 + ### AMI Release v20220420 * amazon-eks-gpu-node-1.22-v20220420 * amazon-eks-gpu-node-1.21-v20220420 From ab55838f9f9aefdac1788f006c75c6c1adf80ea2 Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Mon, 2 May 2022 17:42:35 -0700 Subject: [PATCH 254/621] Update CHANGELOG.md (#909) Co-authored-by: ljosyula --- CHANGELOG.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa2d98fd4..2e600a06d 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,38 @@ # Changelog +### AMI Release v20220429 +* amazon-eks-gpu-node-1.22-v20220429 +* amazon-eks-gpu-node-1.21-v20220429 +* amazon-eks-gpu-node-1.20-v20220429 +* amazon-eks-gpu-node-1.19-v20220429 +* amazon-eks-arm64-node-1.22-v20220429 +* amazon-eks-arm64-node-1.21-v20220429 +* amazon-eks-arm64-node-1.20-v20220429 +* amazon-eks-arm64-node-1.19-v20220429 +* amazon-eks-node-1.22-v20220429 +* amazon-eks-node-1.21-v20220429 +* amazon-eks-node-1.20-v20220429 +* amazon-eks-node-1.19-v20220429 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.188-104.359.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0-1.amzn2 + +Notable changes: +* Added c7g support +* [When replaying user-data in testing will bail user-data when strict due to moving files](https://github.com/awslabs/amazon-eks-ami/pull/893/files) + ### AMI Release v20220421 * amazon-eks-gpu-node-1.22-v20220421 * amazon-eks-gpu-node-1.21-v20220421 From a3ce201d5b1922486c42f9bca94f36ebbcac2b01 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Tue, 3 May 2022 16:56:52 -0700 Subject: [PATCH 255/621] Update the runc,docker and containerd versions in config json (#912) --- eks-worker-al2.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index ffcf644f3..37f7af523 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -13,9 +13,9 @@ "kubernetes_version": null, "kubernetes_build_date": null, "kernel_version": "", - "docker_version": "20.10.7-5.amzn2", - "containerd_version": "1.4.6-8.amzn2", - "runc_version": "1.0.0-2.amzn2", + "docker_version": "20.10.13-2.amzn2", + "containerd_version": 
"1.4.13-2.amzn2.0.1", + "runc_version": "1.0.3-2.amzn2", "cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", "source_ami_id": "", From edb1d413c80ab0bc12f97eb7a465d42c6c66b870 Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Mon, 9 May 2022 10:15:27 -0700 Subject: [PATCH 256/621] Update k8s version (#916) * Revert "DNS_CLUSTER_IP cannot be specified in IPv6 Cluster (#860)" This reverts commit 02823975992a9291609498b00c00b6434e0e4d19. * updating the deprecated version in eks template Co-authored-by: Ravi Sinha --- amazon-eks-nodegroup.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index 9b8cfbd65..89366e2bf 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -74,7 +74,7 @@ Parameters: NodeImageIdSSMParam: Type: "AWS::SSM::Parameter::Value" - Default: /aws/service/eks/optimized-ami/1.17/amazon-linux-2/recommended/image_id + Default: /aws/service/eks/optimized-ami/1.22/amazon-linux-2/recommended/image_id Description: AWS Systems Manager Parameter Store parameter of the AMI ID for the worker node instances. Change this value to match the version of Kubernetes you are using. DisableIMDSv1: From 3cbbc65d72d34698ffd1f3be70e5f024c17dae54 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 9 May 2022 11:07:24 -0700 Subject: [PATCH 257/621] Update eni-max-pods.txt (#917) --- files/eni-max-pods.txt | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 0731af808..7dcbd973f 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,21 +11,26 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2022-02-15T18:47:49Z +# This file was generated at 2022-05-09T10:14:37-07:00 # # The regions queried were: +# - af-south-1 +# - ap-east-1 # - ap-northeast-1 # - ap-northeast-2 # - ap-northeast-3 # - ap-south-1 # - ap-southeast-1 # - ap-southeast-2 +# - ap-southeast-3 # - ca-central-1 # - eu-central-1 # - eu-north-1 +# - eu-south-1 # - eu-west-1 # - eu-west-2 # - eu-west-3 +# - me-south-1 # - sa-east-1 # - us-east-1 # - us-east-2 @@ -110,6 +115,7 @@ c6a.48xlarge 737 c6a.4xlarge 234 c6a.8xlarge 234 c6a.large 29 +c6a.metal 737 c6a.xlarge 58 c6g.12xlarge 234 c6g.16xlarge 737 @@ -233,6 +239,13 @@ i3en.6xlarge 234 i3en.large 29 i3en.metal 737 i3en.xlarge 58 +i4i.16xlarge 737 +i4i.2xlarge 58 +i4i.32xlarge 737 +i4i.4xlarge 234 +i4i.8xlarge 234 +i4i.large 29 +i4i.xlarge 58 im4gn.16xlarge 737 im4gn.2xlarge 58 im4gn.4xlarge 234 @@ -334,6 +347,7 @@ m6a.48xlarge 737 m6a.4xlarge 234 m6a.8xlarge 234 m6a.large 29 +m6a.metal 737 m6a.xlarge 58 m6g.12xlarge 234 m6g.16xlarge 737 @@ -531,6 +545,18 @@ x2gd.large 29 x2gd.medium 8 x2gd.metal 737 x2gd.xlarge 58 +x2idn.16xlarge 737 +x2idn.24xlarge 737 +x2idn.32xlarge 737 +x2idn.metal 737 +x2iedn.16xlarge 737 +x2iedn.24xlarge 737 +x2iedn.2xlarge 58 +x2iedn.32xlarge 737 +x2iedn.4xlarge 234 +x2iedn.8xlarge 234 +x2iedn.metal 737 +x2iedn.xlarge 58 x2iezn.12xlarge 737 x2iezn.2xlarge 58 x2iezn.4xlarge 234 From 5f8033d4e790e21fe7f377ead4dfb67f80e4c243 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 9 May 2022 11:20:21 -0700 Subject: [PATCH 258/621] Update install-worker.sh (#918) --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 24a8c5dbf..ccaa52b69 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -116,7 +116,7 @@ sudo yum install -y yum-utils device-mapper-persistent-data lvm2 INSTALL_DOCKER="${INSTALL_DOCKER:-true}" if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo amazon-linux-extras 
enable docker - sudo groupadd -fog 1950 docker + sudo groupadd -og 1950 docker sudo useradd --gid $(getent group docker | cut -d: -f3) docker # install runc and lock version From 8d6191f67bf4c89abb5d7605d6a369536fdb45cb Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 10 May 2022 09:50:26 -0700 Subject: [PATCH 259/621] Remove old kernels when upgrading. (#919) Some automated scanning tools may flag vulnerabilities in old kernels that are not actually in use. --- scripts/install-worker.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index ccaa52b69..b5713a726 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -66,7 +66,11 @@ sudo yum install -y \ socat \ unzip \ wget \ - yum-plugin-versionlock + yum-plugin-versionlock \ + yum-utils + +# Remove any old kernel versions. `--count=1` here means "only leave 1 kernel version installed" +sudo package-cleanup --oldkernels --count=1 -y # Remove the ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi @@ -111,7 +115,7 @@ sudo mv $TEMPLATE_DIR/iptables-restore.service /etc/eks/iptables-restore.service ### Docker ##################################################################### ################################################################################ -sudo yum install -y yum-utils device-mapper-persistent-data lvm2 +sudo yum install -y device-mapper-persistent-data lvm2 INSTALL_DOCKER="${INSTALL_DOCKER:-true}" if [[ "$INSTALL_DOCKER" == "true" ]]; then From acc7115338be99109b29f3f1179098eaf97e06c0 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Fischer" Date: Tue, 17 May 2022 10:52:34 -0700 Subject: [PATCH 260/621] Maintain dockershim compatibility symlink after instance reboot (#921) --- files/bootstrap.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index bac6969be..f7619968c 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -466,13 +466,17 @@ fi if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then sudo mkdir -p /etc/containerd sudo mkdir -p /etc/cni/net.d + mkdir -p /etc/systemd/system/containerd.service.d + cat < /etc/systemd/system/containerd.service.d/10-compat-symlink.conf +[Service] +ExecStartPre=/bin/ln -sf /run/containerd/containerd.sock /run/dockershim.sock +EOF sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service sudo cp -v /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/sandbox-image.service - ln -sf /run/containerd/containerd.sock /run/dockershim.sock systemctl daemon-reload systemctl enable containerd systemctl restart containerd From 210c20f24f240f38e575e0480a5e780d3c975b66 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Thu, 19 May 2022 13:24:05 -0700 Subject: [PATCH 261/621] Update Changelog for [RECALLED] v20220523 (#922) --- CHANGELOG.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e600a06d..813f4a68b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # Changelog +### [Recalled] AMI Release v20220513 +* amazon-eks-gpu-node-1.22-v20220513 +* amazon-eks-gpu-node-1.21-v20220513 +* amazon-eks-gpu-node-1.20-v20220513 +* amazon-eks-gpu-node-1.19-v20220513 +* amazon-eks-arm64-node-1.22-v20220513 +* 
amazon-eks-arm64-node-1.21-v20220513 +* amazon-eks-arm64-node-1.20-v20220513 +* amazon-eks-arm64-node-1.19-v20220513 +* amazon-eks-node-1.22-v20220513 +* amazon-eks-node-1.21-v20220513 +* amazon-eks-node-1.20-v20220513 +* amazon-eks-node-1.19-v20220513 + +Notice: +* EKS-Optimized AMI SSM parameters contained an incorrect reference to the release version of the AMIs in this release. + ### AMI Release v20220429 * amazon-eks-gpu-node-1.22-v20220429 * amazon-eks-gpu-node-1.21-v20220429 From 9371c6f2a5a9131d66c0b2e8feccd82e3d27ae10 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 25 May 2022 15:21:19 -0700 Subject: [PATCH 262/621] Updates 1.21 binary in makefile (#926) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 544be20cc..cbe8d3833 100644 --- a/Makefile +++ b/Makefile @@ -52,7 +52,7 @@ k8s: validate .PHONY: 1.21 1.21: - $(MAKE) k8s kubernetes_version=1.21.5 kubernetes_build_date=2022-01-21 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.21.12 kubernetes_build_date=2022-05-20 pull_cni_from_github=true .PHONY: 1.22 1.22: From 8af5d617816447f996ee181c9e6e6aafecf75a03 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 25 May 2022 15:27:55 -0700 Subject: [PATCH 263/621] Update CHANGELOG with release 20220523 (#928) --- CHANGELOG.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 813f4a68b..429c98431 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,41 @@ # Changelog +### AMI Release 20220523 +* amazon-eks-gpu-node-1.22-20220523 +* amazon-eks-gpu-node-1.21-20220523 +* amazon-eks-gpu-node-1.20-20220523 +* amazon-eks-gpu-node-1.19-20220523 +* amazon-eks-arm64-node-1.22-20220523 +* amazon-eks-arm64-node-1.21-20220523 +* amazon-eks-arm64-node-1.20-20220523 +* amazon-eks-arm64-node-1.19-20220523 +* amazon-eks-node-1.22-20220523 +* amazon-eks-node-1.21-20220523 +* amazon-eks-node-1.20-20220523 +* amazon-eks-node-1.19-20220523 + 
+Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.190-107.353.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Notable changes: +* Added i4i instance support +* Fixes regression in the docker group ID. AMI build will now fail if the docker group ID is not 1950. +* Removes unused kernels (such as 4.14) during AMI build. This prevents false-positives from automated scanning tools such as AWS Inspector. +* Maintain dockershim compatibility symlink after instance reboot +* Updates 1.21 kubelet version to 1.21.12 + ### [Recalled] AMI Release v20220513 * amazon-eks-gpu-node-1.22-v20220513 * amazon-eks-gpu-node-1.21-v20220513 From 47cbd2f379ff09f62b2a60b7164dcdad08950ac5 Mon Sep 17 00:00:00 2001 From: Suket Sharma Date: Wed, 25 May 2022 17:21:52 -0700 Subject: [PATCH 264/621] Add support for containerd-config-file to bootstrap script (#929) --- files/bootstrap.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index f7619968c..536b3c462 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -23,6 +23,7 @@ function print_help { echo "--enable-docker-bridge Restores the docker default bridge network. (default: false)" echo "--aws-api-retry-attempts Number of retry attempts for AWS API call (DescribeCluster) (default: 3)" echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" + echo "--containerd-config-file File containing the containerd configuration to be used in place of AMI defaults." echo "--dns-cluster-ip Overrides the IP address to use for DNS queries within the cluster. 
Defaults to 10.100.0.10 or 172.20.0.10 based on the IP address of the primary interface" echo "--pause-container-account The AWS account (number) to pull the pause container from" echo "--pause-container-version The tag of the pause container" @@ -75,6 +76,11 @@ while [[ $# -gt 0 ]]; do shift shift ;; + --containerd-config-file) + CONTAINERD_CONFIG_FILE=$2 + shift + shift + ;; --pause-container-account) PAUSE_CONTAINER_ACCOUNT=$2 shift @@ -126,6 +132,7 @@ KUBELET_EXTRA_ARGS="${KUBELET_EXTRA_ARGS:-}" ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" API_RETRY_ATTEMPTS="${API_RETRY_ATTEMPTS:-3}" DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" +CONTAINERD_CONFIG_FILE="${CONTAINERD_CONFIG_FILE:-}" PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.1-eksbuild.1}" CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-dockerd}" IP_FAMILY="${IP_FAMILY:-}" @@ -471,6 +478,9 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then [Service] ExecStartPre=/bin/ln -sf /run/containerd/containerd.sock /run/dockershim.sock EOF + if [[ -n "$CONTAINERD_CONFIG_FILE" ]]; then + sudo cp -v $CONTAINERD_CONFIG_FILE /etc/eks/containerd/containerd-config.toml + fi sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service From f0597535f3cc7549d47ab8786e8c7cf33e87795c Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 27 May 2022 17:00:52 -0700 Subject: [PATCH 265/621] Updates CHANGELOG for release 20220526 (#932) --- CHANGELOG.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 429c98431..88ff47e10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,37 @@ # Changelog +### AMI Release 20220526 +* amazon-eks-gpu-node-1.22-20220526 +* amazon-eks-gpu-node-1.21-20220526 +* amazon-eks-gpu-node-1.20-20220526 +* 
amazon-eks-gpu-node-1.19-20220526 +* amazon-eks-arm64-node-1.22-20220526 +* amazon-eks-arm64-node-1.21-20220526 +* amazon-eks-arm64-node-1.20-20220526 +* amazon-eks-arm64-node-1.19-20220526 +* amazon-eks-node-1.22-20220526 +* amazon-eks-node-1.21-20220526 +* amazon-eks-node-1.20-20220526 +* amazon-eks-node-1.19-20220526 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.190-107.353.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Notable changes: +Linux kernel upgraded to 5.4.190-107.353. + ### AMI Release 20220523 * amazon-eks-gpu-node-1.22-20220523 * amazon-eks-gpu-node-1.21-20220523 From e7b9516e9d8a7dd59ca79c6d1c0fe60f76f5b4ca Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Wed, 1 Jun 2022 16:52:42 -0700 Subject: [PATCH 266/621] Updating Makefile and install script for 1.23 version (#935) * Updating Makefile and install script for 1.23 version * Removing Makefile changes --- scripts/install-worker.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index b5713a726..d7e0b882b 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -159,7 +159,7 @@ else sudo mv $TEMPLATE_DIR/containerd-config.toml /etc/eks/containerd/containerd-config.toml fi -if [[ $KUBERNETES_VERSION == "1.22"* ]]; then +if [[ ! $KUBERNETES_VERSION =~ "1.19"* || ! $KUBERNETES_VERSION =~ "1.20"* || ! 
$KUBERNETES_VERSION =~ "1.21"* ]]; then # enable CredentialProviders features in kubelet-containerd service file IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet-containerd.service @@ -272,7 +272,7 @@ if [[ $KUBERNETES_VERSION == "1.20"* ]]; then echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $TEMPLATE_DIR/kubelet-config.json fi -if [[ $KUBERNETES_VERSION == "1.22"* ]]; then +if [[ ! $KUBERNETES_VERSION =~ "1.19"* || ! $KUBERNETES_VERSION =~ "1.20"* || ! $KUBERNETES_VERSION =~ "1.21"* ]]; then # enable CredentialProviders feature flags in kubelet service file IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet.service @@ -311,7 +311,7 @@ fi ################################################################################ ### ECR CREDENTIAL PROVIDER #################################################### ################################################################################ -if [[ $KUBERNETES_VERSION == "1.22"* ]]; then +if [[ ! $KUBERNETES_VERSION =~ "1.19"* || ! $KUBERNETES_VERSION =~ "1.20"* || ! $KUBERNETES_VERSION =~ "1.21"* ]]; then ECR_BINARY="ecr-credential-provider" if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then echo "AWS cli present - using it to copy ecr-credential-provider binaries from s3." 
From 66a5114f069e2052337b19af741268f24d68bbc0 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Thu, 2 Jun 2022 16:31:44 -0700 Subject: [PATCH 267/621] Updating condition for updating ECR credential flags for versions greater than 1.22 (#937) --- scripts/install-worker.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index d7e0b882b..4c653a59c 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -159,7 +159,7 @@ else sudo mv $TEMPLATE_DIR/containerd-config.toml /etc/eks/containerd/containerd-config.toml fi -if [[ ! $KUBERNETES_VERSION =~ "1.19"* || ! $KUBERNETES_VERSION =~ "1.20"* || ! $KUBERNETES_VERSION =~ "1.21"* ]]; then +if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! $KUBERNETES_VERSION =~ "1.21"* ]]; then # enable CredentialProviders features in kubelet-containerd service file IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet-containerd.service @@ -272,7 +272,7 @@ if [[ $KUBERNETES_VERSION == "1.20"* ]]; then echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $TEMPLATE_DIR/kubelet-config.json fi -if [[ ! $KUBERNETES_VERSION =~ "1.19"* || ! $KUBERNETES_VERSION =~ "1.20"* || ! $KUBERNETES_VERSION =~ "1.21"* ]]; then +if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! 
$KUBERNETES_VERSION =~ "1.21"* ]]; then # enable CredentialProviders feature flags in kubelet service file IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet.service @@ -311,7 +311,7 @@ fi ################################################################################ ### ECR CREDENTIAL PROVIDER #################################################### ################################################################################ -if [[ ! $KUBERNETES_VERSION =~ "1.19"* || ! $KUBERNETES_VERSION =~ "1.20"* || ! $KUBERNETES_VERSION =~ "1.21"* ]]; then +if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! $KUBERNETES_VERSION =~ "1.21"* ]]; then ECR_BINARY="ecr-credential-provider" if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then echo "AWS cli present - using it to copy ecr-credential-provider binaries from s3." 
From 6f6828b908fcca526a02098b2d0cdbd96ff790f8 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 7 Jun 2022 11:23:25 -0700 Subject: [PATCH 268/621] Correct version suffixes for recent releases (#940) --- CHANGELOG.md | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88ff47e10..ae212abc3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,18 +1,18 @@ # Changelog -### AMI Release 20220526 -* amazon-eks-gpu-node-1.22-20220526 -* amazon-eks-gpu-node-1.21-20220526 -* amazon-eks-gpu-node-1.20-20220526 -* amazon-eks-gpu-node-1.19-20220526 -* amazon-eks-arm64-node-1.22-20220526 -* amazon-eks-arm64-node-1.21-20220526 -* amazon-eks-arm64-node-1.20-20220526 -* amazon-eks-arm64-node-1.19-20220526 -* amazon-eks-node-1.22-20220526 -* amazon-eks-node-1.21-20220526 -* amazon-eks-node-1.20-20220526 -* amazon-eks-node-1.19-20220526 +### AMI Release v20220526 +* amazon-eks-gpu-node-1.22-v20220526 +* amazon-eks-gpu-node-1.21-v20220526 +* amazon-eks-gpu-node-1.20-v20220526 +* amazon-eks-gpu-node-1.19-v20220526 +* amazon-eks-arm64-node-1.22-v20220526 +* amazon-eks-arm64-node-1.21-v20220526 +* amazon-eks-arm64-node-1.20-v20220526 +* amazon-eks-arm64-node-1.19-v20220526 +* amazon-eks-node-1.22-v20220526 +* amazon-eks-node-1.21-v20220526 +* amazon-eks-node-1.20-v20220526 +* amazon-eks-node-1.19-v20220526 Binaries used to build these AMIs are published: * s3://amazon-eks/1.22.6/2022-03-09/ @@ -32,19 +32,19 @@ AMI details: Notable changes: Linux kernel upgraded to 5.4.190-107.353. 
-### AMI Release 20220523 -* amazon-eks-gpu-node-1.22-20220523 -* amazon-eks-gpu-node-1.21-20220523 -* amazon-eks-gpu-node-1.20-20220523 -* amazon-eks-gpu-node-1.19-20220523 -* amazon-eks-arm64-node-1.22-20220523 -* amazon-eks-arm64-node-1.21-20220523 -* amazon-eks-arm64-node-1.20-20220523 -* amazon-eks-arm64-node-1.19-20220523 -* amazon-eks-node-1.22-20220523 -* amazon-eks-node-1.21-20220523 -* amazon-eks-node-1.20-20220523 -* amazon-eks-node-1.19-20220523 +### AMI Release v20220523 +* amazon-eks-gpu-node-1.22-v20220523 +* amazon-eks-gpu-node-1.21-v20220523 +* amazon-eks-gpu-node-1.20-v20220523 +* amazon-eks-gpu-node-1.19-v20220523 +* amazon-eks-arm64-node-1.22-v20220523 +* amazon-eks-arm64-node-1.21-v20220523 +* amazon-eks-arm64-node-1.20-v20220523 +* amazon-eks-arm64-node-1.19-v20220523 +* amazon-eks-node-1.22-v20220523 +* amazon-eks-node-1.21-v20220523 +* amazon-eks-node-1.20-v20220523 +* amazon-eks-node-1.19-v20220523 Binaries used to build these AMIs are published: * s3://amazon-eks/1.22.6/2022-03-09/ From c97236d0ac59e6f56e612c568c53c8e439efe50e Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Fri, 10 Jun 2022 14:12:55 -0400 Subject: [PATCH 269/621] Update containerd for CVE-2022-31030 (#945) https://alas.aws.amazon.com/cve/html/CVE-2022-31030.html --- eks-worker-al2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 37f7af523..6c877a52a 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -14,7 +14,7 @@ "kubernetes_build_date": null, "kernel_version": "", "docker_version": "20.10.13-2.amzn2", - "containerd_version": "1.4.13-2.amzn2.0.1", + "containerd_version": "1.4.13-3.amzn2", "runc_version": "1.0.3-2.amzn2", "cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", From dcfe8cf97a6bc357826ee475772904ecc8308de3 Mon Sep 17 00:00:00 2001 From: Suket Sharma Date: Tue, 14 Jun 2022 09:39:58 -0700 Subject: [PATCH 270/621] Adding c6id, m6id, r6id to eni-max-pods.txt (#944) 
--- files/eni-max-pods.txt | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 7dcbd973f..c4b0fd9b6 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,26 +11,21 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. # -# This file was generated at 2022-05-09T10:14:37-07:00 +# This file was generated at 2022-06-10T09:23:03-07:00 # # The regions queried were: -# - af-south-1 -# - ap-east-1 # - ap-northeast-1 # - ap-northeast-2 # - ap-northeast-3 # - ap-south-1 # - ap-southeast-1 # - ap-southeast-2 -# - ap-southeast-3 # - ca-central-1 # - eu-central-1 # - eu-north-1 -# - eu-south-1 # - eu-west-1 # - eu-west-2 # - eu-west-3 -# - me-south-1 # - sa-east-1 # - us-east-1 # - us-east-2 @@ -153,6 +148,16 @@ c6i.8xlarge 234 c6i.large 29 c6i.metal 737 c6i.xlarge 58 +c6id.12xlarge 234 +c6id.16xlarge 737 +c6id.24xlarge 737 +c6id.2xlarge 58 +c6id.32xlarge 737 +c6id.4xlarge 234 +c6id.8xlarge 234 +c6id.large 29 +c6id.metal 737 +c6id.xlarge 58 c7g.12xlarge 234 c7g.16xlarge 737 c7g.2xlarge 58 @@ -245,6 +250,7 @@ i4i.32xlarge 737 i4i.4xlarge 234 i4i.8xlarge 234 i4i.large 29 +i4i.metal 737 i4i.xlarge 58 im4gn.16xlarge 737 im4gn.2xlarge 58 @@ -377,6 +383,16 @@ m6i.8xlarge 234 m6i.large 29 m6i.metal 737 m6i.xlarge 58 +m6id.12xlarge 234 +m6id.16xlarge 737 +m6id.24xlarge 737 +m6id.2xlarge 58 +m6id.32xlarge 737 +m6id.4xlarge 234 +m6id.8xlarge 234 +m6id.large 29 +m6id.metal 737 +m6id.xlarge 58 mac1.metal 234 p2.16xlarge 234 p2.8xlarge 234 @@ -486,6 +502,16 @@ r6i.8xlarge 234 r6i.large 29 r6i.metal 737 r6i.xlarge 58 +r6id.12xlarge 234 +r6id.16xlarge 737 +r6id.24xlarge 737 +r6id.2xlarge 58 +r6id.32xlarge 737 +r6id.4xlarge 234 +r6id.8xlarge 234 +r6id.large 29 +r6id.metal 737 +r6id.xlarge 58 t1.micro 4 t2.2xlarge 44 t2.large 35 From c8fa176afb949fa0aa5507c51eb0f001fc3b018c Mon Sep 17 00:00:00 2001 
From: Chunyang Wang <89669703+chunywan@users.noreply.github.com> Date: Tue, 14 Jun 2022 13:59:29 -0700 Subject: [PATCH 271/621] Support local cluster in Outposts (#939) This adds support, via new bootstrap flags (--enable-local-outpost), for local control planes on Outpost, specifically when the Outpost is disconnected from its parent region. --- files/bootstrap.sh | 74 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 70 insertions(+), 4 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 536b3c462..fef75348d 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -30,6 +30,8 @@ function print_help { echo "--container-runtime Specify a container runtime (default: dockerd)" echo "--ip-family Specify ip family of the cluster" echo "--service-ipv6-cidr ipv6 cidr range of the cluster" + echo "--enable-local-outpost Enable support for worker nodes to communicate with the local control plane when running on a disconnected Outpost. (true or false)" + echo "--cluster-id Specify the id of EKS cluster" } POSITIONAL=() @@ -111,6 +113,16 @@ while [[ $# -gt 0 ]]; do shift shift ;; + --enable-local-outpost) + ENABLE_LOCAL_OUTPOST=$2 + shift + shift + ;; + --cluster-id) + CLUSTER_ID=$2 + shift + shift + ;; *) # unknown option POSITIONAL+=("$1") # save it in an array for later shift # past argument @@ -137,6 +149,8 @@ PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.1-eksbuild.1}" CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-dockerd}" IP_FAMILY="${IP_FAMILY:-}" SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" +ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" +CLUSTER_ID="${CLUSTER_ID:-}" function get_pause_container_account_for_region () { local region="$1" @@ -360,7 +374,7 @@ if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then --region=${AWS_DEFAULT_REGION} \ --name=${CLUSTER_NAME} \ --output=text \ - --query 'cluster.{certificateAuthorityData: certificateAuthority.data, endpoint: endpoint, serviceIpv4Cidr: 
kubernetesNetworkConfig.serviceIpv4Cidr, serviceIpv6Cidr: kubernetesNetworkConfig.serviceIpv6Cidr, clusterIpFamily: kubernetesNetworkConfig.ipFamily}' > $DESCRIBE_CLUSTER_RESULT || rc=$? + --query 'cluster.{certificateAuthorityData: certificateAuthority.data, endpoint: endpoint, serviceIpv4Cidr: kubernetesNetworkConfig.serviceIpv4Cidr, serviceIpv6Cidr: kubernetesNetworkConfig.serviceIpv6Cidr, clusterIpFamily: kubernetesNetworkConfig.ipFamily, outpostArn: outpostConfig.outpostArns[0], id: id}' > $DESCRIBE_CLUSTER_RESULT || rc=$? if [[ $rc -eq 0 ]]; then break fi @@ -373,12 +387,26 @@ if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then done B64_CLUSTER_CA=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $1}') APISERVER_ENDPOINT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $3}') - SERVICE_IPV4_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $4}') - SERVICE_IPV6_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $5}') + CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $4}') + OUTPOST_ARN=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $5}') + SERVICE_IPV4_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $6}') + SERVICE_IPV6_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $7}') if [[ -z "${IP_FAMILY}" ]]; then IP_FAMILY=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $2}') fi + + # Automatically detect local cluster in outpost + if [[ -z "${OUTPOST_ARN}" ]] || [[ "${OUTPOST_ARN}" == "None" ]]; then + IS_LOCAL_OUTPOST_DETECTED=false + else + IS_LOCAL_OUTPOST_DETECTED=true + fi + + # If the cluster id is returned from describe cluster, let us use it no matter whether cluster id is passed from option + if [[ ! 
-z "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" ]] && [[ "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" != "None" ]]; then + CLUSTER_ID=${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT} + fi fi if [[ -z "${IP_FAMILY}" ]] || [[ "${IP_FAMILY}" == "None" ]]; then @@ -389,9 +417,47 @@ fi echo $B64_CLUSTER_CA | base64 -d > $CA_CERTIFICATE_FILE_PATH -sed -i s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig sed -i s,AWS_REGION,$AWS_DEFAULT_REGION,g /var/lib/kubelet/kubeconfig + +if [[ -z "$ENABLE_LOCAL_OUTPOST" ]]; then + # Only when "--enable-local-outpost" option is not set explicity on calling bootstrap.sh, it will be assigned with + # - the result of auto-detectection through describe-cluster + # - or "false" when describe-cluster is bypassed. + # This also means if "--enable-local-outpost" option is set explicity, it will override auto-detection result + ENABLE_LOCAL_OUTPOST="${IS_LOCAL_OUTPOST_DETECTED:-false}" +fi + +### To support worker nodes to continue to communicate and connect to local cluster even when the Outpost +### is disconnected from the parent AWS Region, the following specific setup are required: +### - append entries to /etc/hosts with the mappings of control plane host IP address and API server +### domain name. So that the domain name can be resolved to IP addresses locally. +### - use aws-iam-authenticator as bootstrap auth for kubelet TLS bootstrapping which downloads client +### X.509 certificate and generate kubelet kubeconfig file which uses the cleint cert. So that the +### worker node can be authentiacated through X.509 certificate which works for both connected and +#### disconnected state. 
+if [[ "${ENABLE_LOCAL_OUTPOST}" == "true" ]]; then + ### append to /etc/hosts file with shuffled mappings of "IP address to API server domain name" + DOMAIN_NAME=$(echo "$APISERVER_ENDPOINT" | awk -F/ '{print $3}' | awk -F: '{print $1}') + getent hosts "$DOMAIN_NAME" | shuf >> /etc/hosts + + ### kubelet bootstrap kubeconfig uses aws-iam-authenticator with cluster id to authenticate to cluster + ### - if "aws eks describe-cluster" is bypassed, for local outpost, the value of CLUSTER_NAME parameter will be cluster id. + ### - otherwise, the cluster id will use the id returned by "aws eks describe-cluster". + if [[ -z "${CLUSTER_ID}" ]]; then + echo "Cluster ID is required when local outpost support is enabled" + exit 1 + else + sed -i s,CLUSTER_NAME,$CLUSTER_ID,g /var/lib/kubelet/kubeconfig + + ### use aws-iam-authenticator as bootstrap auth and download X.509 cert used in kubelet kubeconfig + mv /var/lib/kubelet/kubeconfig /var/lib/kubelet/bootstrap-kubeconfig + KUBELET_EXTRA_ARGS="--bootstrap-kubeconfig /var/lib/kubelet/bootstrap-kubeconfig $KUBELET_EXTRA_ARGS" + fi +else + sed -i s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig +fi + ### kubelet.service configuration if [[ "${IP_FAMILY}" == "ipv6" ]]; then From 0f7d8afce913991242b9c0c5ff76def35a3ae98d Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Tue, 14 Jun 2022 16:19:22 -0700 Subject: [PATCH 272/621] Updating Makefile to update 1.22 build config (#948) Updating the 1.22 binaries including their build date and build version --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cbe8d3833..199a7debc 100644 --- a/Makefile +++ b/Makefile @@ -56,4 +56,4 @@ k8s: validate .PHONY: 1.22 1.22: - $(MAKE) k8s kubernetes_version=1.22.6 kubernetes_build_date=2022-03-09 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.9 kubernetes_build_date=2022-06-03 pull_cni_from_github=true From 6bb3edf54235b4c744ec57dc617baa3e89d2e307 Mon Sep 17 00:00:00 2001 
From: Prasad Shende Date: Wed, 15 Jun 2022 16:58:23 -0700 Subject: [PATCH 273/621] Updating CHANGELOG.md for AMI release 06/10 (#950) --- CHANGELOG.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae212abc3..312a33566 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,39 @@ # Changelog +### AMI Release v20220610 +* amazon-eks-gpu-node-1.22-v20220610 +* amazon-eks-gpu-node-1.21-v20220610 +* amazon-eks-gpu-node-1.20-v20220610 +* amazon-eks-gpu-node-1.19-v20220610 +* amazon-eks-arm64-node-1.22-v20220610 +* amazon-eks-arm64-node-1.21-v20220610 +* amazon-eks-arm64-node-1.20-v20220610 +* amazon-eks-arm64-node-1.19-v20220610 +* amazon-eks-node-1.22-v20220610 +* amazon-eks-node-1.21-v20220610 +* amazon-eks-node-1.20-v20220610 +* amazon-eks-node-1.19-v20220610 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.9/2022-06-03/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.196-108.356.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Notable changes: +* Containerd version upgraded to 1.4.13-3.amzn2 for [CVE-2022-31030](https://alas.aws.amazon.com/cve/html/CVE-2022-31030.html). 
+* Kernel version upgraded to 5.4.196-108.356.amzn2 for [CVE-2022-0494](https://alas.aws.amazon.com/cve/html/CVE-2022-0494.html), [CVE-2022-0854](https://alas.aws.amazon.com/cve/html/CVE-2022-0854.html), [CVE-2022-1729](https://alas.aws.amazon.com/cve/html/CVE-2022-1729.html), [CVE-2022-1836](https://alas.aws.amazon.com/cve/html/CVE-2022-1836.html), [CVE-2022-28893](https://alas.aws.amazon.com/cve/html/CVE-2022-28893.html), [CVE-2022-29581](https://alas.aws.amazon.com/cve/html/CVE-2022-29581.html) +* Updating the kubelet version for 1.22 from 1.22.6 to 1.22.9 + ### AMI Release v20220526 * amazon-eks-gpu-node-1.22-v20220526 * amazon-eks-gpu-node-1.21-v20220526 From b905f2afc3b14210a8604d44df462ab83298dc1b Mon Sep 17 00:00:00 2001 From: Denis Salamanca Date: Fri, 17 Jun 2022 03:17:07 +1000 Subject: [PATCH 274/621] Increase /var/log/messages limit to 100M (#930) 10M resulted in insufficient logs for investigation in too many cases. Rather than removing the limit entirely, this raises it to include the entire /var/log/messages in most cases while making the resulting archive easy to pass between support and service teams. 
--- log-collector-script/linux/eks-log-collector.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 84af9bc17..f33381941 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -320,7 +320,7 @@ get_common_logs() { for entry in ${COMMON_LOGS[*]}; do if [[ -e "/var/log/${entry}" ]]; then if [[ "${entry}" == "messages" ]]; then - tail -c 10M /var/log/messages > "${COLLECT_DIR}"/var_log/messages + tail -c 100M /var/log/messages > "${COLLECT_DIR}"/var_log/messages continue fi if [[ "${entry}" == "containers" ]]; then From 69b1c73dce130b4a468564e82e15e62336d7fbc0 Mon Sep 17 00:00:00 2001 From: Pubudu Perera <892600+suharshan@users.noreply.github.com> Date: Fri, 17 Jun 2022 05:26:50 +0800 Subject: [PATCH 275/621] Support packer's ami_regions feature (#752) Allows `ami_regions` to be passed through the `make` line, which will instruct Packer to copy the resulting AMI(s) to additional regions. 
--- Makefile | 2 +- eks-worker-al2.json | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 199a7debc..ef0f45d98 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version runc_version cni_plugin_version source_ami_id source_ami_owners source_ami_filter_name arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry +PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version runc_version cni_plugin_version source_ami_id source_ami_owners source_ami_filter_name arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry ami_regions K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 6c877a52a..1ace29f65 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -35,7 +35,8 @@ "launch_block_device_mappings_volume_size": "4", "ami_users": "", "additional_yum_repos": "", - "sonobuoy_e2e_registry": "" + "sonobuoy_e2e_registry": "", + "ami_regions": "" }, "builders": [ { @@ -74,6 +75,7 @@ "delete_on_termination": true } ], + "ami_regions": "{{user `ami_regions`}}", "ssh_username": "{{user `ssh_username`}}", "ssh_interface": "{{user `ssh_interface`}}", "temporary_security_group_source_cidrs": "{{user `temporary_security_group_source_cidrs`}}", From 86b2ab96b341ce27bdc3503858f665eac963241e Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Thu, 23 Jun 2022 13:26:15 -0700 Subject: [PATCH 276/621] Update Makefile to reflect latest 1.20 binaries (#955) We recently updated the 1.20 binaries with latest build 
dates and those respective binaries have been uploaded to the s3 bucket. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ef0f45d98..c9be7eb6e 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,7 @@ k8s: validate .PHONY: 1.20 1.20: - $(MAKE) k8s kubernetes_version=1.20.11 kubernetes_build_date=2021-11-10 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-06-20 pull_cni_from_github=true .PHONY: 1.21 1.21: From 4cf8509fe94a71d9b495b4c65c077a3cf02b519a Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Thu, 23 Jun 2022 17:42:19 -0700 Subject: [PATCH 277/621] Updating changelog for AMI release 20220620 (#956) --- CHANGELOG.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 312a33566..9bd805846 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,41 @@ # Changelog +### AMI Release v20220620 +* amazon-eks-gpu-node-1.22-v20220620 +* amazon-eks-gpu-node-1.21-v20220620 +* amazon-eks-gpu-node-1.20-v20220620 +* amazon-eks-gpu-node-1.19-v20220620 +* amazon-eks-arm64-node-1.22-v20220620 +* amazon-eks-arm64-node-1.21-v20220620 +* amazon-eks-arm64-node-1.20-v20220620 +* amazon-eks-arm64-node-1.19-v20220620 +* amazon-eks-node-1.22-v20220620 +* amazon-eks-node-1.21-v20220620 +* amazon-eks-node-1.20-v20220620 +* amazon-eks-node-1.19-v20220620 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.9/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.15/2022-06-20/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.196-108.356.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Notable changes: +* Update kubelet binaries for 1.20 +* Support packer's ami_regions feature +* Increase /var/log/messages limit to 100M +* 
Support local cluster in Outposts +* Adding c6id, m6id, r6id to eni-max-pods.txt + ### AMI Release v20220610 * amazon-eks-gpu-node-1.22-v20220610 * amazon-eks-gpu-node-1.21-v20220610 From 5c74a515ddb7f59f8a94306e7de0a7f9e6f0a1fb Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 30 Jun 2022 15:26:21 -0700 Subject: [PATCH 278/621] Adds release v20220629 to the changelog (#958) --- CHANGELOG.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bd805846..461edc503 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,36 @@ # Changelog +### AMI Release v20220629 +* amazon-eks-gpu-node-1.22-v20220629 +* amazon-eks-gpu-node-1.21-v20220629 +* amazon-eks-gpu-node-1.20-v20220629 +* amazon-eks-gpu-node-1.19-v20220629 +* amazon-eks-arm64-node-1.22-v20220629 +* amazon-eks-arm64-node-1.21-v20220629 +* amazon-eks-arm64-node-1.20-v20220629 +* amazon-eks-arm64-node-1.19-v20220629 +* amazon-eks-node-1.22-v20220629 +* amazon-eks-node-1.21-v20220629 +* amazon-eks-node-1.20-v20220629 +* amazon-eks-node-1.19-v20220629 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.9/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.15/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.196-108.356.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Noted software versions are identical to release v20220620 in the commercial partition. 
+ ### AMI Release v20220620 * amazon-eks-gpu-node-1.22-v20220620 * amazon-eks-gpu-node-1.21-v20220620 From 28bd4f8aa3891b637789059f7aca8466be04531a Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 30 Jun 2022 15:34:11 -0700 Subject: [PATCH 279/621] Fixes 1.20 reference in changelog for v20220629 (#959) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 461edc503..51c2fe3a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ Binaries used to build these AMIs are published: * s3://amazon-eks/1.22.9/2022-03-09/ * s3://amazon-eks/1.21.12/2022-05-20/ -* s3://amazon-eks/1.20.15/2021-11-10/ +* s3://amazon-eks/1.20.15/2022-06-20/ * s3://amazon-eks/1.19.15/2021-11-10/ AMI details: From 4e4cc4ec46213132b0ec8a6035b5f640c1e1bab1 Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Wed, 13 Jul 2022 14:22:30 -0700 Subject: [PATCH 280/621] updating pause-container-version to 3.5 from 3.1 (#965) Co-authored-by: Ravi Sinha --- files/bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index fef75348d..cff81924c 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -145,7 +145,7 @@ ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" API_RETRY_ATTEMPTS="${API_RETRY_ATTEMPTS:-3}" DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" CONTAINERD_CONFIG_FILE="${CONTAINERD_CONFIG_FILE:-}" -PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.1-eksbuild.1}" +PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.5}" CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-dockerd}" IP_FAMILY="${IP_FAMILY:-}" SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" From 584f9a56c76fc9e7e8632f6ea45e29d45f2eab63 Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Tue, 19 Jul 2022 15:50:24 -0700 Subject: [PATCH 281/621] adding log-collector-script in EKS AL2 AMI (#967) Co-authored-by: Ravi Sinha --- eks-worker-al2.json | 7 ++++++- scripts/install-worker.sh | 6 ++++++ 2 files changed, 12 
insertions(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 1ace29f65..f44f44521 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -125,7 +125,7 @@ "pause_before": "90s", "remote_folder": "{{ user `remote_folder`}}", "inline": [ - "mkdir -p /tmp/worker/" + "mkdir -p /tmp/worker/log-collector-script/" ] }, { @@ -133,6 +133,11 @@ "source": "{{template_dir}}/files/", "destination": "/tmp/worker/" }, + { + "type": "file", + "source": "{{template_dir}}/log-collector-script/linux/", + "destination": "/tmp/worker/log-collector-script/" + }, { "type": "shell", "remote_folder": "{{ user `remote_folder`}}", diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 4c653a59c..639bbe1bb 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -367,6 +367,12 @@ echo fs.inotify.max_user_instances=8192 | sudo tee -a /etc/sysctl.conf echo vm.max_map_count=524288 | sudo tee -a /etc/sysctl.conf +################################################################################ +### adding log-collector-script ############################################### +################################################################################ +sudo mkdir -p /etc/eks/log-collector-script/ +sudo cp $TEMPLATE_DIR/log-collector-script/eks-log-collector.sh /etc/eks/log-collector-script/ + ################################################################################ ### Cleanup #################################################################### ################################################################################ From 03cc9d7a3eaec8ad145dd73cd7ee21970b4e9d54 Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Thu, 21 Jul 2022 08:41:48 -0700 Subject: [PATCH 282/621] adding log-collector-script directory as dependency dir (#968) Co-authored-by: Ravi Sinha --- ArchiveBuildConfig.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ArchiveBuildConfig.yaml b/ArchiveBuildConfig.yaml index e6663a0fa..a93376845 
100644 --- a/ArchiveBuildConfig.yaml +++ b/ArchiveBuildConfig.yaml @@ -9,6 +9,7 @@ dependencies: dirs: - src: files/ - src: scripts/ + - src: log-collector-script/ files: - src: Makefile - src: eks-worker-al2.json From 5dafe2088499bb3c33b3e56625e9852f2cda2956 Mon Sep 17 00:00:00 2001 From: Steve Hipwell Date: Mon, 25 Jul 2022 19:21:16 +0100 Subject: [PATCH 283/621] Use systemd as the containerd cgroup driver (#717) Authored-by: Steve Hipwell --- files/bootstrap.sh | 17 +++++++++-------- files/containerd-config.toml | 3 +++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index cff81924c..1cc5ca6c3 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -402,7 +402,7 @@ if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then else IS_LOCAL_OUTPOST_DETECTED=true fi - + # If the cluster id is returned from describe cluster, let us use it no matter whether cluster id is passed from option if [[ ! -z "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" ]] && [[ "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" != "None" ]]; then CLUSTER_ID=${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT} @@ -421,20 +421,20 @@ sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig sed -i s,AWS_REGION,$AWS_DEFAULT_REGION,g /var/lib/kubelet/kubeconfig if [[ -z "$ENABLE_LOCAL_OUTPOST" ]]; then - # Only when "--enable-local-outpost" option is not set explicity on calling bootstrap.sh, it will be assigned with + # Only when "--enable-local-outpost" option is not set explicity on calling bootstrap.sh, it will be assigned with # - the result of auto-detectection through describe-cluster # - or "false" when describe-cluster is bypassed. 
# This also means if "--enable-local-outpost" option is set explicity, it will override auto-detection result - ENABLE_LOCAL_OUTPOST="${IS_LOCAL_OUTPOST_DETECTED:-false}" + ENABLE_LOCAL_OUTPOST="${IS_LOCAL_OUTPOST_DETECTED:-false}" fi -### To support worker nodes to continue to communicate and connect to local cluster even when the Outpost +### To support worker nodes to continue to communicate and connect to local cluster even when the Outpost ### is disconnected from the parent AWS Region, the following specific setup are required: -### - append entries to /etc/hosts with the mappings of control plane host IP address and API server +### - append entries to /etc/hosts with the mappings of control plane host IP address and API server ### domain name. So that the domain name can be resolved to IP addresses locally. -### - use aws-iam-authenticator as bootstrap auth for kubelet TLS bootstrapping which downloads client -### X.509 certificate and generate kubelet kubeconfig file which uses the cleint cert. So that the -### worker node can be authentiacated through X.509 certificate which works for both connected and +### - use aws-iam-authenticator as bootstrap auth for kubelet TLS bootstrapping which downloads client +### X.509 certificate and generate kubelet kubeconfig file which uses the cleint cert. So that the +### worker node can be authentiacated through X.509 certificate which works for both connected and #### disconnected state. 
if [[ "${ENABLE_LOCAL_OUTPOST}" == "true" ]]; then ### append to /etc/hosts file with shuffled mappings of "IP address to API server domain name" @@ -547,6 +547,7 @@ EOF if [[ -n "$CONTAINERD_CONFIG_FILE" ]]; then sudo cp -v $CONTAINERD_CONFIG_FILE /etc/eks/containerd/containerd-config.toml fi + echo "$(jq '.cgroupDriver="systemd"' $KUBELET_CONFIG)" > $KUBELET_CONFIG sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service diff --git a/files/containerd-config.toml b/files/containerd-config.toml index 31404027f..8a668ce84 100644 --- a/files/containerd-config.toml +++ b/files/containerd-config.toml @@ -14,6 +14,9 @@ sandbox_image = "SANDBOX_IMAGE" [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] runtime_type = "io.containerd.runc.v2" +[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] +SystemdCgroup = true + [plugins."io.containerd.grpc.v1.cri".cni] bin_dir = "/opt/cni/bin" conf_dir = "/etc/cni/net.d" From 4bc398e73ae17729114bea725ba669eb499e6543 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 26 Jul 2022 12:22:43 -0700 Subject: [PATCH 284/621] Adds instructions for PR testing (#970) --- CONTRIBUTING.md | 66 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7e0b7332b..c22acf627 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -39,6 +39,72 @@ To send us a pull request, please: GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). +### Testing Changes + +When submitting PRs, we want to verify that there are no regressions in the AMI with the new changes. 
EKS runs various tests before publishing new Amazon EKS optimized Amazon Linux AMIs, which will ensure the highest level of confidence that there are no regressions in officially published AMIs. To maintain the health of this repo, we need to do some basic validation prior to merging PRs. Eventually, we hope to automate this process. Until then, here are the basic steps that we should take before merging PRs. + +**Test #1: Verify that building AMIs still works** + +If your change is relevant to a specific Kubernetes version, build all AMIs that apply. Otherwise, just choose the latest available Kubernetes version. + +``` +# Configure AWS credentials +make 1.22 +``` + +**Test #2: Create a nodegroup with new AMI and confirm it joins a cluster** + +Once the AMI is built, we need to verify that it can join a cluster. You can use `eksctl`, or your method of choice, to create a cluster and add nodes to it using the AMI you built. Below is an example config file. + +`cluster.yaml` + +``` +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +metadata: + name: basic-cluster + region: us-west-2 + version: '1.22' + +nodeGroups: + - name: ng + instanceType: m5.large + ami: [INSERT_AMI_ID] + overrideBootstrapCommand: | + #!/bin/bash + /etc/eks/bootstrap.sh basic-cluster +``` + +Then run: + +``` +eksctl create cluster -f cluster.yaml +``` + +`eksctl` will verify that the nodes join the cluster before completing. + +**Test #3: Verify that the nodes are Kubernetes conformant** + +You can use [sonobuoy](https://sonobuoy.io/) to run conformance tests on the cluster you've create in *Test #2*. You should only include nodes with the custom AMI built in *Test #1*. You must install `sonobuoy` locally before running. + +``` +sonobuoy run --wait +``` + +By default, `sonobuoy` will run `e2e` and `systemd-logs`. This step may take multiple hours to run. 
+ +**Test #4: [Optional] Test your specific PR changes** + +If your PR has changes that require additional, custom validation, provide the appropriate steps to verify that the changes don't cause regressions and behave as expected. Document the steps taken in the CR. + +**Clean Up** + +Delete the cluster: + +``` +eksctl delete cluster -f cluster.yaml +``` ## Finding contributions to work on Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels ((enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/amazon-eks-ami/labels/help%20wanted) issues is a great place to start. From e4dd6ad98200d1f457f7f8043ef520cd9e4eac5e Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 26 Jul 2022 12:50:23 -0700 Subject: [PATCH 285/621] Update PR template to reference testing steps (#971) --- .github/PULL_REQUEST_TEMPLATE.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e779d6499..09b7a68ad 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,8 +1,14 @@ -*Issue #, if available:* +**Issue #, if available:** -*Description of changes:* +**Description of changes:** By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. + +**Testing Done** + + + +*[See this guide for recommended testing for PRs.](https://github.com/awslabs/amazon-eks-ami/blob/master/CONTRIBUTING.md#testing-changes) Some tests may not apply. 
Completing tests and providing additional validation steps are not required, but it is recommended and may reduce review time and time to merge.* From ca85d94b0bc8dc03bcaa569cd983d06e20c72b8d Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Fri, 29 Jul 2022 23:54:41 -0700 Subject: [PATCH 286/621] Updating changelog for AMI release v20220725 (#973) Co-authored-by: Ravi Sinha --- CHANGELOG.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51c2fe3a4..df4343055 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,40 @@ # Changelog +### AMI Release v20220725 +* amazon-eks-gpu-node-1.22-v20220725 +* amazon-eks-gpu-node-1.21-v20220725 +* amazon-eks-gpu-node-1.20-v20220725 +* amazon-eks-gpu-node-1.19-v20220725 +* amazon-eks-arm64-node-1.22-v20220725 +* amazon-eks-arm64-node-1.21-v20220725 +* amazon-eks-arm64-node-1.20-v20220725 +* amazon-eks-arm64-node-1.19-v20220725 +* amazon-eks-node-1.22-v20220725 +* amazon-eks-node-1.21-v20220725 +* amazon-eks-node-1.20-v20220725 +* amazon-eks-node-1.19-v20220725 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.9/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.15/2022-06-20/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.204-113.362.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1575.0 + +Notable changes: +* Updating pause-container version from 3.1 to 3.5 +* Adding log-collector-script to the AMI +* Kernel version upgraded to 5.4.204-113.362.amzn2 for [CVE-2022-0494](https://alas.aws.amazon.com/cve/html/CVE-2022-0494.html) [CVE-2022-0812](https://alas.aws.amazon.com/cve/html/CVE-2022-0812.html) [CVE-2022-1012](https://alas.aws.amazon.com/cve/html/CVE-2022-1012.html) [CVE-2022-1184](https://alas.aws.amazon.com/cve/html/CVE-2022-1184.html) 
[CVE-2022-1966](https://alas.aws.amazon.com/cve/html/CVE-2022-1966.html) [CVE-2022-32250](https://alas.aws.amazon.com/cve/html/CVE-2022-32250.html) [CVE-2022-32296](https://alas.aws.amazon.com/cve/html/CVE-2022-32296.html) [CVE-2022-32981](https://alas.aws.amazon.com/cve/html/CVE-2022-32981.html) + + ### AMI Release v20220629 * amazon-eks-gpu-node-1.22-v20220629 * amazon-eks-gpu-node-1.21-v20220629 From 7dc3326f2a529e34e1aab765d507df1c8db58c8e Mon Sep 17 00:00:00 2001 From: icarthick <93390344+icarthick@users.noreply.github.com> Date: Tue, 2 Aug 2022 14:38:26 -0500 Subject: [PATCH 287/621] =?UTF-8?q?Update=20Readme=20file=20to=20have=20cl?= =?UTF-8?q?ear=20instructions=20about=20how=20to=20build=20usin=E2=80=A6?= =?UTF-8?q?=20(#794)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update Readme file to have clear instructions about how to build using different versions of Kubernetes binaries * Change readme text to exclude older k8s versions --- README.md | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 37cbc9c3f..f3bc37212 100644 --- a/README.md +++ b/README.md @@ -21,13 +21,71 @@ when building this AMI. ## Building the AMI -A Makefile is provided to build the AMI, but it is just a small wrapper around +A Makefile is provided to build the Amazon EKS Worker AMI, but it is just a small wrapper around invoking Packer directly. You can initiate the build process by running the following command in the root of this repository: ```bash make ``` +The Makefile chooses a particular kubelet binary to use per kubernetes version which you can [view here](Makefile). 
+To build an Amazon EKS Worker AMI for a particular Kubernetes version run the following command +```bash +make 1.21 ## Build a Amazon EKS Worker AMI for k8s 1.21 +``` +### Building against other versions of Kubernetes binaries +To build an Amazon EKS Worker AMI with other versions of Kubernetes that are not listed above run the following AWS Command +Line Interface (AWS CLI) commands to obtain values for KUBERNETES_VERSION, KUBERNETES_BUILD_DATE, PLATFORM, ARCH from S3 +```bash +#List of all avalable Kuberenets Versions: +aws s3 ls s3://amazon-eks +KUBERNETES_VERSION=1.17.9 # Chose a version and set the variable + +#List of all builds for the specified Kubernetes Version: +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/ +KUBERNETES_BUILD_DATE=2020-08-04 # Chose a date and set the variable + +#List of all platforms available for the selected Kubernetes Version and build date +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/2020-08-04/bin/ +PLATFORM=linux # Chose a platform and set the variable + +#List of all architectures for the selected Kubernetes Version, build date and platform +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/2020-08-04/bin/linux/ +ARCH=amd64 #Chose an architecture and set the variable +``` +Run the following command to build an Amazon EKS Worker AMI based on the chosen parameters in the previous step +```bash +make k8s \ + kubernetes_version=$KUBERNETES_VERSION \ + kubernetes_build_date=$KUBERNETES_BUILD_DATE \ + arch=$ARCH +``` + +### Providing your own Kubernetes Binaries + +By default, binaries are downloaded from the Amazon EKS public Amazon Simple Storage Service (Amazon S3) +bucket amazon-eks in us-west-2. You can instead choose to provide your own version of Kubernetes binaries to be used. To use your own binaries + +1. Copy the binaries to your own S3 bucket using the AWS CLI. 
Here is an example that uses Kubelet binary +```bash + aws s3 cp kubelet s3://my-custom-bucket/kubernetes_version/kubernetes_build_date/bin/linux/arch/kubelet +``` +**Note**: Replace my-custom-bucket, amazon-eks, kubernetes_version, kubernetes_build_date, and arch with your values. + +**Important**: You must provide all the binaries listed in the default amazon-eks bucket for a specific kubernetes_version, kubernetes_build_date, and arch combination. These binaries must be accessible through AWS Identity and Access Management (IAM) credentials configured in the Install and configure HashiCorp Packer section. + +2. Run the following command to start the build process to use your own Kubernetes binaries +```bash +make k8s \ + binary_bucket_name=my-custom-bucket \ + binary_bucket_region=eu-west-1 \ + kubernetes_version=1.14.9 \ + kubernetes_build_date=2020-01-22 +``` +**Note**: Confirm that the binary_bucket_name, binary_bucket_region, kubernetes_version, and kubernetes_build_date parameters match the path to your binaries in Amazon S3. 
+ + + The Makefile runs Packer with the `eks-worker-al2.json` build specification template and the [amazon-ebs](https://www.packer.io/docs/builders/amazon-ebs.html) From 3d44534f1bf4a4f45e4d9d266f7c1e36562ad707 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 5 Aug 2022 14:12:20 -0700 Subject: [PATCH 288/621] Generate version-info.json during build (#974) --- eks-worker-al2.json | 12 ++++++++++++ scripts/generate-version-info.sh | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 scripts/generate-version-info.sh diff --git a/eks-worker-al2.json b/eks-worker-al2.json index f44f44521..521c1b7de 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -174,6 +174,18 @@ "environment_vars": [ "KERNEL_VERSION={{user `kernel_version`}}" ] + }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "script": "{{template_dir}}/scripts/generate-version-info.sh", + "execute_command": "chmod +x {{ .Path }}; {{ .Path }} /tmp/version-info.json" + }, + { + "type": "file", + "direction": "download", + "source": "/tmp/version-info.json", + "destination": "{{ user `ami_name` }}-version-info.json" } ], "post-processors": [ diff --git a/scripts/generate-version-info.sh b/scripts/generate-version-info.sh new file mode 100644 index 000000000..fa7baa196 --- /dev/null +++ b/scripts/generate-version-info.sh @@ -0,0 +1,20 @@ +#!/bin/sh + +# generates a JSON file containing version information for the software in this AMI + +set -o errexit +set -o pipefail + +if [ "$#" -ne 1 ] +then + echo "usage: $0 OUTPUT_FILE" + exit 1 +fi + +OUTPUT_FILE="$1" + +# packages +rpm --query --all --queryformat '\{"%{NAME}": "%{VERSION}-%{RELEASE}"\}\n' | jq --slurp --sort-keys 'add | {packages:(.)}' > "$OUTPUT_FILE" + +# binaries +echo $(jq ".binaries.kubelet = \"$(kubelet --version | awk '{print $2}')\"" $OUTPUT_FILE) > $OUTPUT_FILE From c847bba4fea9773d40446155d8e670192500f133 Mon Sep 17 00:00:00 2001 From: guessi Date: Tue, 9 Aug 2022 01:05:35 +0800 Subject: 
[PATCH 289/621] Extend support for containerd log collecting (#964) --- .../linux/eks-log-collector.sh | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index f33381941..0e64f2278 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -559,11 +559,25 @@ get_containerd_info() { warning "The Containerd daemon is not running." fi - ok + ok + + try "Collect Containerd running information" + if ! command -v ctr >/dev/null 2>&1; then + warning "ctr not installed" + else + timeout 75 ctr version > "${COLLECT_DIR}"/containerd/containerd-version.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr namespaces list > "${COLLECT_DIR}"/containerd/containerd-namespaces.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io images list > "${COLLECT_DIR}"/containerd/containerd-images.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io containers list > "${COLLECT_DIR}"/containerd/containerd-containers.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io tasks list > "${COLLECT_DIR}"/containerd/containerd-tasks.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io plugins list > "${COLLECT_DIR}"/containerd/containerd-plugins.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + fi + + ok } get_sandboxImage_info() { - try "Collect sandbox-image daemon information" + try "Collect sandbox-image daemon information" timeout 75 journalctl -u sandbox-image > "${COLLECT_DIR}"/sandbox-image/sandbox-image-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"sandbox-image info output \" " ok } From 
56bcf1946d814b0d56bfd4c09a5688e95c8c0268 Mon Sep 17 00:00:00 2001 From: siddharth Date: Mon, 8 Aug 2022 23:52:06 +0530 Subject: [PATCH 290/621] eks-log-collector.sh: add timeout to df command. (#949) df hangs in some scenarios like https://github.com/kubernetes-sigs/aws-efs-csi-driver/issues/616 --- log-collector-script/linux/eks-log-collector.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 0e64f2278..5469babb0 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -210,7 +210,7 @@ is_diskfull() { # 1.5GB in KB threshold=1500000 - result=$(df / | grep --invert-match "Filesystem" | awk '{ print $4 }') + result=$(timeout 75 df / | grep --invert-match "Filesystem" | awk '{ print $4 }') # If "result" is less than or equal to "threshold", fail. if [[ "${result}" -le "${threshold}" ]]; then @@ -278,7 +278,7 @@ get_mounts_info() { try "collect mount points and volume information" mount > "${COLLECT_DIR}"/storage/mounts.txt echo >> "${COLLECT_DIR}"/storage/mounts.txt - df --human-readable >> "${COLLECT_DIR}"/storage/mounts.txt + timeout 75 df --human-readable >> "${COLLECT_DIR}"/storage/mounts.txt lsblk > "${COLLECT_DIR}"/storage/lsblk.txt lvs > "${COLLECT_DIR}"/storage/lvs.txt pvs > "${COLLECT_DIR}"/storage/pvs.txt From 5cb2ba8d69fcc8c2e692120c830854a3a0891ef7 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Mon, 8 Aug 2022 13:25:30 -0500 Subject: [PATCH 291/621] record inode usage (#978) This will allow diagnosing issues where we run out of inodes instead of disk space more easily. 
--- log-collector-script/linux/eks-log-collector.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 5469babb0..1241f4529 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -279,6 +279,7 @@ get_mounts_info() { mount > "${COLLECT_DIR}"/storage/mounts.txt echo >> "${COLLECT_DIR}"/storage/mounts.txt timeout 75 df --human-readable >> "${COLLECT_DIR}"/storage/mounts.txt + timeout 75 df --inodes >> "${COLLECT_DIR}"/storage/inodes.txt lsblk > "${COLLECT_DIR}"/storage/lsblk.txt lvs > "${COLLECT_DIR}"/storage/lvs.txt pvs > "${COLLECT_DIR}"/storage/pvs.txt From 1cf9757299499daf8d7a62dcad8a18feb5cc2d38 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Mon, 8 Aug 2022 13:25:48 -0500 Subject: [PATCH 292/621] copy AWS EBS/EFS driver logs (#979) --- log-collector-script/linux/eks-log-collector.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 1241f4529..fd0290cf3 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -329,6 +329,8 @@ get_common_logs() { cp --force --dereference --recursive /var/log/containers/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2>/dev/null cp --force --dereference --recursive /var/log/containers/coredns-* "${COLLECT_DIR}"/var_log/ 2>/dev/null cp --force --dereference --recursive /var/log/containers/kube-proxy* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/containers/ebs-csi* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/containers/efs-csi* "${COLLECT_DIR}"/var_log/ 2>/dev/null continue fi if [[ "${entry}" == "pods" ]]; then @@ -336,6 +338,8 @@ get_common_logs() { cp --force --dereference --recursive 
/var/log/pods/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2>/dev/null cp --force --dereference --recursive /var/log/pods/kube-system_coredns* "${COLLECT_DIR}"/var_log/ 2>/dev/null cp --force --dereference --recursive /var/log/pods/kube-system_kube-proxy* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_ebs-csi-* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_efs-csi-* "${COLLECT_DIR}"/var_log/ 2>/dev/null continue fi cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/var_log/ 2>/dev/null From ab6845498f5c70f230fa997fe5809d531d7056c8 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Mon, 8 Aug 2022 13:26:05 -0500 Subject: [PATCH 293/621] eks-log-collector.sh: record a ps listing including threads (#980) * record a ps listing including threads * print headers after every page of ps output --- log-collector-script/linux/eks-log-collector.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index fd0290cf3..429e2029f 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -548,7 +548,8 @@ get_system_services() { esac timeout 75 top -b -n 1 > "${COLLECT_DIR}"/system/top.txt 2>&1 - timeout 75 ps fauxwww > "${COLLECT_DIR}"/system/ps.txt 2>&1 + timeout 75 ps fauxwww --headers > "${COLLECT_DIR}"/system/ps.txt 2>&1 + timeout 75 ps -eTF --headers > "${COLLECT_DIR}"/system/ps-threads.txt 2>&1 timeout 75 netstat -plant > "${COLLECT_DIR}"/system/netstat.txt 2>&1 ok From b6cdc39e2dbf652f3675a941f34a8c913fef4683 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Mon, 8 Aug 2022 13:26:36 -0500 Subject: [PATCH 294/621] eks-log-collector.sh: collect information regarding throttled processes (#975) * collect information regarding throttled processes Collects process 
throttling information from /sys/fs/cgroup. In the event of detected CPU throttling, the log will show the amount of throttling, the pod and container UIDs if applicable and the processes being throttled: $ cat system/cpu_throttling.txt /sys/fs/cgroup/cpu,cpuacct/kubepods/burstable/poda207af95-18de-463f-a3f6-181f78900fc7/071f5ea890c12c15f98a8fdc3c60cd0fd31df4788194effd9add9713442adb23/cpu.stat nr_periods 104065 nr_throttled 103964 throttled_time 18601742785642 12248 ? SLs 0:00 /bin/stress-ng --cpu 0 --metrics-brief -v 12325 ? R 10:39 /bin/stress-ng --cpu 0 --metrics-brief -v 12326 ? R 11:01 /bin/stress-ng --cpu 0 --metrics-brief -v 12327 ? R 11:04 /bin/stress-ng --cpu 0 --metrics-brief -v 12328 ? R 10:36 /bin/stress-ng --cpu 0 --metrics-brief -v * fix indentation --- .../linux/eks-log-collector.sh | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 429e2029f..7018d1dfa 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -259,6 +259,7 @@ collect() { get_cni_config get_docker_logs get_sandboxImage_info + get_cpu_throttled_processes } pack() { @@ -589,7 +590,7 @@ get_sandboxImage_info() { } get_docker_info() { - try "collect Docker daemon information" + try "Collect Docker daemon information" if [[ "$(pgrep -o dockerd)" -ne 0 ]]; then timeout 75 docker info > "${COLLECT_DIR}"/docker/docker-info.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker info output \" " @@ -604,6 +605,36 @@ get_docker_info() { ok } + +get_cpu_throttled_processes() { + try "Collect CPU Throttled Process Information" + readonly THROTTLE_LOG="${COLLECT_DIR}"/system/cpu_throttling.txt + command find /sys/fs/cgroup -iname "cpu.stat" -print0 | while IFS= read -r -d '' cs + do + # look for a non-zero nr_throttled value + if grep -q "nr_throttled [1-9]" "${cs}"; then + 
pids=${cs/cpu.stat/cgroup.procs} + lines=$(wc -l < "${pids}") + # ignore if no PIDs are listed + if [ "${lines}" -eq "0" ] ; then + continue + fi + + echo "$cs" >> "${THROTTLE_LOG}" + cat "${cs}" >> "${THROTTLE_LOG}" + while IFS= read -r pid + do + command ps ax | grep "^${pid}" >> "${THROTTLE_LOG}" + done < "${pids}" + echo "" >> "${THROTTLE_LOG}" + fi + done + if [ ! -e "${THROTTLE_LOG}" ]; then + echo "No CPU Throttling Found" >> "${THROTTLE_LOG}" + fi + ok +} + # ----------------------------------------------------------------------------- # Entrypoint parse_options "$@" From debf1a9b0f94f942bd1eeda4ab4cdf6e0cc8edca Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 8 Aug 2022 13:58:13 -0700 Subject: [PATCH 295/621] Add timestamps to packer console output (#982) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c9be7eb6e..6e8324856 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ validate: .PHONY: k8s k8s: validate @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" - $(PACKER_BINARY) build $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json + $(PACKER_BINARY) build -timestamp-ui $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html From dc22b732904141b389934d32ff2d1922c4929e96 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Mon, 8 Aug 2022 21:31:49 -0500 Subject: [PATCH 296/621] eks-log-collector.sh: identify IO throttled processes (#981) * identify IO throttled processes * bump log collector version * update filename and version --- log-collector-script/linux/eks-log-collector.sh | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git 
a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 7018d1dfa..561889915 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -20,7 +20,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.6.2" +readonly PROGRAM_VERSION="0.7.0" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -260,6 +260,7 @@ collect() { get_docker_logs get_sandboxImage_info get_cpu_throttled_processes + get_io_throttled_processes } pack() { @@ -552,6 +553,8 @@ get_system_services() { timeout 75 ps fauxwww --headers > "${COLLECT_DIR}"/system/ps.txt 2>&1 timeout 75 ps -eTF --headers > "${COLLECT_DIR}"/system/ps-threads.txt 2>&1 timeout 75 netstat -plant > "${COLLECT_DIR}"/system/netstat.txt 2>&1 + timeout 75 cat /proc/stat > "${COLLECT_DIR}"/system/procstat.txt 2>&1 + timeout 75 cat /proc/[0-9]*/stat > "${COLLECT_DIR}"/system/allprocstat.txt 2>&1 ok } @@ -605,7 +608,6 @@ get_docker_info() { ok } - get_cpu_throttled_processes() { try "Collect CPU Throttled Process Information" readonly THROTTLE_LOG="${COLLECT_DIR}"/system/cpu_throttling.txt @@ -635,6 +637,16 @@ get_cpu_throttled_processes() { ok } +get_io_throttled_processes() { + try "Collect IO Throttled Process Information" + readonly IO_THROTTLE_LOG="${COLLECT_DIR}"/system/io_throttling.txt + command echo -e "PID Name Block IO Delay (centisconds)" > ${IO_THROTTLE_LOG} + # column 42 is Aggregated block I/O delays, measured in centiseconds so we capture the non-zero block + # I/O delays. 
+ command cut -d" " -f 1,2,42 /proc/[0-9]*/stat | sort -n -k+3 -r | grep -v 0$ >> ${IO_THROTTLE_LOG} + ok +} + # ----------------------------------------------------------------------------- # Entrypoint parse_options "$@" From 918b1df72a1d8dfd8926b878a0dcd33c05a9d587 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Tue, 9 Aug 2022 12:54:53 -0700 Subject: [PATCH 297/621] Adding make command to support 1.23 AMI creation (#983) --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index 6e8324856..dd229ba75 100644 --- a/Makefile +++ b/Makefile @@ -57,3 +57,7 @@ k8s: validate .PHONY: 1.22 1.22: $(MAKE) k8s kubernetes_version=1.22.9 kubernetes_build_date=2022-06-03 pull_cni_from_github=true + +.PHONY: 1.23 +1.23: + $(MAKE) k8s kubernetes_version=1.23.7 kubernetes_build_date=2022-06-29 pull_cni_from_github=true From ce469680ded2a9b55facfc8e30b9a9ce4c660cdf Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Tue, 9 Aug 2022 13:15:01 -0700 Subject: [PATCH 298/621] Update CHANGELOG.md for AMI release 20220802 (#984) --- CHANGELOG.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index df4343055..4820885ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,41 @@ # Changelog +### AMI Release v20220802 +* amazon-eks-gpu-node-1.23-v20220802 +* amazon-eks-gpu-node-1.22-v20220802 +* amazon-eks-gpu-node-1.21-v20220802 +* amazon-eks-gpu-node-1.20-v20220802 +* amazon-eks-gpu-node-1.19-v20220802 +* amazon-eks-arm64-node-1.23-v20220802 +* amazon-eks-arm64-node-1.22-v20220802 +* amazon-eks-arm64-node-1.21-v20220802 +* amazon-eks-arm64-node-1.20-v20220802 +* amazon-eks-arm64-node-1.19-v20220802 +* amazon-eks-node-1.23-v20220802 +* amazon-eks-node-1.22-v20220802 +* amazon-eks-node-1.21-v20220802 +* amazon-eks-node-1.20-v20220802 +* amazon-eks-node-1.19-v20220802 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.7/2022-06-29/ +* 
s3://amazon-eks/1.22.9/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.15/2022-06-20/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.204-113.362.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1575.0-1.amzn2 + +Notable changes: +* Release 1.23 AMIs publicly + ### AMI Release v20220725 * amazon-eks-gpu-node-1.22-v20220725 * amazon-eks-gpu-node-1.21-v20220725 From 090678eafabd0c9f03aa18d8dad8bf46361b0196 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 12 Aug 2022 16:37:14 -0700 Subject: [PATCH 299/621] Update kubelet 1.20-1.23 build dates to `2022-07-27` (#987) --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index dd229ba75..8e9c3b8ca 100644 --- a/Makefile +++ b/Makefile @@ -48,16 +48,16 @@ k8s: validate .PHONY: 1.20 1.20: - $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-06-20 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.21 1.21: - $(MAKE) k8s kubernetes_version=1.21.12 kubernetes_build_date=2022-05-20 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.21.12 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.22 1.22: - $(MAKE) k8s kubernetes_version=1.22.9 kubernetes_build_date=2022-06-03 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.9 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.23 1.23: - $(MAKE) k8s kubernetes_version=1.23.7 kubernetes_build_date=2022-06-29 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.23.7 kubernetes_build_date=2022-07-27 pull_cni_from_github=true From 07da85734df21518c9d8472c7820ce194a02cdec Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 15 Aug 2022 08:58:10 -0700 Subject: [PATCH 300/621] Fix kubelet versions for build 
date 2022-07-27 (#988) This was missed when build dates were updated to 2022-07-27. --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 8e9c3b8ca..b77731722 100644 --- a/Makefile +++ b/Makefile @@ -52,12 +52,12 @@ k8s: validate .PHONY: 1.21 1.21: - $(MAKE) k8s kubernetes_version=1.21.12 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.22 1.22: - $(MAKE) k8s kubernetes_version=1.22.9 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.12 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.23 1.23: - $(MAKE) k8s kubernetes_version=1.23.7 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.23.9 kubernetes_build_date=2022-07-27 pull_cni_from_github=true From 2bd132dfb3ab604c4156a9c82d2a0948b5e2b8d5 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 16 Aug 2022 15:08:56 -0700 Subject: [PATCH 301/621] Update CHANGELOG.md (#994) --- CHANGELOG.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4820885ef..c5b72d87f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,43 @@ # Changelog +### AMI Release v20220811 +* amazon-eks-gpu-node-1.23-v20220811 +* amazon-eks-gpu-node-1.22-v20220811 +* amazon-eks-gpu-node-1.21-v20220811 +* amazon-eks-gpu-node-1.20-v20220811 +* amazon-eks-gpu-node-1.19-v20220811 +* amazon-eks-arm64-node-1.23-v20220811 +* amazon-eks-arm64-node-1.22-v20220811 +* amazon-eks-arm64-node-1.21-v20220811 +* amazon-eks-arm64-node-1.20-v20220811 +* amazon-eks-arm64-node-1.19-v20220811 +* amazon-eks-node-1.23-v20220811 +* amazon-eks-node-1.22-v20220811 +* amazon-eks-node-1.21-v20220811 +* amazon-eks-node-1.20-v20220811 +* amazon-eks-node-1.19-v20220811 + +Binaries used to build these AMIs are published: +* 
s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.204-113.362.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1575.0-1.amzn2 + +Notable changes: +- Kubelet binaries updated, including a backport of [#109676](https://github.com/kubernetes/kubernetes/pull/109676). +- When using `containerd` as the container runtime, `systemd` will now be used as the cgroup driver. For more information, see [the Kubernetes documentation](https://kubernetes.io/docs/tasks/administer-cluster/kubeadm/configure-cgroup-driver/). +- Updated `aws-neuron-dkms` to `2.3.26` to address [a security issue](https://awsdocs-neuron.readthedocs-hosted.com/en/latest/release-notes/neuron-driver.html#ndriver-2-3-26-0). This is a recommended upgrade for all users of the GPU AMI. 
+ ### AMI Release v20220802 * amazon-eks-gpu-node-1.23-v20220802 * amazon-eks-gpu-node-1.22-v20220802 From ad1ae43865d7d03a9ba83268cc34f5a2fb5bf098 Mon Sep 17 00:00:00 2001 From: Jim DeWaard Date: Tue, 16 Aug 2022 17:27:54 -0500 Subject: [PATCH 302/621] Update container-runtime package versions (#992) --- eks-worker-al2.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 521c1b7de..b53a6566f 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -13,9 +13,9 @@ "kubernetes_version": null, "kubernetes_build_date": null, "kernel_version": "", - "docker_version": "20.10.13-2.amzn2", - "containerd_version": "1.4.13-3.amzn2", - "runc_version": "1.0.3-2.amzn2", + "docker_version": "20.10.17-1.amzn2", + "containerd_version": "1.6.6-1.amzn2", + "runc_version": "1.1.3-1.amzn2", "cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", "source_ami_id": "", From b0012e8031a5c0f8839a7e0f33fc6e5651da35ab Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 17 Aug 2022 08:30:27 -0700 Subject: [PATCH 303/621] Removes unmaintained CFN template and modernizes README (#995) --- README.md | 41 +----- amazon-eks-nodegroup.yaml | 301 -------------------------------------- 2 files changed, 6 insertions(+), 336 deletions(-) delete mode 100644 amazon-eks-nodegroup.yaml diff --git a/README.md b/README.md index f3bc37212..b503b29b8 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,7 @@ For more information, see [Authentication](https://www.packer.io/docs/builders/a in the Packer documentation. **Note** -The default instance type to build this AMI is an `m4.large` and does not -qualify for the AWS free tier. You are charged for any instances created +The default instance type to build this AMI does not qualify for the AWS free tier. You are charged for any instances created when building this AMI. 
## Building the AMI @@ -31,7 +30,7 @@ make The Makefile chooses a particular kubelet binary to use per kubernetes version which you can [view here](Makefile). To build an Amazon EKS Worker AMI for a particular Kubernetes version run the following command ```bash -make 1.21 ## Build a Amazon EKS Worker AMI for k8s 1.21 +make 1.23 ## Build a Amazon EKS Worker AMI for k8s 1.23 ``` ### Building against other versions of Kubernetes binaries To build an Amazon EKS Worker AMI with other versions of Kubernetes that are not listed above run the following AWS Command @@ -39,18 +38,18 @@ Line Interface (AWS CLI) commands to obtain values for KUBERNETES_VERSION, KUBER ```bash #List of all avalable Kuberenets Versions: aws s3 ls s3://amazon-eks -KUBERNETES_VERSION=1.17.9 # Chose a version and set the variable +KUBERNETES_VERSION=1.23.9 # Chose a version and set the variable #List of all builds for the specified Kubernetes Version: aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/ -KUBERNETES_BUILD_DATE=2020-08-04 # Chose a date and set the variable +KUBERNETES_BUILD_DATE=2022-07-27 # Chose a date and set the variable #List of all platforms available for the selected Kubernetes Version and build date -aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/2020-08-04/bin/ +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/ PLATFORM=linux # Chose a platform and set the variable #List of all architectures for the selected Kubernetes Version, build date and platform -aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/2020-08-04/bin/linux/ +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/ ARCH=amd64 #Chose an architecture and set the variable ``` Run the following command to build an Amazon EKS Worker AMI based on the chosen parameters in the previous step @@ -104,34 +103,6 @@ want to launch a node group with your new AMI, see [Launching Amazon EKS Worker Nodes](https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html) in the Amazon 
EKS User Guide. -The [`amazon-eks-nodegroup.yaml`](amazon-eks-nodegroup.yaml) AWS CloudFormation -template in this repository is provided to launch a node group with the new AMI -ID that is returned when Packer finishes building. Note that there is important -Amazon EC2 user data in this CloudFormation template that bootstraps the worker -nodes when they are launched so that they can register with your Amazon EKS -cluster. Your nodes cannot register properly without this user data. - -### Compatibility with CloudFormation Template - -The CloudFormation template for EKS Nodes is published in the S3 bucket -`amazon-eks` under the path `cloudformation`. You can see a list of previous -versions by running `aws s3 ls s3://amazon-eks/cloudformation/`. - -| CloudFormation Version | EKS AMI versions | [amazon-vpc-cni-k8s](https://github.com/aws/amazon-vpc-cni-k8s/releases) | -| ---------------------- | ------------------------------------------ | -------------------- | -| 2019-09-27 | amazon-eks-node-(1.14,1.13,1.12,1.11)-v20190927 | v1.5.4 -| 2019-09-17 | amazon-eks-node-(1.14,1.13,1.12,1.11)-v20190906 | v1.5.3 -| 2019-02-11 | amazon-eks-node-(1.12,1.11,1.10)-v20190327 | v1.3.2 (for p3dn.24xlarge instances) | -| 2019-02-11 | amazon-eks-node-(1.11,1.10)-v20190220 | v1.3.2 (for p3dn.24xlarge instances) | -| 2019-02-11 | amazon-eks-node-(1.11,1.10)-v20190211 | v1.3.2 (for p3dn.24xlarge instances) | -| 2018-12-10 | amazon-eks-node-(1.11,1.10)-v20181210 | v1.2.1 | -| 2018-11-07 | amazon-eks-node-v25+ | v1.2.1 (for t3 and r5 instances) | -| 2018-08-30 | amazon-eks-node-v23+ | v1.1.0 | -| 2018-08-21 | amazon-eks-node-v23+ | v1.1.0 | - -For older versions of the EKS AMI (v20-v22), you can find the CloudFormation -templates in the same bucket under the path `s3://amazon-eks/1.10.3/2018-06-05/`. 
- ## AL2 / Linux Kernel Information By default, the `amazon-eks-ami` uses a [source_ami_filter](https://github.com/awslabs/amazon-eks-ami/blob/e3f1b910f83ad1f27e68312e50474ea6059f052d/eks-worker-al2.json#L46) that selects the latest [hvm](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/virtualization_types.html) AL2 AMI for the given architecture as the base AMI. For more information on what kernel versions are running on published Amazon EKS optimized Linux AMIs, see [the public documentation](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html). diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml deleted file mode 100644 index 89366e2bf..000000000 --- a/amazon-eks-nodegroup.yaml +++ /dev/null @@ -1,301 +0,0 @@ -AWSTemplateFormatVersion: "2010-09-09" - -Description: Amazon EKS - Node Group - -Metadata: - "AWS::CloudFormation::Interface": - ParameterGroups: - - Label: - default: EKS Cluster - Parameters: - - ClusterName - - ClusterControlPlaneSecurityGroup - - Label: - default: Worker Node Configuration - Parameters: - - NodeGroupName - - NodeAutoScalingGroupMinSize - - NodeAutoScalingGroupDesiredCapacity - - NodeAutoScalingGroupMaxSize - - NodeInstanceType - - NodeImageIdSSMParam - - NodeImageId - - NodeVolumeSize - - KeyName - - BootstrapArguments - - DisableIMDSv1 - - Label: - default: Worker Network Configuration - Parameters: - - VpcId - - Subnets - -Parameters: - BootstrapArguments: - Type: String - Default: "" - Description: "Arguments to pass to the bootstrap script. See files/bootstrap.sh in https://github.com/awslabs/amazon-eks-ami" - - ClusterControlPlaneSecurityGroup: - Type: "AWS::EC2::SecurityGroup::Id" - Description: The security group of the cluster control plane. - - ClusterName: - Type: String - Description: The cluster name provided when the cluster was created. If it is incorrect, nodes will not be able to join the cluster. 
- - KeyName: - Type: "AWS::EC2::KeyPair::KeyName" - Description: The EC2 Key Pair to allow SSH access to the instances - - NodeAutoScalingGroupDesiredCapacity: - Type: Number - Default: 3 - Description: Desired capacity of Node Group ASG. - - NodeAutoScalingGroupMaxSize: - Type: Number - Default: 4 - Description: Maximum size of Node Group ASG. Set to at least 1 greater than NodeAutoScalingGroupDesiredCapacity. - - NodeAutoScalingGroupMinSize: - Type: Number - Default: 1 - Description: Minimum size of Node Group ASG. - - NodeGroupName: - Type: String - Description: Unique identifier for the Node Group. - - NodeImageId: - Type: String - Default: "" - Description: (Optional) Specify your own custom image ID. This value overrides any AWS Systems Manager Parameter Store value specified above. - - NodeImageIdSSMParam: - Type: "AWS::SSM::Parameter::Value" - Default: /aws/service/eks/optimized-ami/1.22/amazon-linux-2/recommended/image_id - Description: AWS Systems Manager Parameter Store parameter of the AMI ID for the worker node instances. Change this value to match the version of Kubernetes you are using. - - DisableIMDSv1: - Type: String - Default: "false" - AllowedValues: - - "false" - - "true" - - NodeInstanceType: - Type: String - Default: t3.medium - Description: EC2 instance type for the node instances - - NodeVolumeSize: - Type: Number - Default: 20 - Description: Node volume size - - Subnets: - Type: "List" - Description: The subnets where workers can be created. 
- - VpcId: - Type: "AWS::EC2::VPC::Id" - Description: The VPC of the worker instances - -Mappings: - PartitionMap: - aws: - EC2ServicePrincipal: "ec2.amazonaws.com" - aws-us-gov: - EC2ServicePrincipal: "ec2.amazonaws.com" - aws-cn: - EC2ServicePrincipal: "ec2.amazonaws.com.cn" - aws-iso: - EC2ServicePrincipal: "ec2.c2s.ic.gov" - aws-iso-b: - EC2ServicePrincipal: "ec2.sc2s.sgov.gov" - -Conditions: - HasNodeImageId: !Not - - "Fn::Equals": - - !Ref NodeImageId - - "" - - IMDSv1Disabled: - "Fn::Equals": - - !Ref DisableIMDSv1 - - "true" - -Resources: - NodeInstanceRole: - Type: "AWS::IAM::Role" - Properties: - AssumeRolePolicyDocument: - Version: "2012-10-17" - Statement: - - Effect: Allow - Principal: - Service: - - !FindInMap [PartitionMap, !Ref "AWS::Partition", EC2ServicePrincipal] - Action: - - "sts:AssumeRole" - ManagedPolicyArns: - - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonEKSWorkerNodePolicy" - - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonEKS_CNI_Policy" - - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" - Path: / - - NodeInstanceProfile: - Type: "AWS::IAM::InstanceProfile" - Properties: - Path: / - Roles: - - !Ref NodeInstanceRole - - NodeSecurityGroup: - Type: "AWS::EC2::SecurityGroup" - Properties: - GroupDescription: Security group for all nodes in the cluster - Tags: - - Key: !Sub kubernetes.io/cluster/${ClusterName} - Value: owned - VpcId: !Ref VpcId - - NodeSecurityGroupIngress: - Type: "AWS::EC2::SecurityGroupIngress" - DependsOn: NodeSecurityGroup - Properties: - Description: Allow node to communicate with each other - FromPort: 0 - GroupId: !Ref NodeSecurityGroup - IpProtocol: "-1" - SourceSecurityGroupId: !Ref NodeSecurityGroup - ToPort: 65535 - - ClusterControlPlaneSecurityGroupIngress: - Type: "AWS::EC2::SecurityGroupIngress" - DependsOn: NodeSecurityGroup - Properties: - Description: Allow pods to communicate with the cluster API Server - FromPort: 443 - GroupId: !Ref 
ClusterControlPlaneSecurityGroup - IpProtocol: tcp - SourceSecurityGroupId: !Ref NodeSecurityGroup - ToPort: 443 - - ControlPlaneEgressToNodeSecurityGroup: - Type: "AWS::EC2::SecurityGroupEgress" - DependsOn: NodeSecurityGroup - Properties: - Description: Allow the cluster control plane to communicate with worker Kubelet and pods - DestinationSecurityGroupId: !Ref NodeSecurityGroup - FromPort: 1025 - GroupId: !Ref ClusterControlPlaneSecurityGroup - IpProtocol: tcp - ToPort: 65535 - - ControlPlaneEgressToNodeSecurityGroupOn443: - Type: "AWS::EC2::SecurityGroupEgress" - DependsOn: NodeSecurityGroup - Properties: - Description: Allow the cluster control plane to communicate with pods running extension API servers on port 443 - DestinationSecurityGroupId: !Ref NodeSecurityGroup - FromPort: 443 - GroupId: !Ref ClusterControlPlaneSecurityGroup - IpProtocol: tcp - ToPort: 443 - - NodeSecurityGroupFromControlPlaneIngress: - Type: "AWS::EC2::SecurityGroupIngress" - DependsOn: NodeSecurityGroup - Properties: - Description: Allow worker Kubelets and pods to receive communication from the cluster control plane - FromPort: 1025 - GroupId: !Ref NodeSecurityGroup - IpProtocol: tcp - SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup - ToPort: 65535 - - NodeSecurityGroupFromControlPlaneOn443Ingress: - Type: "AWS::EC2::SecurityGroupIngress" - DependsOn: NodeSecurityGroup - Properties: - Description: Allow pods running extension API servers on port 443 to receive communication from cluster control plane - FromPort: 443 - GroupId: !Ref NodeSecurityGroup - IpProtocol: tcp - SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup - ToPort: 443 - - NodeLaunchTemplate: - Type: "AWS::EC2::LaunchTemplate" - Properties: - LaunchTemplateData: - BlockDeviceMappings: - - DeviceName: /dev/xvda - Ebs: - DeleteOnTermination: true - VolumeSize: !Ref NodeVolumeSize - VolumeType: gp2 - IamInstanceProfile: - Arn: !GetAtt NodeInstanceProfile.Arn - ImageId: !If - - HasNodeImageId - - 
!Ref NodeImageId - - !Ref NodeImageIdSSMParam - InstanceType: !Ref NodeInstanceType - KeyName: !Ref KeyName - SecurityGroupIds: - - !Ref NodeSecurityGroup - UserData: !Base64 - "Fn::Sub": | - #!/bin/bash - set -o xtrace - /etc/eks/bootstrap.sh ${ClusterName} ${BootstrapArguments} - /opt/aws/bin/cfn-signal --exit-code $? \ - --stack ${AWS::StackName} \ - --resource NodeGroup \ - --region ${AWS::Region} - MetadataOptions: - HttpPutResponseHopLimit : 2 - HttpEndpoint: enabled - HttpTokens: !If - - IMDSv1Disabled - - required - - optional - - NodeGroup: - Type: "AWS::AutoScaling::AutoScalingGroup" - Properties: - DesiredCapacity: !Ref NodeAutoScalingGroupDesiredCapacity - LaunchTemplate: - LaunchTemplateId: !Ref NodeLaunchTemplate - Version: !GetAtt NodeLaunchTemplate.LatestVersionNumber - MaxSize: !Ref NodeAutoScalingGroupMaxSize - MinSize: !Ref NodeAutoScalingGroupMinSize - Tags: - - Key: Name - PropagateAtLaunch: true - Value: !Sub ${ClusterName}-${NodeGroupName}-Node - - Key: !Sub kubernetes.io/cluster/${ClusterName} - PropagateAtLaunch: true - Value: owned - VPCZoneIdentifier: !Ref Subnets - UpdatePolicy: - AutoScalingRollingUpdate: - MaxBatchSize: 1 - MinInstancesInService: !Ref NodeAutoScalingGroupDesiredCapacity - PauseTime: PT5M - -Outputs: - NodeInstanceRole: - Description: The node instance role - Value: !GetAtt NodeInstanceRole.Arn - - NodeSecurityGroup: - Description: The security group for the node group - Value: !Ref NodeSecurityGroup - - NodeAutoScalingGroup: - Description: The autoscaling group - Value: !Ref NodeGroup From b4bbfe51a4115f66be7a5afe9c3bc47597446133 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 17 Aug 2022 10:50:19 -0700 Subject: [PATCH 304/621] Removes reference to CFN template in ArchiveBuildConfig.yaml (#998) --- ArchiveBuildConfig.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ArchiveBuildConfig.yaml b/ArchiveBuildConfig.yaml index a93376845..2c5bf850c 100644 --- a/ArchiveBuildConfig.yaml +++ 
b/ArchiveBuildConfig.yaml @@ -13,7 +13,6 @@ dependencies: files: - src: Makefile - src: eks-worker-al2.json - - src: amazon-eks-nodegroup.yaml archive: name: amazon-eks-ami.tar.gz - type: tgz \ No newline at end of file + type: tgz From eaed70ddc58993ccce68d4627759f614ab72d7f8 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 18 Aug 2022 15:15:48 -0700 Subject: [PATCH 305/621] Adds section to README on configuring kubelet config (#997) --- README.md | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b503b29b8..a0ef8250b 100644 --- a/README.md +++ b/README.md @@ -83,9 +83,6 @@ make k8s \ ``` **Note**: Confirm that the binary_bucket_name, binary_bucket_region, kubernetes_version, and kubernetes_build_date parameters match the path to your binaries in Amazon S3. - - - The Makefile runs Packer with the `eks-worker-al2.json` build specification template and the [amazon-ebs](https://www.packer.io/docs/builders/amazon-ebs.html) builder. An instance is launched and the Packer [Shell @@ -109,6 +106,58 @@ By default, the `amazon-eks-ami` uses a [source_ami_filter](https://github.com/a When building an AMI, you can set the `kernel_version` to `4.14` or `5.4` to customize the kernel version. The [upgrade_kernel.sh script](https://github.com/awslabs/amazon-eks-ami/blob/master/scripts/upgrade_kernel.sh#L26) contains the logic for updating and upgrading the kernel. For Kubernetes versions 1.18 and below, it uses the `4.14` kernel if not set, and it will install the latest patches. For Kubernetes version 1.19 and above, it uses the `5.4` kernel if not set. +## Customizing Kubelet Config + +In some cases, customers may want to customize the [kubelet configuration](https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/#kubelet-config-k8s-io-v1beta1-KubeletConfiguration) on their nodes, and there are two mechanisms to do that with the EKS Optimized AMI. 
+ +**Set the "--kubelet-extra-args" flag when invoking bootstrap.sh** + +`bootstrap.sh`, the script that bootstraps nodes when using the EKS Optimized AMI, supports a flag called `--kubelet-extra-args` that allows you to pass in additional `kubelet` configuration. If you invoke the bootstrap script yourself (self-managed nodegroups or EKS managed nodegroups with custom AMIs), you can use that to customize your configuration. For example, you can use something like the following in your userdata: + +``` +/etc/eks/bootstrap.sh my-cluster --kubelet-extra-args '--registry-qps=20 --registry-burst=40' +``` + +In this case, it will set `registryPullQPS` to 20 and `registryBurst` to 40 in `kubelet`. Some of the flags, like the ones above, are marked as deprecated and you're encouraged to set them in the `kubelet` config file (described below), but they continue to work as of 1.23. + +**Update the kubelet config file** + +You can update the `kubelet` config file directly with new configuration. On EKS Optimized AMIs, the file is stored at `/etc/kubernetes/kubelet/kubelet-config.json`. It must be valid JSON. You can use a utility like `jq` (or your tool of choice) to edit the config in your user data: + +``` +echo "$(jq ".registryPullQPS=20 | .registryBurst=40" /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json +``` + +There are a couple of important caveats here: + +1. If you update the `kubelet` config file after `kubelet` has already started (i.e. `bootstrap.sh` already ran), you'll need to restart `kubelet` to pick up the latest configuration. +2. [bootstrap.sh](https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh) does modify a few fields, like `kubeReserved` and `evictionHard`, so you'd need to modify the config after the bootstrap script is run and restart `kubelet` to overwrite those properties. + +**View active kubelet config** + +When `kubelet` starts up, it logs all possible flags, including unset flags. 
The unset flags get logged with default values. *These logs do not necessarily reflect the actual active configuration.* This has caused confusion in the past when customers have configured the `kubelet` config file with one value and notice the default value is logged. Here is an example of the referenced log: + +``` +Aug 16 21:53:49 ip-192-168-92-220.us-east-2.compute.internal kubelet[3935]: I0816 21:53:49.202824 3935 flags.go:59] FLAG: --registry-burst="10" +Aug 16 21:53:49 ip-192-168-92-220.us-east-2.compute.internal kubelet[3935]: I0816 21:53:49.202829 3935 flags.go:59] FLAG: --registry-qps="5" +``` + +To view the actual `kubelet` config on your node, you can use the Kubernetes API to confirm that your configuration has applied. + +``` +$ kubectl proxy +$ curl -sSL "http://localhost:8001/api/v1/nodes/ip-192-168-92-220.us-east-2.compute.internal/proxy/configz" | jq + +{ + "kubeletconfig": { + ... + "registryPullQPS": 20, + "registryBurst": 40, + ... + } +} +``` + ## Security For security issues or concerns, please do not open an issue or pull request on GitHub. Please report any suspected or confirmed security issues to AWS Security https://aws.amazon.com/security/vulnerability-reporting/ From 4d79489c567079883f1f69ac68ba58a4134d5ba3 Mon Sep 17 00:00:00 2001 From: Trent Petersen Date: Tue, 23 Aug 2022 14:14:26 -0500 Subject: [PATCH 306/621] Add 1.23 to `all` target (#1005) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b77731722..32571074b 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.19 1.20 1.21 1.22 +all: 1.19 1.20 1.21 1.22 1.23 .PHONY: validate validate: From 2acffd1fe57b434ca83cf18ccc4bf07902db44d6 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 23 Aug 2022 12:15:33 -0700 Subject: [PATCH 307/621] Update example for AWSCLI v2. 
(#1003) --- log-collector-script/linux/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/README.md b/log-collector-script/linux/README.md index ee08bfd72..f634e271c 100644 --- a/log-collector-script/linux/README.md +++ b/log-collector-script/linux/README.md @@ -89,9 +89,10 @@ Trying to archive gathered information... #### *To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s):* -1. Create the SSM document named "EKSLogCollector" using the following command:
+1. Create the SSM document named "EKSLogCollector" using the following commands:
``` -aws ssm create-document --name "EKSLogCollector" --document-type "Command" --content https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-ssm-content.json +curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-ssm-content.json +aws ssm create-document --name "EKSLogCollector" --document-type "Command" --content file://eks-ssm-content.json ``` 2. To execute the bash script in the SSM document and to collect the logs from worker, run the following command:
``` From 5585aa5eb9b4800326a052720068bf8a61a7cad3 Mon Sep 17 00:00:00 2001 From: Jayanth Varavani <1111446+jayanthvn@users.noreply.github.com> Date: Tue, 23 Aug 2022 12:21:27 -0700 Subject: [PATCH 308/621] Support mac2, p4de, r6a instance types (#1000) --- files/eni-max-pods.txt | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index c4b0fd9b6..8981c9c0b 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. # -# This file was generated at 2022-06-10T09:23:03-07:00 +# This file was generated at 2022-08-15T23:21:13Z # # The regions queried were: # - ap-northeast-1 @@ -394,6 +394,7 @@ m6id.large 29 m6id.metal 737 m6id.xlarge 58 mac1.metal 234 +mac2.metal 234 p2.16xlarge 234 p2.8xlarge 234 p2.xlarge 58 @@ -402,6 +403,7 @@ p3.2xlarge 58 p3.8xlarge 234 p3dn.24xlarge 737 p4d.24xlarge 737 +p4de.24xlarge 737 r3.2xlarge 58 r3.4xlarge 234 r3.8xlarge 234 @@ -474,6 +476,17 @@ r5n.8xlarge 234 r5n.large 29 r5n.metal 737 r5n.xlarge 58 +r6a.12xlarge 234 +r6a.16xlarge 737 +r6a.24xlarge 737 +r6a.2xlarge 58 +r6a.32xlarge 737 +r6a.48xlarge 737 +r6a.4xlarge 234 +r6a.8xlarge 234 +r6a.large 29 +r6a.metal 737 +r6a.xlarge 58 r6g.12xlarge 234 r6g.16xlarge 737 r6g.2xlarge 58 From 64cd62179ac2df8eb5e3aea3bda5e2c0ed70349b Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Tue, 23 Aug 2022 14:21:51 -0500 Subject: [PATCH 309/621] Log collector enhancements (#993) * collect xfs fragmentation information * add tests for network connectivity - ping well known hostnames - capture the results of curling the API server a few times - save the node's resolv.conf --- .../linux/eks-log-collector.sh | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh 
index 561889915..1430b8545 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -20,7 +20,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.7.0" +readonly PROGRAM_VERSION="0.7.1" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -286,7 +286,7 @@ get_mounts_info() { lvs > "${COLLECT_DIR}"/storage/lvs.txt pvs > "${COLLECT_DIR}"/storage/pvs.txt vgs > "${COLLECT_DIR}"/storage/vgs.txt - + mount -t xfs | awk '{print $1}' | xargs -I{} -- sh -c "xfs_info {}; xfs_db -r -c 'freesp -s' {}" > "${COLLECT_DIR}"/storage/xfs.txt ok } @@ -491,6 +491,20 @@ get_networking_info() { # configure-multicard-interfaces timeout 75 journalctl -u configure-multicard-interfaces > "${COLLECT_DIR}"/networking/configure-multicard-interfaces.txt || echo -e "\tTimed out, ignoring \"configure-multicard-interfaces unit output \" " + # test some network connectivity + timeout 75 ping -A -c 10 amazon.com > "${COLLECT_DIR}"/networking/ping_amazon.com.txt + timeout 75 ping -A -c 10 public.ecr.aws > "${COLLECT_DIR}"/networking/ping_public.ecr.aws.txt + + if [[ -e "${COLLECT_DIR}"/kubelet/kubeconfig.yaml ]]; then + API_SERVER=$(grep server: "${COLLECT_DIR}"/kubelet/kubeconfig.yaml | sed 's/.*server: //') + CA_CRT=$(grep certificate-authority: "${COLLECT_DIR}"/kubelet/kubeconfig.yaml | sed 's/.*certificate-authority: //') + for i in $(seq 5); do + echo -e "curling ${API_SERVER} ($i of 5) $(date --utc +%FT%T.%3N%Z)\n\n" >> ${COLLECT_DIR}"/networking/curl_api_server.txt" + timeout 75 curl -v --cacert "${CA_CRT}" "${API_SERVER}"/livez?verbose >> ${COLLECT_DIR}"/networking/curl_api_server.txt" 2>&1 + done + fi + + cp /etc/resolv.conf "${COLLECT_DIR}"/networking/resolv.conf ok } From d9c8938f6666d3037d126ee906c2f133f2be0fb8 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 23 
Aug 2022 12:22:54 -0700 Subject: [PATCH 310/621] Include source AMI details in tags and manifest (#1001) --- eks-worker-al2.json | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index b53a6566f..624572849 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -91,9 +91,11 @@ "tags": { "Name": "{{user `ami_name`}}", "created": "{{timestamp}}", + "build_region": "{{ .BuildRegion }}", + "source_ami_id": "{{ .SourceAMI }}", + "source_ami_name": "{{ .SourceAMIName }}", "docker_version": "{{ user `docker_version`}}", "containerd_version": "{{ user `containerd_version`}}", - "source_ami_id": "{{ user `source_ami_id`}}", "kubernetes": "{{ user `kubernetes_version`}}/{{ user `kubernetes_build_date` }}/bin/linux/{{ user `arch` }}", "cni_plugin_version": "{{ user `cni_plugin_version`}}" }, @@ -192,12 +194,20 @@ { "type": "manifest", "output": "manifest.json", - "strip_path": true + "strip_path": true, + "custom_data": { + "source_ami_name": "{{ build `SourceAMIName` }}", + "source_ami_id": "{{ build `SourceAMI` }}" + } }, { "type": "manifest", "output": "{{user `ami_name`}}-manifest.json", - "strip_path": true + "strip_path": true, + "custom_data": { + "source_ami_name": "{{ build `SourceAMIName` }}", + "source_ami_id": "{{ build `SourceAMI` }}" + } } ] } From a043436e05b29e395616df7a6d2f871f339d5985 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Fri, 26 Aug 2022 11:05:40 -0700 Subject: [PATCH 311/621] Updating CHANGELOG.md with latest AMI details (#1008) --- CHANGELOG.md | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c5b72d87f..3fcdc1766 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,50 @@ # Changelog +### AMI Release v20220824 +* amazon-eks-gpu-node-1.23-v20220824 +* amazon-eks-gpu-node-1.22-v20220824 +* amazon-eks-gpu-node-1.21-v20220824 +* amazon-eks-gpu-node-1.20-v20220824 +* 
amazon-eks-gpu-node-1.19-v20220824 +* amazon-eks-arm64-node-1.23-v20220824 +* amazon-eks-arm64-node-1.22-v20220824 +* amazon-eks-arm64-node-1.21-v20220824 +* amazon-eks-arm64-node-1.20-v20220824 +* amazon-eks-arm64-node-1.19-v20220824 +* amazon-eks-node-1.23-v20220824 +* amazon-eks-node-1.22-v20220824 +* amazon-eks-node-1.21-v20220824 +* amazon-eks-node-1.20-v20220824 +* amazon-eks-node-1.19-v20220824 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.23.9-20220824` +* `1.22.12-20220824` +* `1.21.14-20220824` +* `1.20.15-20220824` +* `1.19.15-20220824` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.209-116.363.amzn2 +* dockerd: 20.10.17-1.amzn2 +* containerd: 1.6.6-1.amzn2 +* runc: 1.1.3-1.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1575.0-1.amzn2 + +Notable changes: +* We are updating the versions of docker, containerd and runc as part of this AMI release. +* Kernel version is also updated to include the [latest CVE patches](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2022-034.html) +* This is the last release for 1.19 as we are at [end of support for 1.19](https://docs.aws.amazon.com/eks/latest/userguide/kubernetes-versions.html#kubernetes-release-calendar) + ### AMI Release v20220811 * amazon-eks-gpu-node-1.23-v20220811 * amazon-eks-gpu-node-1.22-v20220811 From 2e5a10d70e7756893143429a0107b3e016edb4bc Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Thu, 8 Sep 2022 14:42:55 -0500 Subject: [PATCH 312/621] Fix IPv6 CLUSTER_DNS, add test harness (#931) * Fixes ipv6 dns cluster IP, see #860 and #900 for more information. * Adds a unit testing approach, see `test/README.md` for more information. 
--- Makefile | 20 +++--- files/bootstrap.sh | 47 ++++++------- files/max-pods-calculator.sh | 11 ++-- test/Dockerfile | 15 +++++ test/README.md | 40 +++++++++++ .../ip-family-service-ipv6-cidr-mismatch.sh | 17 +++++ test/cases/ipv4-cluster-dns-ip.sh | 24 +++++++ test/cases/ipv6-cluster-dns-ip.sh | 24 +++++++ ...-dns-cluster-ip-given-service-ipv6-cidr.sh | 24 +++++++ .../ipv6-ip-family-and-service-ipv6-cidr.sh | 16 +++++ test/cases/max-pods-cni-1-11-2-delegation.sh | 22 +++++++ test/cases/max-pods-cni-1-11-2.sh | 21 ++++++ test/cases/max-pods-cni-1-7-5.sh | 21 ++++++ test/entrypoint.sh | 10 +++ test/mocks/aws | 16 +++++ .../describe-instance-types/m4-xlarge.json | 6 ++ .../describe-instance-types/m5-8xlarge.json | 6 ++ test/mocks/iptables-save | 4 ++ test/mocks/sudo | 4 ++ test/mocks/systemctl | 4 ++ test/test-harness.sh | 66 +++++++++++++++++++ 21 files changed, 383 insertions(+), 35 deletions(-) mode change 100644 => 100755 files/bootstrap.sh mode change 100644 => 100755 files/max-pods-calculator.sh create mode 100644 test/Dockerfile create mode 100644 test/README.md create mode 100755 test/cases/ip-family-service-ipv6-cidr-mismatch.sh create mode 100755 test/cases/ipv4-cluster-dns-ip.sh create mode 100755 test/cases/ipv6-cluster-dns-ip.sh create mode 100755 test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh create mode 100755 test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh create mode 100755 test/cases/max-pods-cni-1-11-2-delegation.sh create mode 100755 test/cases/max-pods-cni-1-11-2.sh create mode 100755 test/cases/max-pods-cni-1-7-5.sh create mode 100755 test/entrypoint.sh create mode 100755 test/mocks/aws create mode 100644 test/mocks/describe-instance-types/m4-xlarge.json create mode 100644 test/mocks/describe-instance-types/m5-8xlarge.json create mode 100755 test/mocks/iptables-save create mode 100755 test/mocks/sudo create mode 100755 test/mocks/systemctl create mode 100755 test/test-harness.sh diff --git a/Makefile b/Makefile index 
32571074b..b04f1fbf9 100644 --- a/Makefile +++ b/Makefile @@ -29,35 +29,39 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.19 1.20 1.21 1.22 1.23 +all: 1.19 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI .PHONY: validate -validate: +validate: ## Validate packer config $(PACKER_BINARY) validate $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json .PHONY: k8s -k8s: validate +k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" $(PACKER_BINARY) build -timestamp-ui $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html .PHONY: 1.19 -1.19: +1.19: ## Build EKS Optimized AL2 AMI - K8s 1.19 $(MAKE) k8s kubernetes_version=1.19.15 kubernetes_build_date=2021-11-10 pull_cni_from_github=true .PHONY: 1.20 -1.20: +1.20: ## Build EKS Optimized AL2 AMI - K8s 1.20 $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.21 -1.21: +1.21: ## Build EKS Optimized AL2 AMI - K8s 1.21 $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.22 -1.22: +1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 $(MAKE) k8s kubernetes_version=1.22.12 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.23 -1.23: +1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 $(MAKE) k8s kubernetes_version=1.23.9 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + +.PHONY: help +help: ## Display help + @awk 'BEGIN {FS = ":.*##"; printf "Usage:\n make \033[36m\033[0m\n"} /^[\.a-zA-Z_0-9\-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", 
substr($$0, 5) } ' $(MAKEFILE_LIST) diff --git a/files/bootstrap.sh b/files/bootstrap.sh old mode 100644 new mode 100755 index 1cc5ca6c3..039e23246 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -151,6 +151,7 @@ IP_FAMILY="${IP_FAMILY:-}" SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" CLUSTER_ID="${CLUSTER_ID:-}" +IMDS_ENDPOINT="${IMDS_ENDPOINT:-169.254.169.254:80}" function get_pause_container_account_for_region () { local region="$1" @@ -186,7 +187,7 @@ function _get_token() { local token_result= local http_result= - token_result=$(curl -s -w "\n%{http_code}" -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" "http://169.254.169.254/latest/api/token") + token_result=$(curl -s -w "\n%{http_code}" -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" "http://${IMDS_ENDPOINT}/latest/api/token") http_result=$(echo "$token_result" | tail -n 1) if [[ "$http_result" != "200" ]] then @@ -218,11 +219,11 @@ function _get_meta_data() { local path=$1 local metadata_result= - metadata_result=$(curl -s -w "\n%{http_code}" -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/$path) + metadata_result=$(curl -s -w "\n%{http_code}" -H "X-aws-ec2-metadata-token: $TOKEN" http://${IMDS_ENDPOINT}/$path) http_result=$(echo "$metadata_result" | tail -n 1) if [[ "$http_result" != "200" ]] then - echo -e "Failed to get metadata:\n$metadata_result\nhttp://169.254.169.254/$path\n$TOKEN" + echo -e "Failed to get metadata:\n$metadata_result\nhttp://${IMDS_ENDPOINT}/$path\n$TOKEN" return 1 else local lines=$(echo "$metadata_result" | wc -l) @@ -322,11 +323,6 @@ if [[ ! -z "${IP_FAMILY}" ]]; then echo "Invalid IpFamily. Only ipv4 or ipv6 are allowed" exit 1 fi - - if [[ "${IP_FAMILY}" == "ipv6" ]] && [[ ! -z "${B64_CLUSTER_CA}" ]] && [[ ! -z "${APISERVER_ENDPOINT}" ]] && [[ -z "${SERVICE_IPV6_CIDR}" ]]; then - echo "Service Ipv6 Cidr must be provided when ip-family is specified as IPV6" - exit 1 - fi fi if [[ ! 
-z "${SERVICE_IPV6_CIDR}" ]]; then @@ -339,7 +335,7 @@ fi TOKEN=$(get_token) AWS_DEFAULT_REGION=$(get_meta_data 'latest/dynamic/instance-identity/document' | jq .region -r) -AWS_SERVICES_DOMAIN=$(get_meta_data '2018-09-24/meta-data/services/domain') +AWS_SERVICES_DOMAIN=$(get_meta_data 'latest/meta-data/services/domain') MACHINE=$(uname -m) if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then @@ -410,7 +406,7 @@ if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then fi if [[ -z "${IP_FAMILY}" ]] || [[ "${IP_FAMILY}" == "None" ]]; then - ### this can happen when the ifFamily field is not found in describeCluster response + ### this can happen when the ipFamily field is not found in describeCluster response ### or B64_CLUSTER_CA and APISERVER_ENDPOINT are defined but IPFamily isn't IP_FAMILY="ipv4" fi @@ -460,21 +456,28 @@ fi ### kubelet.service configuration -if [[ "${IP_FAMILY}" == "ipv6" ]]; then - DNS_CLUSTER_IP=$(awk -F/ '{print $1}' <<< $SERVICE_IPV6_CIDR)a -fi - MAC=$(get_meta_data 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$//') + if [[ -z "${DNS_CLUSTER_IP}" ]]; then - if [[ ! -z "${SERVICE_IPV4_CIDR}" ]] && [[ "${SERVICE_IPV4_CIDR}" != "None" ]] ; then - #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. (x.y.z.10) - DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 - else - TEN_RANGE=$(get_meta_data "latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks" | grep -c '^10\..*' || true ) - DNS_CLUSTER_IP=10.100.0.10 - if [[ "$TEN_RANGE" != "0" ]]; then - DNS_CLUSTER_IP=172.20.0.10 + if [[ "${IP_FAMILY}" == "ipv6" ]]; then + if [[ -z "${SERVICE_IPV6_CIDR}" ]]; then + echo "One of --service-ipv6-cidr or --dns-cluster-ip must be provided when ip-family is specified as ipv6" + exit 1 + fi + DNS_CLUSTER_IP=$(awk -F/ '{print $1}' <<< $SERVICE_IPV6_CIDR)a + fi + + if [[ "${IP_FAMILY}" == "ipv4" ]]; then + if [[ ! 
-z "${SERVICE_IPV4_CIDR}" ]] && [[ "${SERVICE_IPV4_CIDR}" != "None" ]]; then + #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. (x.y.z.10) + DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 + else + TEN_RANGE=$(get_meta_data "latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks" | grep -c '^10\..*' || true ) + DNS_CLUSTER_IP=10.100.0.10 + if [[ "$TEN_RANGE" != "0" ]]; then + DNS_CLUSTER_IP=172.20.0.10 + fi fi fi else diff --git a/files/max-pods-calculator.sh b/files/max-pods-calculator.sh old mode 100644 new mode 100755 index 643a9a22b..bf92d571e --- a/files/max-pods-calculator.sh +++ b/files/max-pods-calculator.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -o pipefail set -o nounset @@ -76,14 +76,15 @@ CNI_MAX_ENI="${CNI_MAX_ENI:-}" INSTANCE_TYPE="${INSTANCE_TYPE:-}" INSTANCE_TYPE_FROM_IMDS="${INSTANCE_TYPE_FROM_IMDS:-false}" SHOW_MAX_ALLOWED="${SHOW_MAX_ALLOWED:-false}" +IMDS_ENDPOINT="${IMDS_ENDPOINT:-169.254.169.254:80}" PREFIX_DELEGATION_SUPPORTED=false IPS_PER_PREFIX=16 if [ "$INSTANCE_TYPE_FROM_IMDS" = true ]; then - TOKEN=$(curl -m 10 -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" -s "http://169.254.169.254/latest/api/token") - export AWS_DEFAULT_REGION=$(curl -s --retry 5 -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/dynamic/instance-identity/document | jq .region -r) - INSTANCE_TYPE=$(curl -m 10 -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/instance-type) + TOKEN=$(curl -m 10 -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" -s "http://${IMDS_ENDPOINT}/latest/api/token") + export AWS_DEFAULT_REGION=$(curl -s --retry 5 -H "X-aws-ec2-metadata-token: $TOKEN" http://${IMDS_ENDPOINT}/latest/dynamic/instance-identity/document | jq .region -r) + INSTANCE_TYPE=$(curl -m 10 -H "X-aws-ec2-metadata-token: $TOKEN" -s http://${IMDS_ENDPOINT}/latest/meta-data/instance-type) elif [ -z "$INSTANCE_TYPE" ]; # There's no reasonable default for an instanceType 
so force one to be provided to the script. then echo "You must specify an instance type to calculate max pods value." @@ -119,7 +120,7 @@ if [[ "$CNI_MAJOR_VERSION" -gt 1 ]] || ([[ "$CNI_MAJOR_VERSION" = 1 ]] && [[ "$C PREFIX_DELEGATION_SUPPORTED=true fi -DESCRIBE_INSTANCES_RESULT=$(aws ec2 describe-instance-types --instance-type $INSTANCE_TYPE --query 'InstanceTypes[0].{Hypervisor: Hypervisor, EniCount: NetworkInfo.MaximumNetworkInterfaces, PodsPerEniCount: NetworkInfo.Ipv4AddressesPerInterface, CpuCount: VCpuInfo.DefaultVCpus'} --output json) +DESCRIBE_INSTANCES_RESULT=$(aws ec2 describe-instance-types --instance-type "${INSTANCE_TYPE}" --query 'InstanceTypes[0].{Hypervisor: Hypervisor, EniCount: NetworkInfo.MaximumNetworkInterfaces, PodsPerEniCount: NetworkInfo.Ipv4AddressesPerInterface, CpuCount: VCpuInfo.DefaultVCpus}' --output json) HYPERVISOR_TYPE=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.Hypervisor' ) IS_NITRO=false diff --git a/test/Dockerfile b/test/Dockerfile new file mode 100644 index 000000000..f4a4487b1 --- /dev/null +++ b/test/Dockerfile @@ -0,0 +1,15 @@ +FROM public.ecr.aws/aws-ec2/amazon-ec2-metadata-mock:v1.11.2 as aemm +FROM public.ecr.aws/amazonlinux/amazonlinux:2 + +ENV IMDS_ENDPOINT=127.0.0.1:1338 +COPY --from=aemm /ec2-metadata-mock /sbin/ec2-metadata-mock + +COPY files/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json +COPY files/kubelet-kubeconfig /var/lib/kubelet/kubeconfig +COPY test/entrypoint.sh /entrypoint.sh +COPY files /etc/eks +COPY test/mocks/ /sbin/ + +RUN yum install -y jq + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/test/README.md b/test/README.md new file mode 100644 index 000000000..e688ca945 --- /dev/null +++ b/test/README.md @@ -0,0 +1,40 @@ +## Tests + +This directory contains a Dockerfile that is able to be used locally to test the `/etc/eks/bootstrap.sh` script without having to use a real AL2 EC2 instance for a quick dev-loop.
It is still necessary to test the bootstrap script on a real instance since the Docker image is not a fully accurate representation. + +## AL2 EKS Optimized AMI Docker Image + +The image is built using the official AL2 image `public.ecr.aws/amazonlinux/amazonlinux:2`. It has several mocks installed including the [ec2-metadata-mock](https://github.com/aws/amazon-ec2-metadata-mock). Mocks are installed into `/sbin`, so adding additional ones as necessary should be as simple as dropping a bash script in the `mocks` dir named as the command you would like to mock out. + +## Usage + +```bash + +## The docker context needs to be at the root of the repo +docker build -t eks-optimized-ami -f Dockerfile ../ + +docker run -it eks-optimized-ami /etc/eks/bootstrap.sh --b64-cluster-ca dGVzdA== --apiserver-endpoint http://my-api-endpoint test +``` + +The `test-harness.sh` script wraps a build and runs the test scripts in the `cases` dir. Test scripts within the `cases` dir are invoked by the `test-harness.sh` script and have access to the `run` function. The `run` function accepts a temporary directory as an argument in order to mount as a volume in the container so that test scripts can check files within the `/etc/kubernetes/` directory after a bootstrap run. The remaining arguments to the `run` function are a path to a script within the AL2 EKS Optimized AMI Docker Container, followed by that script's arguments. + +Here's an example `run` call: + +``` +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv4 \ + --dns-cluster-ip 192.168.0.1 \ + test-cluster-name +``` + +## ECR Public + +You may need to log out of ECR Public or reauthenticate if your credentials are expired: + +```bash +docker logout public.ecr.aws +``` + +ECR Public allows anonymous access, but you cannot have expired credentials loaded.
\ No newline at end of file diff --git a/test/cases/ip-family-service-ipv6-cidr-mismatch.sh b/test/cases/ip-family-service-ipv6-cidr-mismatch.sh new file mode 100755 index 000000000..efe887290 --- /dev/null +++ b/test/cases/ip-family-service-ipv6-cidr-mismatch.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should fail validation - ip-family mismatch" +exit_code=0 +TEMP_DIR=$(mktemp -d) +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv4 \ + --service-ipv6-cidr 192.168.0.1/24 \ + test || exit_code=$? + +if [[ ${exit_code} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi diff --git a/test/cases/ipv4-cluster-dns-ip.sh b/test/cases/ipv4-cluster-dns-ip.sh new file mode 100755 index 000000000..7dc7e36b5 --- /dev/null +++ b/test/cases/ipv4-cluster-dns-ip.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "--> Should return IPv4 DNS Cluster IP when given dns-cluster-ip" +exit_code=0 +TEMP_DIR=$(mktemp -d) +expected_cluster_dns="192.168.0.1" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv4 \ + --dns-cluster-ip "${expected_cluster_dns}" \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +actual_cluster_dns=$(jq -r '.clusterDNS[0]' < ${TEMP_DIR}/kubelet-config.json) +if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then + echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" + exit 1 +fi \ No newline at end of file diff --git a/test/cases/ipv6-cluster-dns-ip.sh b/test/cases/ipv6-cluster-dns-ip.sh new file mode 100755 index 000000000..26f67fa8d --- /dev/null +++ b/test/cases/ipv6-cluster-dns-ip.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should return ipv6 DNS Cluster IP when given dns-cluster-ip" +exit_code=0 +TEMP_DIR=$(mktemp -d) +expected_cluster_dns="fe80::2a" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv6 \ + --dns-cluster-ip "${expected_cluster_dns}" \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +actual_cluster_dns=$(jq -r '.clusterDNS[0]' < ${TEMP_DIR}/kubelet-config.json) +if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then + echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" + exit 1 +fi \ No newline at end of file diff --git a/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh b/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh new file mode 100755 index 000000000..6c08cd7f6 --- /dev/null +++ b/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should return IPv6 DNS cluster IP when given service-ipv6-cidr" +exit_code=0 +TEMP_DIR=$(mktemp -d) +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv6 \ + --service-ipv6-cidr fe80::1 \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_cluster_dns="fe80::1a" +actual_cluster_dns=$(jq -r '.clusterDNS[0]' < ${TEMP_DIR}/kubelet-config.json) +if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then + echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" + exit 1 +fi \ No newline at end of file diff --git a/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh b/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh new file mode 100755 index 000000000..ac6991960 --- /dev/null +++ b/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should fail w/ \"service-ipv6-cidr must be provided when ip-family is specified as ipv6\"" +exit_code=0 +TEMP_DIR=$(mktemp -d) +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv6 \ + test || exit_code=$? + +if [[ ${exit_code} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi \ No newline at end of file diff --git a/test/cases/max-pods-cni-1-11-2-delegation.sh b/test/cases/max-pods-cni-1-11-2-delegation.sh new file mode 100755 index 000000000..e6a2b8ba6 --- /dev/null +++ b/test/cases/max-pods-cni-1-11-2-delegation.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should calc max-pods successfully for VPC CNI 1.11.2" +exit_code=0 +TEMP_DIR=$(mktemp -d) +out=$(run ${TEMP_DIR} /etc/eks/max-pods-calculator.sh \ + --instance-type m5.8xlarge \ + --cni-version 1.11.2 \ + --cni-prefix-delegation-enabled || exit_code=$?) 
+echo $out + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi +expected_max_pods="250" +actual_max_pods=$(grep -o '[0-9]\+' <<< ${out}) +if [[ ${actual_max_pods} -ne ${expected_max_pods} ]]; then + echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.11.2 to be '${expected_max_pods}', but got '${actual_max_pods}'" + exit 1 +fi diff --git a/test/cases/max-pods-cni-1-11-2.sh b/test/cases/max-pods-cni-1-11-2.sh new file mode 100755 index 000000000..cbf9179a5 --- /dev/null +++ b/test/cases/max-pods-cni-1-11-2.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should calc max-pods successfully for m5.8xlarge VPC CNI 1.11.2" +exit_code=0 +TEMP_DIR=$(mktemp -d) +out=$(run ${TEMP_DIR} /etc/eks/max-pods-calculator.sh \ + --instance-type m5.8xlarge \ + --cni-version 1.11.2 || exit_code=$?) +echo $out + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi +expected_max_pods="234" +actual_max_pods=$(grep -o '[0-9]\+' <<< ${out}) +if [[ ${actual_max_pods} -ne ${expected_max_pods} ]]; then + echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.11.2 to be '${expected_max_pods}', but got '${actual_max_pods}'" + exit 1 +fi diff --git a/test/cases/max-pods-cni-1-7-5.sh b/test/cases/max-pods-cni-1-7-5.sh new file mode 100755 index 000000000..295f43c28 --- /dev/null +++ b/test/cases/max-pods-cni-1-7-5.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should calc max-pods successfully for VPC CNI 1.7.5" +exit_code=0 +TEMP_DIR=$(mktemp -d) +out=$(run ${TEMP_DIR} /etc/eks/max-pods-calculator.sh \ + --instance-type-from-imds \ + --cni-version 1.7.5 || exit_code=$?) 
+echo $out + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi +expected_max_pods="58" +actual_max_pods=$(grep -o '[0-9]\+' <<< ${out}) +if [[ ${actual_max_pods} -ne ${expected_max_pods} ]]; then + echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.7.5 to be '${expected_max_pods}', but got '${actual_max_pods}'" + exit 1 +fi \ No newline at end of file diff --git a/test/entrypoint.sh b/test/entrypoint.sh new file mode 100755 index 000000000..b067e39f6 --- /dev/null +++ b/test/entrypoint.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +set -euo pipefail +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" + +## Start IMDS mock +/sbin/ec2-metadata-mock --imdsv2 &> /var/log/ec2-metadata-mock.log & +sleep 1 + +## execute any other params +$@ \ No newline at end of file diff --git a/test/mocks/aws b/test/mocks/aws new file mode 100755 index 000000000..5d9d57079 --- /dev/null +++ b/test/mocks/aws @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -euo pipefail +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" + +if [[ $1 == "ec2" ]]; then + + if [[ $2 == "describe-instance-types" ]]; then + instance_type=$(echo "${@}" | grep -o '[a-z]\+[0-9]\+[a-z]*\.[0-9a-z]\+' | tr '.' 
'-') + if [[ -f "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" ]]; then + cat "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" + exit 0 + fi + echo "instance type not found" + exit 1 + fi +fi \ No newline at end of file diff --git a/test/mocks/describe-instance-types/m4-xlarge.json b/test/mocks/describe-instance-types/m4-xlarge.json new file mode 100644 index 000000000..0239966bc --- /dev/null +++ b/test/mocks/describe-instance-types/m4-xlarge.json @@ -0,0 +1,6 @@ +{ + "Hypervisor": "xen", + "EniCount": 4, + "PodsPerEniCount": 15, + "CpuCount": 4 +} diff --git a/test/mocks/describe-instance-types/m5-8xlarge.json b/test/mocks/describe-instance-types/m5-8xlarge.json new file mode 100644 index 000000000..840091225 --- /dev/null +++ b/test/mocks/describe-instance-types/m5-8xlarge.json @@ -0,0 +1,6 @@ +{ + "Hypervisor": "nitro", + "EniCount": 8, + "PodsPerEniCount": 30, + "CpuCount": 32 +} diff --git a/test/mocks/iptables-save b/test/mocks/iptables-save new file mode 100755 index 000000000..b2bd12826 --- /dev/null +++ b/test/mocks/iptables-save @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "mocking iptables-save with params $@" \ No newline at end of file diff --git a/test/mocks/sudo b/test/mocks/sudo new file mode 100755 index 000000000..f91c5ff33 --- /dev/null +++ b/test/mocks/sudo @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "mocking sudo with params $@" \ No newline at end of file diff --git a/test/mocks/systemctl b/test/mocks/systemctl new file mode 100755 index 000000000..47846145d --- /dev/null +++ b/test/mocks/systemctl @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "mocking systemctl with $@" \ No newline at end of file diff --git a/test/test-harness.sh b/test/test-harness.sh new file mode 100755 index 000000000..fdfd00778 --- /dev/null +++ b/test/test-harness.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +export SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +set -euo pipefail + 
+TEST_CASE_SCRIPT="" + +USAGE=$(cat << 'EOM' + Usage: test-harness.sh [-c ] + Executes the test harness for the EKS Optimized AL2 AMI. + By default the test harness executes all scripts in the cases directory. + Example: test-harness.sh + Optional: + -c A path to a specific test case script +EOM +) + +while getopts "c:h" opt; do + case ${opt} in + c ) # Case Script Path + TEST_CASE_SCRIPT="$OPTARG" + ;; + h ) # help + echo "$USAGE" 1>&2 + exit + ;; + \? ) + echo "$USAGE" 1>&2 + exit + ;; + esac +done + +docker build -t eks-optimized-ami -f "${SCRIPTPATH}/Dockerfile" "${SCRIPTPATH}/../" +overall_status=0 + +function run(){ + local temp_dir=$1 + shift + cp -f ${SCRIPTPATH}/../files/kubelet-config.json ${temp_dir}/kubelet-config.json + docker run -v ${SCRIPTPATH}/../files/:/etc/eks/ \ + -v ${temp_dir}/kubelet-config.json:/etc/kubernetes/kubelet/kubelet-config.json \ + -it --rm eks-optimized-ami $@ +} +export -f run + +if [[ ! -z ${TEST_CASE_SCRIPT} ]]; then + test_cases=${TEST_CASE_SCRIPT} +else + test_cases=($(find ${SCRIPTPATH}/cases -name "*.sh" -type f)) +fi + +for case in "${test_cases[@]}"; do + status=0 + echo "=================================================================================================================" + echo "-> Executing Test Case: $(basename ${case})" + ${case} || status=1 + if [[ ${status} -eq 0 ]]; then + echo "✅ ✅ $(basename ${case}) Tests Passed! ✅ ✅ " + else + echo "❌ ❌ $(basename ${case}) Tests Failed! 
❌ ❌" + overall_status=1 + fi + echo "=================================================================================================================" +done + +exit $overall_status From e21d8649f652e1a3d013aea9470fbf3649cec260 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 8 Sep 2022 14:55:13 -0700 Subject: [PATCH 313/621] Add GitHub Action for unit tests (#1015) --- .github/workflows/ci.yaml | 17 +++++++++++++++++ Makefile | 4 ++++ test/test-harness.sh | 5 ++++- 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/ci.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 000000000..45b749ed9 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,17 @@ +name: CI +on: + workflow_dispatch: + push: + branches: + - 'master' + pull_request: + types: + - opened + - reopened + - synchronize +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - run: make test diff --git a/Makefile b/Makefile index b04f1fbf9..2dc5bd69d 100644 --- a/Makefile +++ b/Makefile @@ -31,6 +31,10 @@ T_RESET := \e[0m .PHONY: all all: 1.19 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI +.PHONY: test +test: ## run the test-harness + test/test-harness.sh + .PHONY: validate validate: ## Validate packer config $(PACKER_BINARY) validate $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json diff --git a/test/test-harness.sh b/test/test-harness.sh index fdfd00778..76296888c 100755 --- a/test/test-harness.sh +++ b/test/test-harness.sh @@ -39,7 +39,10 @@ function run(){ cp -f ${SCRIPTPATH}/../files/kubelet-config.json ${temp_dir}/kubelet-config.json docker run -v ${SCRIPTPATH}/../files/:/etc/eks/ \ -v ${temp_dir}/kubelet-config.json:/etc/kubernetes/kubelet/kubelet-config.json \ - -it --rm eks-optimized-ami $@ + --attach STDOUT \ + --attach STDERR \ + --rm \ + eks-optimized-ami $@ } export -f run From 
b5f852943866f1fe2747138dd3a09af5e77fc7cc Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 8 Sep 2022 15:04:48 -0700 Subject: [PATCH 314/621] Add note about unit testing (#1016) --- CONTRIBUTING.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c22acf627..30e2f96d9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -43,7 +43,15 @@ GitHub provides additional document on [forking a repository](https://help.githu When submitting PRs, we want to verify that there are no regressions in the AMI with the new changes. EKS runs various tests before publishing new Amazon EKS optimized Amazon Linux AMIs, which will ensure the highest level of confidence that there are no regressions in officially published AMIs. To maintain the health of this repo, we need to do some basic validation prior to merging PRs. Eventually, we hope to automate this process. Until then, here are the basic steps that we should take before merging PRs. -**Test #1: Verify that building AMIs still works** +**Test #1: Verify that the unit tests pass** + +Please add a test case for your changes, if possible. See the [unit test README](test/README.md) for more information. These tests will be run automatically for every pull request. + +``` +make test +``` + +**Test #2: Verify that building AMIs still works** If your change is relevant to a specific Kubernetes version, build all AMIs that apply. Otherwise, just choose the latest available Kubernetes version. @@ -52,7 +60,7 @@ If your change is relevant to a specific Kubernetes version, build all AMIs that make 1.22 ``` -**Test #2: Create a nodegroup with new AMI and confirm it joins a cluster** +**Test #3: Create a nodegroup with new AMI and confirm it joins a cluster** Once the AMI is built, we need to verify that it can join a cluster. You can use `eksctl`, or your method of choice, to create a cluster and add nodes to it using the AMI you built. 
Below is an example config file. @@ -84,7 +92,7 @@ eksctl create cluster -f cluster.yaml `eksctl` will verify that the nodes join the cluster before completing. -**Test #3: Verify that the nodes are Kubernetes conformant** +**Test #4: Verify that the nodes are Kubernetes conformant** You can use [sonobuoy](https://sonobuoy.io/) to run conformance tests on the cluster you've create in *Test #2*. You should only include nodes with the custom AMI built in *Test #1*. You must install `sonobuoy` locally before running. @@ -94,7 +102,7 @@ sonobuoy run --wait By default, `sonobuoy` will run `e2e` and `systemd-logs`. This step may take multiple hours to run. -**Test #4: [Optional] Test your specific PR changes** +**Test #5: [Optional] Test your specific PR changes** If your PR has changes that require additional, custom validation, provide the appropriate steps to verify that the changes don't cause regressions and behave as expected. Document the steps taken in the CR. From 73bca495141bfd03646a17046083ced8786edd01 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 8 Sep 2022 18:03:40 -0700 Subject: [PATCH 315/621] Add note on minimum Packer version (#1017) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a0ef8250b..988c67657 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ EKS-optimized AMI. ## Setup -You must have [Packer](https://www.packer.io/) installed on your local system. +You must have [Packer](https://www.packer.io/) version 1.8.0 or later installed on your local system. For more information, see [Installing Packer](https://www.packer.io/docs/install/index.html) in the Packer documentation. You must also have AWS account credentials configured so that Packer can make calls to AWS API operations on your behalf. 
From 6439014a163b7db6af4c96b3fe7792e904aa64e7 Mon Sep 17 00:00:00 2001 From: Brandon H <697896+brandonhon@users.noreply.github.com> Date: Thu, 8 Sep 2022 20:53:25 -0500 Subject: [PATCH 316/621] Use sudo for generate-version-info.sh (#1011) Co-authored-by: Brandon Honeycutt --- scripts/generate-version-info.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate-version-info.sh b/scripts/generate-version-info.sh index fa7baa196..889a758ef 100644 --- a/scripts/generate-version-info.sh +++ b/scripts/generate-version-info.sh @@ -14,7 +14,7 @@ fi OUTPUT_FILE="$1" # packages -rpm --query --all --queryformat '\{"%{NAME}": "%{VERSION}-%{RELEASE}"\}\n' | jq --slurp --sort-keys 'add | {packages:(.)}' > "$OUTPUT_FILE" +sudo rpm --query --all --queryformat '\{"%{NAME}": "%{VERSION}-%{RELEASE}"\}\n' | jq --slurp --sort-keys 'add | {packages:(.)}' > "$OUTPUT_FILE" # binaries echo $(jq ".binaries.kubelet = \"$(kubelet --version | awk '{print $2}')\"" $OUTPUT_FILE) > $OUTPUT_FILE From 486ef112af655134f3d7ff5607970882263138f6 Mon Sep 17 00:00:00 2001 From: Zaid Ahmed Farooq <38226823+zaf6862@users.noreply.github.com> Date: Mon, 12 Sep 2022 12:22:52 -0700 Subject: [PATCH 317/621] Remove 1.19 (#1018) --- Makefile | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 2dc5bd69d..849fc3239 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.19 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI +all: 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI .PHONY: test test: ## run the test-harness @@ -40,16 +40,12 @@ validate: ## Validate packer config $(PACKER_BINARY) validate $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json .PHONY: k8s -k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI +k8s: validate ## Build default K8s version of EKS 
Optimized AL2 AMI @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" $(PACKER_BINARY) build -timestamp-ui $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html -.PHONY: 1.19 -1.19: ## Build EKS Optimized AL2 AMI - K8s 1.19 - $(MAKE) k8s kubernetes_version=1.19.15 kubernetes_build_date=2021-11-10 pull_cni_from_github=true - .PHONY: 1.20 1.20: ## Build EKS Optimized AL2 AMI - K8s 1.20 $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-07-27 pull_cni_from_github=true From 8e3c3cc5541a021f31616445cad5edea4375a28e Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 15 Sep 2022 13:14:41 -0700 Subject: [PATCH 318/621] Removes deprecated --network-plugin flag (#1021) --- files/kubelet-containerd.service | 2 +- files/kubelet.service | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index 35a6bf188..b01a5bf15 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -11,7 +11,7 @@ ExecStart=/usr/bin/kubelet --cloud-provider aws \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime remote \ --container-runtime-endpoint unix:///run/containerd/containerd.sock \ - --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS + $KUBELET_ARGS $KUBELET_EXTRA_ARGS Restart=on-failure RestartForceExitStatus=SIGPIPE diff --git a/files/kubelet.service b/files/kubelet.service index 387470da1..e221f6a0f 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -10,7 +10,7 @@ ExecStart=/usr/bin/kubelet --cloud-provider aws \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ - --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS + 
$KUBELET_ARGS $KUBELET_EXTRA_ARGS Restart=always RestartSec=5 From 3ab8f32859f175dc2c25e05530b0f171e52db703 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 16 Sep 2022 11:24:38 -0700 Subject: [PATCH 319/621] Adds --network-plugin back to docker kubelet.service (#1026) --- files/kubelet.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/kubelet.service b/files/kubelet.service index e221f6a0f..387470da1 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -10,7 +10,7 @@ ExecStart=/usr/bin/kubelet --cloud-provider aws \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ - $KUBELET_ARGS $KUBELET_EXTRA_ARGS + --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS Restart=always RestartSec=5 From 802add46bcb573628d9b357b63e01d9fe7ad5eed Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Fri, 16 Sep 2022 18:30:34 -0700 Subject: [PATCH 320/621] Updating CHANGELOG.md with latest AMI details (#1029) Co-authored-by: ljosyula --- CHANGELOG.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fcdc1766..025386aaf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,45 @@ # Changelog +### AMI Release v20220914 +* amazon-eks-gpu-node-1.23-v20220914 +* amazon-eks-gpu-node-1.22-v20220914 +* amazon-eks-gpu-node-1.21-v20220914 +* amazon-eks-gpu-node-1.20-v20220914 +* amazon-eks-arm64-node-1.23-v20220914 +* amazon-eks-arm64-node-1.22-v20220914 +* amazon-eks-arm64-node-1.21-v20220914 +* amazon-eks-arm64-node-1.20-v20220914 +* amazon-eks-node-1.23-v20220914 +* amazon-eks-node-1.22-v20220914 +* amazon-eks-node-1.21-v20220914 +* amazon-eks-node-1.20-v20220914 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.23.9-20220914` +* `1.22.12-20220914` +* `1.21.14-20220914` +* 
`1.20.15-20220914` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ + +AMI details: +* kernel: 5.4.209-116.367.amzn2 +* dockerd: 20.10.17-1.amzn2 +* containerd: 1.6.6-1.amzn2 +* runc: 1.1.3-1.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +- The AWS CLI has been updated to (`1.25.72`)[https://github.com/aws/aws-cli/blob/1.25.72/CHANGELOG.rst#L8] to support local EKS clusters on Outposts. +- This release fixes an issue with DNS cluster IP and IPv6. More info in #931. +- Kernel version updated to `5.4.209-116.367.amzn2` as a part of latest CVE patch (ALASKERNEL-5.4-2022-035)[https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2022-035.html] + ### AMI Release v20220824 * amazon-eks-gpu-node-1.23-v20220824 * amazon-eks-gpu-node-1.22-v20220824 From 07aeeb2cd62c8de9ff2a41dedb8c99d34fb9ba1d Mon Sep 17 00:00:00 2001 From: Zaid Ahmed Farooq <38226823+zaf6862@users.noreply.github.com> Date: Tue, 20 Sep 2022 15:11:31 -0700 Subject: [PATCH 321/621] Add pause container account for me-central-1 (#1032) Co-authored-by: Zaid Farooq --- files/bootstrap.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 039e23246..cfbc730e8 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -178,6 +178,8 @@ function get_pause_container_account_for_region () { echo "${PAUSE_CONTAINER_ACCOUNT:-590381155156}";; ap-southeast-3) echo "${PAUSE_CONTAINER_ACCOUNT:-296578399912}";; + me-central-1) + echo "${PAUSE_CONTAINER_ACCOUNT:-759879836304}";; *) echo "${PAUSE_CONTAINER_ACCOUNT:-602401143452}";; esac From 75c7d2970a73b00276519e96f249e89059167d34 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Wed, 21 Sep 2022 11:27:30 -0500 Subject: [PATCH 322/621] Capture the disk usage from pod local storage (#1019) This captures 
the storage from the top transient layer of the overlay filesystem where files that the pod creates while running are stored. This allows identifying situations where pods are writing log files or other data to disk to both identify potential causes for full disks as well as I/O throttling. --- log-collector-script/linux/eks-log-collector.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 1430b8545..a0a3cafab 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -20,7 +20,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.7.1" +readonly PROGRAM_VERSION="0.7.2" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -287,6 +287,7 @@ get_mounts_info() { pvs > "${COLLECT_DIR}"/storage/pvs.txt vgs > "${COLLECT_DIR}"/storage/vgs.txt mount -t xfs | awk '{print $1}' | xargs -I{} -- sh -c "xfs_info {}; xfs_db -r -c 'freesp -s' {}" > "${COLLECT_DIR}"/storage/xfs.txt + mount | grep ^overlay | sed 's/.*upperdir=//' | sed 's/,.*//' | xargs -n 1 timeout 75 du -sh | grep -v ^0 > "${COLLECT_DIR}"/storage/pod_local_storage.txt ok } From 9c4b0c54440193a15b2458bb971a924542ad9606 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 21 Sep 2022 10:55:35 -0700 Subject: [PATCH 323/621] Resolves #1024: makes containerd the default and only runtime for k8s version 1.24+ (#1027) --- .gitignore | 2 + files/bootstrap.sh | 42 +++++++++- scripts/upgrade_kernel.sh | 17 +---- test/cases/container-runtime-defaults.sh | 97 ++++++++++++++++++++++++ test/mocks/kubelet | 9 +++ test/test-harness.sh | 3 + 6 files changed, 152 insertions(+), 18 deletions(-) create mode 100755 test/cases/container-runtime-defaults.sh create mode 100755 test/mocks/kubelet diff 
--git a/.gitignore b/.gitignore index 42b8dcbf0..2d9cb419a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *manifest.json *.swp .idea +*version-info.json +.DS_Store diff --git a/files/bootstrap.sh b/files/bootstrap.sh index cfbc730e8..2ead184d1 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -135,18 +135,46 @@ set -- "${POSITIONAL[@]}" # restore positional parameters CLUSTER_NAME="$1" set -u +KUBELET_VERSION=$(kubelet --version | grep -Eo '[0-9]\.[0-9]+\.[0-9]+') +echo "Using kubelet version $KUBELET_VERSION" + +function is_greater_than_or_equal_to_version() { + local actual_version="$1" + local compared_version="$2" + + [ $actual_version = "`echo -e \"$actual_version\n$compared_version\" | sort -V | tail -n1`" ] +} + +# As of Kubernetes version 1.24, we will start defaulting the container runtime to containerd +# and no longer support docker as a container runtime. +IS_124_OR_GREATER=false +DEFAULT_CONTAINER_RUNTIME=dockerd +if is_greater_than_or_equal_to_version $KUBELET_VERSION "1.24.0"; then + IS_124_OR_GREATER=true + DEFAULT_CONTAINER_RUNTIME=containerd +fi + +# Set container runtime related variables +DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" +ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" +CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-$DEFAULT_CONTAINER_RUNTIME}" + +echo "Using $CONTAINER_RUNTIME as the container runtime" + +if $IS_124_OR_GREATER && [ $CONTAINER_RUNTIME != "containerd" ]; then + echo "ERROR: containerd is the only supported container runtime as of Kubernetes version 1.24" + exit 1 +fi + USE_MAX_PODS="${USE_MAX_PODS:-true}" B64_CLUSTER_CA="${B64_CLUSTER_CA:-}" APISERVER_ENDPOINT="${APISERVER_ENDPOINT:-}" SERVICE_IPV4_CIDR="${SERVICE_IPV4_CIDR:-}" DNS_CLUSTER_IP="${DNS_CLUSTER_IP:-}" KUBELET_EXTRA_ARGS="${KUBELET_EXTRA_ARGS:-}" -ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" API_RETRY_ATTEMPTS="${API_RETRY_ATTEMPTS:-3}" -DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" 
CONTAINERD_CONFIG_FILE="${CONTAINERD_CONFIG_FILE:-}" PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.5}" -CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-dockerd}" IP_FAMILY="${IP_FAMILY:-}" SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" @@ -542,6 +570,14 @@ EOF fi if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then + if $ENABLE_DOCKER_BRIDGE; then + echo "WARNING: Flag --enable-docker-bridge was set but will be ignored as it's not relevant to containerd" + fi + + if [ ! -z "$DOCKER_CONFIG_JSON" ]; then + echo "WARNING: Flag --docker-config-json was set but will be ignored as it's not relevant to containerd" + fi + sudo mkdir -p /etc/containerd sudo mkdir -p /etc/cni/net.d mkdir -p /etc/systemd/system/containerd.service.d diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 7a686604b..a53c4f6f9 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -5,22 +5,9 @@ set -o nounset set -o errexit if [[ -z "$KERNEL_VERSION" ]]; then - # Save for resetting - OLDIFS=$IFS - # Makes 5.4 kernel the default on 1.19 and higher - IFS='.' - # Convert kubernetes version in an array to compare versions - read -ra ADDR <<< "$KUBERNETES_VERSION" - # Reset - IFS=$OLDIFS + KERNEL_VERSION=5.4 - if (( ADDR[0] == 1 && ADDR[1] < 19 )); then - KERNEL_VERSION=4.14 - else - KERNEL_VERSION=5.4 - fi - - echo "kernel_version is unset. Setting to $KERNEL_VERSION based on kubernetes_version $KUBERNETES_VERSION" + echo "kernel_version is unset. 
Setting to $KERNEL_VERSION" fi if [[ $KERNEL_VERSION == "4.14" ]]; then diff --git a/test/cases/container-runtime-defaults.sh b/test/cases/container-runtime-defaults.sh new file mode 100755 index 000000000..c8efff4ed --- /dev/null +++ b/test/cases/container-runtime-defaults.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +set -euo pipefail + +exit_code=0 +TEMP_DIR=$(mktemp -d) + +echo "--> Should allow dockerd as container runtime when below k8s version 1.24" +KUBELET_VERSION="Kubernetes v1.20.15-eks-ba74326" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime dockerd \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +echo "--> Should allow containerd as container runtime when below k8s version 1.24" +KUBELET_VERSION="Kubernetes v1.20.15-eks-ba74326" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime containerd \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +echo "--> Should have default container runtime when below k8s version 1.24" +KUBELET_VERSION="Kubernetes v1.20.15-eks-ba74326" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +echo "--> Should not allow dockerd as container runtime when at or above k8s version 1.24" +export KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime dockerd \ + test || exit_code=$? 
+ +echo "EXIT CODE $exit_code" +if [[ ${exit_code} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi +exit_code=0 + +echo "--> Should allow containerd as container runtime when at or above k8s version 1.24" +KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime containerd \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +echo "--> Should have default container runtime when at or above k8s version 1.24" +KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +echo "--> Should ignore docker-specific flags when at or above k8s version 1.24" +KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --enable-docker-bridge true \ + --docker-config-json "{\"some\":\"json\"}" \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi diff --git a/test/mocks/kubelet b/test/mocks/kubelet new file mode 100755 index 000000000..6dbf8abcd --- /dev/null +++ b/test/mocks/kubelet @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euo pipefail + +# The only use of kubelet directly is to get the Kubernetes version, +# so we'll set a default here to avoid test failures, and you can +# override by setting the KUBELET_VERSION environment variable. 
+some_kubelet_version="Kubernetes v1.20.15-eks-ba74326" +KUBELET_VERSION="${KUBELET_VERSION:-$some_kubelet_version}" +echo "$KUBELET_VERSION" diff --git a/test/test-harness.sh b/test/test-harness.sh index 76296888c..c13ca763e 100755 --- a/test/test-harness.sh +++ b/test/test-harness.sh @@ -36,12 +36,15 @@ overall_status=0 function run(){ local temp_dir=$1 shift + # This variable is used to override the default value in the kubelet mock + KUBELET_VERSION="${KUBELET_VERSION:-}" cp -f ${SCRIPTPATH}/../files/kubelet-config.json ${temp_dir}/kubelet-config.json docker run -v ${SCRIPTPATH}/../files/:/etc/eks/ \ -v ${temp_dir}/kubelet-config.json:/etc/kubernetes/kubelet/kubelet-config.json \ --attach STDOUT \ --attach STDERR \ --rm \ + -e KUBELET_VERSION="$KUBELET_VERSION" \ eks-optimized-ami $@ } export -f run From eb908eb80fe9e5b4cc4266c445638dedea264a8c Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 22 Sep 2022 12:12:55 -0700 Subject: [PATCH 324/621] Add trn1 instance types (#1033) --- files/eni-max-pods.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 8981c9c0b..a8de14260 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2022-08-15T23:21:13Z +# This file was generated at 2022-09-21T13:34:09-07:00 # # The regions queried were: # - ap-northeast-1 @@ -554,6 +554,8 @@ t4g.micro 4 t4g.nano 4 t4g.small 11 t4g.xlarge 58 +trn1.2xlarge 58 +trn1.32xlarge 247 u-12tb1.112xlarge 737 u-12tb1.metal 147 u-18tb1.metal 737 From 2485779f6918a2154d53875157914547fe57bad5 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Fri, 23 Sep 2022 11:23:58 -0500 Subject: [PATCH 325/621] increase the kube-api-server QPS from 5/10 to 10/20 (#1030) This applies for EKS v1.22+ where API Priority & Fairness is available and there is a specific queue for kubelet health. --- files/bootstrap.sh | 12 ++++++--- test/cases/api-qps-k8s-1.21-below.sh | 32 ++++++++++++++++++++++++ test/cases/api-qps-k8s-1.22-above.sh | 31 +++++++++++++++++++++++ test/cases/container-runtime-defaults.sh | 14 +++++------ test/mocks/kubelet | 8 +++--- 5 files changed, 84 insertions(+), 13 deletions(-) create mode 100755 test/cases/api-qps-k8s-1.21-below.sh create mode 100755 test/cases/api-qps-k8s-1.22-above.sh diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 2ead184d1..3cfc9bc72 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -518,13 +518,19 @@ KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json echo "$(jq ".clusterDNS=[\"$DNS_CLUSTER_IP\"]" $KUBELET_CONFIG)" > $KUBELET_CONFIG if [[ "${IP_FAMILY}" == "ipv4" ]]; then - INTERNAL_IP=$(get_meta_data 'latest/meta-data/local-ipv4') + INTERNAL_IP=$(get_meta_data 'latest/meta-data/local-ipv4') else - INTERNAL_IP_URI=latest/meta-data/network/interfaces/macs/$MAC/ipv6s - INTERNAL_IP=$(get_meta_data $INTERNAL_IP_URI) + INTERNAL_IP_URI=latest/meta-data/network/interfaces/macs/$MAC/ipv6s + INTERNAL_IP=$(get_meta_data $INTERNAL_IP_URI) fi INSTANCE_TYPE=$(get_meta_data 'latest/meta-data/instance-type') +if is_greater_than_or_equal_to_version $KUBELET_VERSION "1.22.0"; then + # for K8s versions that suport API Priority & Fairness, increase our API server 
QPS + echo $(jq ".kubeAPIQPS=( .kubeAPIQPS // 10)|.kubeAPIBurst=( .kubeAPIBurst // 20)" $KUBELET_CONFIG) > $KUBELET_CONFIG +fi + + # Sets kubeReserved and evictionHard in /etc/kubernetes/kubelet/kubelet-config.json for worker nodes. The following two function # calls calculate the CPU and memory resources to reserve for kubeReserved based on the instance type of the worker node. # Note that allocatable memory and CPU resources on worker nodes is calculated by the Kubernetes scheduler diff --git a/test/cases/api-qps-k8s-1.21-below.sh b/test/cases/api-qps-k8s-1.21-below.sh new file mode 100755 index 000000000..5c7c40216 --- /dev/null +++ b/test/cases/api-qps-k8s-1.21-below.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "--> Should use default API server QPS for K8s 1.21-" +exit_code=0 +TEMP_DIR=$(mktemp -d) +KUBELET_VERSION=v1.21.0-eks-ba74326 +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +# values should not be set +expected_api_qps="null" +expected_api_burst="null" + +actual_api_qps=$(jq -r '.kubeAPIQPS' < ${TEMP_DIR}/kubelet-config.json) +actual_api_burst=$(jq -r '.kubeAPIBurst' < ${TEMP_DIR}/kubelet-config.json) +if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then + echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" + exit 1 +fi + +if [[ ${actual_api_burst} != ${expected_api_burst} ]]; then + echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" + exit 1 +fi diff --git a/test/cases/api-qps-k8s-1.22-above.sh b/test/cases/api-qps-k8s-1.22-above.sh new file mode 100755 index 000000000..54639260e --- /dev/null +++ b/test/cases/api-qps-k8s-1.22-above.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "--> Should increase API server QPS for K8s 1.22+" +exit_code=0 +TEMP_DIR=$(mktemp -d) +KUBELET_VERSION=v1.22.0-eks-ba74326 +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_api_qps="10" +expected_api_burst="20" + +actual_api_qps=$(jq -r '.kubeAPIQPS' < ${TEMP_DIR}/kubelet-config.json) +actual_api_burst=$(jq -r '.kubeAPIBurst' < ${TEMP_DIR}/kubelet-config.json) +if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then + echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" + exit 1 +fi + +if [[ ${actual_api_burst} != ${expected_api_burst} ]]; then + echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" + exit 1 +fi diff --git a/test/cases/container-runtime-defaults.sh b/test/cases/container-runtime-defaults.sh index c8efff4ed..74f4c5f12 100755 --- a/test/cases/container-runtime-defaults.sh +++ b/test/cases/container-runtime-defaults.sh @@ -5,7 +5,7 @@ exit_code=0 TEMP_DIR=$(mktemp -d) echo "--> Should allow dockerd as container runtime when below k8s version 1.24" -KUBELET_VERSION="Kubernetes v1.20.15-eks-ba74326" +KUBELET_VERSION="v1.20.15-eks-ba74326" run ${TEMP_DIR} /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -18,7 +18,7 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should allow containerd as container runtime when below k8s version 1.24" -KUBELET_VERSION="Kubernetes v1.20.15-eks-ba74326" +KUBELET_VERSION="v1.20.15-eks-ba74326" run ${TEMP_DIR} /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -31,7 +31,7 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should have default container runtime when below k8s version 1.24" -KUBELET_VERSION="Kubernetes v1.20.15-eks-ba74326" +KUBELET_VERSION="v1.20.15-eks-ba74326" run ${TEMP_DIR} /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -43,7 +43,7 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should not allow 
dockerd as container runtime when at or above k8s version 1.24" -export KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +export KUBELET_VERSION="v1.24.15-eks-ba74326" run ${TEMP_DIR} /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -58,7 +58,7 @@ fi exit_code=0 echo "--> Should allow containerd as container runtime when at or above k8s version 1.24" -KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +KUBELET_VERSION="v1.24.15-eks-ba74326" run ${TEMP_DIR} /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -71,7 +71,7 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should have default container runtime when at or above k8s version 1.24" -KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +KUBELET_VERSION="v1.24.15-eks-ba74326" run ${TEMP_DIR} /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -83,7 +83,7 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should ignore docker-specific flags when at or above k8s version 1.24" -KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +KUBELET_VERSION="v1.24.15-eks-ba74326" run ${TEMP_DIR} /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ diff --git a/test/mocks/kubelet b/test/mocks/kubelet index 6dbf8abcd..c689a18dc 100755 --- a/test/mocks/kubelet +++ b/test/mocks/kubelet @@ -4,6 +4,8 @@ set -euo pipefail # The only use of kubelet directly is to get the Kubernetes version, # so we'll set a default here to avoid test failures, and you can # override by setting the KUBELET_VERSION environment variable. 
-some_kubelet_version="Kubernetes v1.20.15-eks-ba74326" -KUBELET_VERSION="${KUBELET_VERSION:-$some_kubelet_version}" -echo "$KUBELET_VERSION" +if [ $# == 1 ] && [ $1 == "--version" ]; then + echo "Kubernetes ${KUBELET_VERSION:-v1.23.9-eks-ba74326}" +else + echo "mocking kubelet with params $@" +fi From c3c83267df3605a45bc93bfcf6c17fc6a343335a Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 29 Sep 2022 11:55:40 -0700 Subject: [PATCH 326/621] Move cleanup to separate provisioner, remove cleanup_image var (#1036) --- eks-worker-al2.json | 7 +++++-- scripts/cleanup.sh | 28 ++++++++++++++++++++++++++++ scripts/install-worker.sh | 33 --------------------------------- 3 files changed, 33 insertions(+), 35 deletions(-) create mode 100644 scripts/cleanup.sh diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 624572849..80a81653e 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -24,7 +24,6 @@ "arch": null, "instance_type": null, "ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", - "cleanup_image": "true", "ssh_interface": "", "ssh_username": "ec2-user", "temporary_security_group_source_cidrs": "", @@ -157,10 +156,14 @@ "AWS_ACCESS_KEY_ID={{user `aws_access_key_id`}}", "AWS_SECRET_ACCESS_KEY={{user `aws_secret_access_key`}}", "AWS_SESSION_TOKEN={{user `aws_session_token`}}", - "CLEANUP_IMAGE={{user `cleanup_image`}}", "SONOBUOY_E2E_REGISTRY={{user `sonobuoy_e2e_registry`}}" ] }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "script": "{{template_dir}}/scripts/cleanup.sh" + }, { "type": "shell", "remote_folder": "{{ user `remote_folder`}}", diff --git a/scripts/cleanup.sh b/scripts/cleanup.sh new file mode 100644 index 000000000..39babfde2 --- /dev/null +++ b/scripts/cleanup.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +# Clean up yum caches to reduce the image size +sudo yum clean all +sudo rm -rf /var/cache/yum + +# Clean up build artifacts +sudo rm -rf /tmp/worker + +# Clean up files to reduce confusion 
during debug +sudo rm -rf \ + /etc/hostname \ + /etc/machine-id \ + /etc/resolv.conf \ + /etc/ssh/ssh_host* \ + /home/ec2-user/.ssh/authorized_keys \ + /root/.ssh/authorized_keys \ + /var/lib/cloud/data \ + /var/lib/cloud/instance \ + /var/lib/cloud/instances \ + /var/lib/cloud/sem \ + /var/lib/dhclient/* \ + /var/lib/dhcp/dhclient.* \ + /var/lib/yum/history \ + /var/log/cloud-init-output.log \ + /var/log/cloud-init.log \ + /var/log/secure \ + /var/log/wtmp \ No newline at end of file diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 639bbe1bb..43825e045 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -373,37 +373,4 @@ echo vm.max_map_count=524288 | sudo tee -a /etc/sysctl.conf sudo mkdir -p /etc/eks/log-collector-script/ sudo cp $TEMPLATE_DIR/log-collector-script/eks-log-collector.sh /etc/eks/log-collector-script/ -################################################################################ -### Cleanup #################################################################### -################################################################################ - -CLEANUP_IMAGE="${CLEANUP_IMAGE:-true}" -if [[ "$CLEANUP_IMAGE" == "true" ]]; then - # Clean up yum caches to reduce the image size - sudo yum clean all - sudo rm -rf \ - $TEMPLATE_DIR \ - /var/cache/yum - - # Clean up files to reduce confusion during debug - sudo rm -rf \ - /etc/hostname \ - /etc/machine-id \ - /etc/resolv.conf \ - /etc/ssh/ssh_host* \ - /home/ec2-user/.ssh/authorized_keys \ - /root/.ssh/authorized_keys \ - /var/lib/cloud/data \ - /var/lib/cloud/instance \ - /var/lib/cloud/instances \ - /var/lib/cloud/sem \ - /var/lib/dhclient/* \ - /var/lib/dhcp/dhclient.* \ - /var/lib/yum/history \ - /var/log/cloud-init-output.log \ - /var/log/cloud-init.log \ - /var/log/secure \ - /var/log/wtmp -fi - sudo touch /etc/machine-id From 5651264bfffe5a7e0733aaf56fe12a506cabb314 Mon Sep 17 00:00:00 2001 From: xr1776 <108886506+xr1776@users.noreply.github.com> Date: 
Thu, 29 Sep 2022 19:24:10 -0400 Subject: [PATCH 327/621] Update CHANGELOG.md for v20220926 (#1039) --- CHANGELOG.md | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 025386aaf..f0deebb16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,44 @@ # Changelog +### AMI Release v20220926 +Available ap-northeast-2 09/29/2022 and other regions 10/3/2022. +* amazon-eks-gpu-node-1.23-v20220926 +* amazon-eks-gpu-node-1.22-v20220926 +* amazon-eks-gpu-node-1.21-v20220926 +* amazon-eks-gpu-node-1.20-v20220926 +* amazon-eks-arm64-node-1.23-v20220926 +* amazon-eks-arm64-node-1.22-v20220926 +* amazon-eks-arm64-node-1.21-v20220926 +* amazon-eks-arm64-node-1.20-v20220926 +* amazon-eks-node-1.23-v20220926 +* amazon-eks-node-1.22-v20220926 +* amazon-eks-node-1.21-v20220926 +* amazon-eks-node-1.20-v20220926 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.23.9-20220926` +* `1.22.12-20220926` +* `1.21.14-20220926` +* `1.20.15-20220926` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ + +AMI details: +* kernel: 5.4.209-116.367.amzn2 +* dockerd: 20.10.17-1.amzn2 +* containerd: 1.6.6-1.amzn2 +* runc: 1.1.3-1.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable Changes: +* Phase 1 of support for Trn1 instances + ### AMI Release v20220914 * amazon-eks-gpu-node-1.23-v20220914 * amazon-eks-gpu-node-1.22-v20220914 From e064e254d2388ab048aff586de43f8b0d3b7fc03 Mon Sep 17 00:00:00 2001 From: xr1776 <108886506+xr1776@users.noreply.github.com> Date: Thu, 29 Sep 2022 19:46:24 -0400 Subject: [PATCH 328/621] Update CHANGELOG.md (#1040) * Update CHANGELOG.md * Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) 
diff --git a/CHANGELOG.md b/CHANGELOG.md index f0deebb16..a3560492e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,6 @@ # Changelog ### AMI Release v20220926 -Available ap-northeast-2 09/29/2022 and other regions 10/3/2022. * amazon-eks-gpu-node-1.23-v20220926 * amazon-eks-gpu-node-1.22-v20220926 * amazon-eks-gpu-node-1.21-v20220926 From bb9a9bbe9b7fcf32ce5e38822b10b665303cffdb Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 7 Oct 2022 13:15:23 -0700 Subject: [PATCH 329/621] Increase polling timeout to 45 minutes (#1042) --- eks-worker-al2.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 80a81653e..33d00f105 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -74,6 +74,10 @@ "delete_on_termination": true } ], + "aws_polling": { + "delay_seconds": 30, + "max_attempts": 90 + }, "ami_regions": "{{user `ami_regions`}}", "ssh_username": "{{user `ssh_username`}}", "ssh_interface": "{{user `ssh_interface`}}", From 9bec3eb6db8dc1ed645264f20f020f039eb1e8ce Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 7 Oct 2022 13:15:57 -0700 Subject: [PATCH 330/621] Disable colorized output (#1041) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 849fc3239..d6275f1c1 100644 --- a/Makefile +++ b/Makefile @@ -42,7 +42,7 @@ validate: ## Validate packer config .PHONY: k8s k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" - $(PACKER_BINARY) build -timestamp-ui $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json + $(PACKER_BINARY) build -timestamp-ui -color=false $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json # Build dates and versions taken from 
https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html From e5eb32500584a43ba1fab0a4091de9c1853a6f7a Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Mon, 10 Oct 2022 12:01:20 -0500 Subject: [PATCH 331/621] always add empty machine-id file (#1043) --- scripts/cleanup.sh | 4 +++- scripts/install-worker.sh | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/cleanup.sh b/scripts/cleanup.sh index 39babfde2..a661178b3 100644 --- a/scripts/cleanup.sh +++ b/scripts/cleanup.sh @@ -25,4 +25,6 @@ sudo rm -rf \ /var/log/cloud-init-output.log \ /var/log/cloud-init.log \ /var/log/secure \ - /var/log/wtmp \ No newline at end of file + /var/log/wtmp + +sudo touch /etc/machine-id diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 43825e045..e54e01501 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -372,5 +372,3 @@ echo vm.max_map_count=524288 | sudo tee -a /etc/sysctl.conf ################################################################################ sudo mkdir -p /etc/eks/log-collector-script/ sudo cp $TEMPLATE_DIR/log-collector-script/eks-log-collector.sh /etc/eks/log-collector-script/ - -sudo touch /etc/machine-id From bfc24acad17ed771c0ffe13169bb126b7e000e9e Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 12 Oct 2022 10:35:41 -0700 Subject: [PATCH 332/621] Makes volume_type configurable with make arguments (#1045) --- Makefile | 2 +- eks-worker-al2.json | 51 +++++++++++++++++++++++---------------------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index d6275f1c1..a4253f36f 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version runc_version cni_plugin_version source_ami_id source_ami_owners source_ami_filter_name arch instance_type security_group_id 
additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry ami_regions +PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version runc_version cni_plugin_version source_ami_id source_ami_owners source_ami_filter_name arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry ami_regions volume_type K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 33d00f105..a2335e3d6 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -1,41 +1,42 @@ { "variables": { - "aws_region": "us-west-2", + "additional_yum_repos": "", + "ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", "ami_name": null, - "creator": "{{env `USER`}}", - "encrypted": "false", - "kms_key_id": "", + "ami_regions": "", + "ami_users": "", + "arch": null, + "associate_public_ip_address": "", "aws_access_key_id": "{{env `AWS_ACCESS_KEY_ID`}}", + "aws_region": "us-west-2", "aws_secret_access_key": "{{env `AWS_SECRET_ACCESS_KEY`}}", "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}", "binary_bucket_name": "amazon-eks", "binary_bucket_region": "us-west-2", - "kubernetes_version": null, - "kubernetes_build_date": null, - "kernel_version": "", - "docker_version": "20.10.17-1.amzn2", - "containerd_version": "1.6.6-1.amzn2", - "runc_version": "1.1.3-1.amzn2", "cni_plugin_version": "v0.8.6", + "containerd_version": "1.6.6-1.amzn2", + "creator": "{{env `USER`}}", + "docker_version": "20.10.17-1.amzn2", + "encrypted": "false", + "instance_type": null, + "kernel_version": "", + "kms_key_id": "", + "kubernetes_build_date": null, + "kubernetes_version": null, + "launch_block_device_mappings_volume_size": "4", "pull_cni_from_github": "true", + "remote_folder": "", + "runc_version": 
"1.1.3-1.amzn2", + "security_group_id": "", + "sonobuoy_e2e_registry": "", + "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", "source_ami_id": "", "source_ami_owners": "137112412989", - "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", - "arch": null, - "instance_type": null, - "ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", "ssh_interface": "", "ssh_username": "ec2-user", - "temporary_security_group_source_cidrs": "", - "security_group_id": "", - "associate_public_ip_address": "", "subnet_id": "", - "remote_folder": "", - "launch_block_device_mappings_volume_size": "4", - "ami_users": "", - "additional_yum_repos": "", - "sonobuoy_e2e_registry": "", - "ami_regions": "" + "temporary_security_group_source_cidrs": "", + "volume_type": "gp2" }, "builders": [ { @@ -61,7 +62,7 @@ "launch_block_device_mappings": [ { "device_name": "/dev/xvda", - "volume_type": "gp2", + "volume_type": "{{user `volume_type`}}", "volume_size": "{{user `launch_block_device_mappings_volume_size`}}", "delete_on_termination": true } @@ -69,7 +70,7 @@ "ami_block_device_mappings": [ { "device_name": "/dev/xvda", - "volume_type": "gp2", + "volume_type": "{{user `volume_type`}}", "volume_size": 20, "delete_on_termination": true } From 162eee294eed1dafe99c47180fa2c470b112aad5 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 12 Oct 2022 16:00:19 -0700 Subject: [PATCH 333/621] Add IMDS helper (#1044) --- files/bin/imds | 61 ++++++++++++++ files/bootstrap.sh | 82 ++----------------- files/max-pods-calculator.sh | 6 +- scripts/install-worker.sh | 9 +- test/Dockerfile | 6 +- test/cases/api-qps-k8s-1.21-below.sh | 9 +- test/cases/api-qps-k8s-1.22-above.sh | 9 +- test/cases/container-runtime-defaults.sh | 29 +++---- test/cases/imds-token-refresh.sh | 49 +++++++++++ .../ip-family-service-ipv6-cidr-mismatch.sh | 3 +- test/cases/ipv4-cluster-dns-ip.sh | 5 +- test/cases/ipv6-cluster-dns-ip.sh | 5 +- ...-dns-cluster-ip-given-service-ipv6-cidr.sh | 4 +- 
.../ipv6-ip-family-and-service-ipv6-cidr.sh | 3 +- test/cases/max-pods-cni-1-11-2-delegation.sh | 3 +- test/cases/max-pods-cni-1-11-2.sh | 3 +- test/cases/max-pods-cni-1-7-5.sh | 4 +- test/entrypoint.sh | 2 +- test/test-harness.sh | 20 ++--- 19 files changed, 174 insertions(+), 138 deletions(-) create mode 100755 files/bin/imds create mode 100755 test/cases/imds-token-refresh.sh diff --git a/files/bin/imds b/files/bin/imds new file mode 100755 index 000000000..a65e442b6 --- /dev/null +++ b/files/bin/imds @@ -0,0 +1,61 @@ +#!/bin/sh + +set -o errexit +set -o pipefail +set -o nounset + +IMDS_DEBUG="${IMDS_DEBUG:-false}" +function log() { + if [ "$IMDS_DEBUG" = "true" ] + then + echo >&2 "$1" + fi +} + +if [ "$#" -ne 1 ] +then + echo >&2 "usage: imds API_PATH" + exit 1 +fi + +# leading slashes will be removed +API_PATH="${1#/}" + +CURRENT_TIME=$(date '+%s') + +IMDS_ENDPOINT=${IMDS_ENDPOINT:-169.254.169.254} + +log "ℹ️ Talking to IMDS at $IMDS_ENDPOINT" + +TOKEN_DIR=/tmp/imds-tokens +mkdir -p $TOKEN_DIR + +TOKEN_FILE=$(ls $TOKEN_DIR | awk '$0 > '$CURRENT_TIME | sort -n -r | head -n 1) + +if [ "$TOKEN_FILE" = "" ] +then + # default ttl is 15 minutes + IMDS_TOKEN_TTL_SECONDS=${IMDS_TOKEN_TTL_SECONDS:-900} + TOKEN_FILE=$(($CURRENT_TIME + $IMDS_TOKEN_TTL_SECONDS)) + curl \ + --silent \ + --show-error \ + --retry 10 \ + --retry-delay 1 \ + -o $TOKEN_DIR/$TOKEN_FILE \ + -H "X-aws-ec2-metadata-token-ttl-seconds: $IMDS_TOKEN_TTL_SECONDS" \ + -X PUT \ + "http://$IMDS_ENDPOINT/latest/api/token" + log "🔑 Retrieved a fresh IMDS token that will expire in $IMDS_TOKEN_TTL_SECONDS seconds." +else + log "ℹ️ Using cached IMDS token that expires in $(($TOKEN_FILE - $CURRENT_TIME)) seconds." 
+fi + +curl \ + --silent \ + --show-error \ + --retry 10 \ + --retry-delay 1 \ + --write-out '\n' \ + -H "X-aws-ec2-metadata-token: $(cat $TOKEN_DIR/$TOKEN_FILE)" \ + "http://$IMDS_ENDPOINT/$API_PATH" diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 3cfc9bc72..208f06298 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -179,7 +179,6 @@ IP_FAMILY="${IP_FAMILY:-}" SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" CLUSTER_ID="${CLUSTER_ID:-}" -IMDS_ENDPOINT="${IMDS_ENDPOINT:-169.254.169.254:80}" function get_pause_container_account_for_region () { local region="$1" @@ -213,72 +212,6 @@ function get_pause_container_account_for_region () { esac } -function _get_token() { - local token_result= - local http_result= - - token_result=$(curl -s -w "\n%{http_code}" -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" "http://${IMDS_ENDPOINT}/latest/api/token") - http_result=$(echo "$token_result" | tail -n 1) - if [[ "$http_result" != "200" ]] - then - echo -e "Failed to get token:\n$token_result" - return 1 - else - echo "$token_result" | head -n 1 - return 0 - fi -} - -function get_token() { - local token= - local retries=20 - local result=1 - - while [[ retries -gt 0 && $result -ne 0 ]] - do - retries=$[$retries-1] - token=$(_get_token) - result=$? 
- [[ $result != 0 ]] && sleep 5 - done - [[ $result == 0 ]] && echo "$token" - return $result -} - -function _get_meta_data() { - local path=$1 - local metadata_result= - - metadata_result=$(curl -s -w "\n%{http_code}" -H "X-aws-ec2-metadata-token: $TOKEN" http://${IMDS_ENDPOINT}/$path) - http_result=$(echo "$metadata_result" | tail -n 1) - if [[ "$http_result" != "200" ]] - then - echo -e "Failed to get metadata:\n$metadata_result\nhttp://${IMDS_ENDPOINT}/$path\n$TOKEN" - return 1 - else - local lines=$(echo "$metadata_result" | wc -l) - echo "$metadata_result" | head -n $(( lines - 1 )) - return 0 - fi -} - -function get_meta_data() { - local metadata= - local path=$1 - local retries=20 - local result=1 - - while [[ retries -gt 0 && $result -ne 0 ]] - do - retries=$[$retries-1] - metadata=$(_get_meta_data $path) - result=$? - [[ $result != 0 ]] && TOKEN=$(get_token) - done - [[ $result == 0 ]] && echo "$metadata" - return $result -} - # Helper function which calculates the amount of the given resource (either CPU or memory) # to reserve in a given resource range, specified by a start and end of the range and a percentage # of the resource to reserve. Note that we return zero if the start of the resource range is @@ -363,9 +296,8 @@ if [[ ! 
-z "${SERVICE_IPV6_CIDR}" ]]; then IP_FAMILY="ipv6" fi -TOKEN=$(get_token) -AWS_DEFAULT_REGION=$(get_meta_data 'latest/dynamic/instance-identity/document' | jq .region -r) -AWS_SERVICES_DOMAIN=$(get_meta_data 'latest/meta-data/services/domain') +AWS_DEFAULT_REGION=$(imds 'latest/dynamic/instance-identity/document' | jq .region -r) +AWS_SERVICES_DOMAIN=$(imds 'latest/meta-data/services/domain') MACHINE=$(uname -m) if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then @@ -486,7 +418,7 @@ fi ### kubelet.service configuration -MAC=$(get_meta_data 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$//') +MAC=$(imds 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$//') if [[ -z "${DNS_CLUSTER_IP}" ]]; then @@ -503,7 +435,7 @@ if [[ -z "${DNS_CLUSTER_IP}" ]]; then #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. (x.y.z.10) DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 else - TEN_RANGE=$(get_meta_data "latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks" | grep -c '^10\..*' || true ) + TEN_RANGE=$(imds "latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks" | grep -c '^10\..*' || true ) DNS_CLUSTER_IP=10.100.0.10 if [[ "$TEN_RANGE" != "0" ]]; then DNS_CLUSTER_IP=172.20.0.10 @@ -518,12 +450,12 @@ KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json echo "$(jq ".clusterDNS=[\"$DNS_CLUSTER_IP\"]" $KUBELET_CONFIG)" > $KUBELET_CONFIG if [[ "${IP_FAMILY}" == "ipv4" ]]; then - INTERNAL_IP=$(get_meta_data 'latest/meta-data/local-ipv4') + INTERNAL_IP=$(imds 'latest/meta-data/local-ipv4') else INTERNAL_IP_URI=latest/meta-data/network/interfaces/macs/$MAC/ipv6s - INTERNAL_IP=$(get_meta_data $INTERNAL_IP_URI) + INTERNAL_IP=$(imds $INTERNAL_IP_URI) fi -INSTANCE_TYPE=$(get_meta_data 'latest/meta-data/instance-type') +INSTANCE_TYPE=$(imds 'latest/meta-data/instance-type') if is_greater_than_or_equal_to_version $KUBELET_VERSION "1.22.0"; then # for K8s versions that suport API 
Priority & Fairness, increase our API server QPS diff --git a/files/max-pods-calculator.sh b/files/max-pods-calculator.sh index bf92d571e..a9bb7122b 100755 --- a/files/max-pods-calculator.sh +++ b/files/max-pods-calculator.sh @@ -76,15 +76,13 @@ CNI_MAX_ENI="${CNI_MAX_ENI:-}" INSTANCE_TYPE="${INSTANCE_TYPE:-}" INSTANCE_TYPE_FROM_IMDS="${INSTANCE_TYPE_FROM_IMDS:-false}" SHOW_MAX_ALLOWED="${SHOW_MAX_ALLOWED:-false}" -IMDS_ENDPOINT="${IMDS_ENDPOINT:-169.254.169.254:80}" PREFIX_DELEGATION_SUPPORTED=false IPS_PER_PREFIX=16 if [ "$INSTANCE_TYPE_FROM_IMDS" = true ]; then - TOKEN=$(curl -m 10 -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" -s "http://${IMDS_ENDPOINT}/latest/api/token") - export AWS_DEFAULT_REGION=$(curl -s --retry 5 -H "X-aws-ec2-metadata-token: $TOKEN" http://${IMDS_ENDPOINT}/latest/dynamic/instance-identity/document | jq .region -r) - INSTANCE_TYPE=$(curl -m 10 -H "X-aws-ec2-metadata-token: $TOKEN" -s http://${IMDS_ENDPOINT}/latest/meta-data/instance-type) + export AWS_DEFAULT_REGION=$(imds /latest/dynamic/instance-identity/document | jq .region -r) + INSTANCE_TYPE=$(imds /latest/meta-data/instance-type) elif [ -z "$INSTANCE_TYPE" ]; # There's no reasonable default for an instanceType so force one to be provided to the script. then echo "You must specify an instance type to calculate max pods value." 
diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index e54e01501..da0de5789 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -45,6 +45,13 @@ else exit 1 fi +################################################################################ +### Utilities ################################################################## +################################################################################ + +sudo chmod -R a+x $TEMPLATE_DIR/bin/ +sudo mv $TEMPLATE_DIR/bin/* /usr/bin/ + ################################################################################ ### Packages ################################################################### ################################################################################ @@ -338,7 +345,7 @@ sudo yum install -y amazon-ssm-agent ### AMI Metadata ############################################################### ################################################################################ -BASE_AMI_ID=$(curl -s http://169.254.169.254/latest/meta-data/ami-id) +BASE_AMI_ID=$(imds /latest/meta-data/ami-id) cat < /tmp/release BASE_AMI_ID="$BASE_AMI_ID" BUILD_TIME="$(date)" diff --git a/test/Dockerfile b/test/Dockerfile index f4a4487b1..cf0d7c020 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -1,15 +1,13 @@ FROM public.ecr.aws/aws-ec2/amazon-ec2-metadata-mock:v1.11.2 as aemm FROM public.ecr.aws/amazonlinux/amazonlinux:2 +RUN yum install -y jq ENV IMDS_ENDPOINT=127.0.0.1:1338 COPY --from=aemm /ec2-metadata-mock /sbin/ec2-metadata-mock - COPY files/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json COPY files/kubelet-kubeconfig /var/lib/kubelet/kubeconfig COPY test/entrypoint.sh /entrypoint.sh COPY files /etc/eks +COPY files/bin/* /usr/bin/ COPY test/mocks/ /sbin/ - -RUN yum install -y jq - ENTRYPOINT ["/entrypoint.sh"] diff --git a/test/cases/api-qps-k8s-1.21-below.sh b/test/cases/api-qps-k8s-1.21-below.sh index 5c7c40216..dd7653a24 100755 --- 
a/test/cases/api-qps-k8s-1.21-below.sh +++ b/test/cases/api-qps-k8s-1.21-below.sh @@ -3,9 +3,8 @@ set -euo pipefail echo "--> Should use default API server QPS for K8s 1.21-" exit_code=0 -TEMP_DIR=$(mktemp -d) -KUBELET_VERSION=v1.21.0-eks-ba74326 -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.21.0-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ test || exit_code=$? @@ -19,8 +18,8 @@ fi expected_api_qps="null" expected_api_burst="null" -actual_api_qps=$(jq -r '.kubeAPIQPS' < ${TEMP_DIR}/kubelet-config.json) -actual_api_burst=$(jq -r '.kubeAPIBurst' < ${TEMP_DIR}/kubelet-config.json) +actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) +actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" exit 1 diff --git a/test/cases/api-qps-k8s-1.22-above.sh b/test/cases/api-qps-k8s-1.22-above.sh index 54639260e..103868c93 100755 --- a/test/cases/api-qps-k8s-1.22-above.sh +++ b/test/cases/api-qps-k8s-1.22-above.sh @@ -3,9 +3,8 @@ set -euo pipefail echo "--> Should increase API server QPS for K8s 1.22+" exit_code=0 -TEMP_DIR=$(mktemp -d) -KUBELET_VERSION=v1.22.0-eks-ba74326 -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.22.0-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ test || exit_code=$? 
@@ -18,8 +17,8 @@ fi expected_api_qps="10" expected_api_burst="20" -actual_api_qps=$(jq -r '.kubeAPIQPS' < ${TEMP_DIR}/kubelet-config.json) -actual_api_burst=$(jq -r '.kubeAPIBurst' < ${TEMP_DIR}/kubelet-config.json) +actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) +actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" exit 1 diff --git a/test/cases/container-runtime-defaults.sh b/test/cases/container-runtime-defaults.sh index 74f4c5f12..03333a190 100755 --- a/test/cases/container-runtime-defaults.sh +++ b/test/cases/container-runtime-defaults.sh @@ -5,8 +5,9 @@ exit_code=0 TEMP_DIR=$(mktemp -d) echo "--> Should allow dockerd as container runtime when below k8s version 1.24" -KUBELET_VERSION="v1.20.15-eks-ba74326" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +# This variable is used to override the default value in the kubelet mock +export KUBELET_VERSION=v1.20.15-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --container-runtime dockerd \ @@ -18,8 +19,8 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should allow containerd as container runtime when below k8s version 1.24" -KUBELET_VERSION="v1.20.15-eks-ba74326" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.20.15-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --container-runtime containerd \ @@ -31,8 +32,8 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should have default container runtime when below k8s version 1.24" -KUBELET_VERSION="v1.20.15-eks-ba74326" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.20.15-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ test || 
exit_code=$? @@ -43,8 +44,8 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should not allow dockerd as container runtime when at or above k8s version 1.24" -export KUBELET_VERSION="v1.24.15-eks-ba74326" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --container-runtime dockerd \ @@ -58,8 +59,8 @@ fi exit_code=0 echo "--> Should allow containerd as container runtime when at or above k8s version 1.24" -KUBELET_VERSION="v1.24.15-eks-ba74326" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --container-runtime containerd \ @@ -71,8 +72,8 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should have default container runtime when at or above k8s version 1.24" -KUBELET_VERSION="v1.24.15-eks-ba74326" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ test || exit_code=$? 
@@ -83,8 +84,8 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should ignore docker-specific flags when at or above k8s version 1.24" -KUBELET_VERSION="v1.24.15-eks-ba74326" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --enable-docker-bridge true \ diff --git a/test/cases/imds-token-refresh.sh b/test/cases/imds-token-refresh.sh new file mode 100755 index 000000000..215aef426 --- /dev/null +++ b/test/cases/imds-token-refresh.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +echo "--> Should refresh IMDS token on configured interval" +exit_code=0 +TOKEN_DIR=/tmp/imds-tokens +TTL=5 +export IMDS_TOKEN_TTL_SECONDS=$TTL +export IMDS_DEBUG=true +imds /latest/meta-data/instance-id || exit_code=$? + +if [[ ${exit_code} -ne 0 ]] +then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]] +then + echo "❌ Test Failed: expected one token to be present after first IMDS call but got '${ls $TOKEN_DIR}'" + exit 1 +fi + +imds /latest/meta-data/instance-id || exit_code=$? + +if [[ ${exit_code} -ne 0 ]] +then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]] +then + echo "❌ Test Failed: expected one token to be present after second IMDS call but got '$(ls $TOKEN_DIR)'" + exit 1 +fi + +sleep $(($TTL + 1)) + +imds /latest/meta-data/instance-id || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]] +then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]] +then + echo "❌ Test Failed: expected two tokens to be present after third IMDS call but got '$(ls $TOKEN_DIR)'" + exit 1 +fi \ No newline at end of file diff --git a/test/cases/ip-family-service-ipv6-cidr-mismatch.sh b/test/cases/ip-family-service-ipv6-cidr-mismatch.sh index efe887290..f39ed8807 100755 --- a/test/cases/ip-family-service-ipv6-cidr-mismatch.sh +++ b/test/cases/ip-family-service-ipv6-cidr-mismatch.sh @@ -3,8 +3,7 @@ set -euo pipefail echo "-> Should fail validation - ip-family mismatch" exit_code=0 -TEMP_DIR=$(mktemp -d) -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --ip-family ipv4 \ diff --git a/test/cases/ipv4-cluster-dns-ip.sh b/test/cases/ipv4-cluster-dns-ip.sh index 7dc7e36b5..03074fc07 100755 --- a/test/cases/ipv4-cluster-dns-ip.sh +++ b/test/cases/ipv4-cluster-dns-ip.sh @@ -3,9 +3,8 @@ set -euo pipefail echo "--> Should return IPv4 DNS Cluster IP when given dns-cluster-ip" exit_code=0 -TEMP_DIR=$(mktemp -d) expected_cluster_dns="192.168.0.1" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --ip-family ipv4 \ @@ -17,7 +16,7 @@ if [[ ${exit_code} -ne 0 ]]; then exit 1 fi -actual_cluster_dns=$(jq -r '.clusterDNS[0]' < ${TEMP_DIR}/kubelet-config.json) +actual_cluster_dns=$(jq -r '.clusterDNS[0]' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" exit 1 diff --git a/test/cases/ipv6-cluster-dns-ip.sh b/test/cases/ipv6-cluster-dns-ip.sh index 26f67fa8d..0f7d7451d 100755 --- a/test/cases/ipv6-cluster-dns-ip.sh +++ 
b/test/cases/ipv6-cluster-dns-ip.sh @@ -3,9 +3,8 @@ set -euo pipefail echo "-> Should return ipv6 DNS Cluster IP when given dns-cluster-ip" exit_code=0 -TEMP_DIR=$(mktemp -d) expected_cluster_dns="fe80::2a" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --ip-family ipv6 \ @@ -17,7 +16,7 @@ if [[ ${exit_code} -ne 0 ]]; then exit 1 fi -actual_cluster_dns=$(jq -r '.clusterDNS[0]' < ${TEMP_DIR}/kubelet-config.json) +actual_cluster_dns=$(jq -r '.clusterDNS[0]' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" exit 1 diff --git a/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh b/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh index 6c08cd7f6..f503f01ad 100755 --- a/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh +++ b/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh @@ -4,7 +4,7 @@ set -euo pipefail echo "-> Should return IPv6 DNS cluster IP when given service-ipv6-cidr" exit_code=0 TEMP_DIR=$(mktemp -d) -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --ip-family ipv6 \ @@ -17,7 +17,7 @@ if [[ ${exit_code} -ne 0 ]]; then fi expected_cluster_dns="fe80::1a" -actual_cluster_dns=$(jq -r '.clusterDNS[0]' < ${TEMP_DIR}/kubelet-config.json) +actual_cluster_dns=$(jq -r '.clusterDNS[0]' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" exit 1 diff --git a/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh b/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh index ac6991960..e984a223e 100755 --- 
a/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh +++ b/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh @@ -3,8 +3,7 @@ set -euo pipefail echo "-> Should fail w/ \"service-ipv6-cidr must be provided when ip-family is specified as ipv6\"" exit_code=0 -TEMP_DIR=$(mktemp -d) -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --ip-family ipv6 \ diff --git a/test/cases/max-pods-cni-1-11-2-delegation.sh b/test/cases/max-pods-cni-1-11-2-delegation.sh index e6a2b8ba6..dca43f5e6 100755 --- a/test/cases/max-pods-cni-1-11-2-delegation.sh +++ b/test/cases/max-pods-cni-1-11-2-delegation.sh @@ -3,8 +3,7 @@ set -euo pipefail echo "-> Should calc max-pods successfully for VPC CNI 1.11.2" exit_code=0 -TEMP_DIR=$(mktemp -d) -out=$(run ${TEMP_DIR} /etc/eks/max-pods-calculator.sh \ +out=$(/etc/eks/max-pods-calculator.sh \ --instance-type m5.8xlarge \ --cni-version 1.11.2 \ --cni-prefix-delegation-enabled || exit_code=$?) diff --git a/test/cases/max-pods-cni-1-11-2.sh b/test/cases/max-pods-cni-1-11-2.sh index cbf9179a5..ec47c0730 100755 --- a/test/cases/max-pods-cni-1-11-2.sh +++ b/test/cases/max-pods-cni-1-11-2.sh @@ -3,8 +3,7 @@ set -euo pipefail echo "-> Should calc max-pods successfully for m5.8xlarge VPC CNI 1.11.2" exit_code=0 -TEMP_DIR=$(mktemp -d) -out=$(run ${TEMP_DIR} /etc/eks/max-pods-calculator.sh \ +out=$(/etc/eks/max-pods-calculator.sh \ --instance-type m5.8xlarge \ --cni-version 1.11.2 || exit_code=$?) 
echo $out diff --git a/test/cases/max-pods-cni-1-7-5.sh b/test/cases/max-pods-cni-1-7-5.sh index 295f43c28..619767256 100755 --- a/test/cases/max-pods-cni-1-7-5.sh +++ b/test/cases/max-pods-cni-1-7-5.sh @@ -3,8 +3,8 @@ set -euo pipefail echo "-> Should calc max-pods successfully for VPC CNI 1.7.5" exit_code=0 -TEMP_DIR=$(mktemp -d) -out=$(run ${TEMP_DIR} /etc/eks/max-pods-calculator.sh \ +export IMDS_DEBUG=true +out=$(/etc/eks/max-pods-calculator.sh \ --instance-type-from-imds \ --cni-version 1.7.5 || exit_code=$?) echo $out diff --git a/test/entrypoint.sh b/test/entrypoint.sh index b067e39f6..fdd437768 100755 --- a/test/entrypoint.sh +++ b/test/entrypoint.sh @@ -7,4 +7,4 @@ SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" sleep 1 ## execute any other params -$@ \ No newline at end of file +/test.sh \ No newline at end of file diff --git a/test/test-harness.sh b/test/test-harness.sh index c13ca763e..f632194a4 100755 --- a/test/test-harness.sh +++ b/test/test-harness.sh @@ -34,20 +34,13 @@ docker build -t eks-optimized-ami -f "${SCRIPTPATH}/Dockerfile" "${SCRIPTPATH}/. overall_status=0 function run(){ - local temp_dir=$1 - shift - # This variable is used to override the default value in the kubelet mock - KUBELET_VERSION="${KUBELET_VERSION:-}" - cp -f ${SCRIPTPATH}/../files/kubelet-config.json ${temp_dir}/kubelet-config.json docker run -v ${SCRIPTPATH}/../files/:/etc/eks/ \ - -v ${temp_dir}/kubelet-config.json:/etc/kubernetes/kubelet/kubelet-config.json \ + -v "$(realpath $1):/test.sh" \ --attach STDOUT \ --attach STDERR \ --rm \ - -e KUBELET_VERSION="$KUBELET_VERSION" \ - eks-optimized-ami $@ + eks-optimized-ami } -export -f run if [[ ! 
-z ${TEST_CASE_SCRIPT} ]]; then test_cases=${TEST_CASE_SCRIPT} @@ -59,9 +52,9 @@ for case in "${test_cases[@]}"; do status=0 echo "=================================================================================================================" echo "-> Executing Test Case: $(basename ${case})" - ${case} || status=1 + run ${case} || status=1 if [[ ${status} -eq 0 ]]; then - echo "✅ ✅ $(basename ${case}) Tests Passed! ✅ ✅ " + echo "✅ ✅ $(basename ${case}) Tests Passed! ✅ ✅" else echo "❌ ❌ $(basename ${case}) Tests Failed! ❌ ❌" overall_status=1 @@ -69,4 +62,9 @@ for case in "${test_cases[@]}"; do echo "=================================================================================================================" done +if [[ ${overall_status} -eq 0 ]]; then + echo "✅ ✅ All Tests Passed! ✅ ✅" +else + echo "❌ ❌ Some Tests Failed! ❌ ❌" +fi exit $overall_status From a830ca83411ea20b05ba84265098030faa3946c4 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 20 Oct 2022 10:11:43 -0700 Subject: [PATCH 334/621] Upgrades runc to 1.1.3-1.amzn2.0.2 (#1055) --- eks-worker-al2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index a2335e3d6..94900936e 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -26,7 +26,7 @@ "launch_block_device_mappings_volume_size": "4", "pull_cni_from_github": "true", "remote_folder": "", - "runc_version": "1.1.3-1.amzn2", + "runc_version": "1.1.3-1.amzn2.0.2", "security_group_id": "", "sonobuoy_e2e_registry": "", "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", From 0cad5c51788a709c31a8cb839c6fac4b0431cbba Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 20 Oct 2022 10:48:44 -0700 Subject: [PATCH 335/621] Update docker and containerd for ALASDOCKER-2022-021 (#1056) --- eks-worker-al2.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 94900936e..dda34dd39 100644 --- a/eks-worker-al2.json +++ 
b/eks-worker-al2.json @@ -14,9 +14,9 @@ "binary_bucket_name": "amazon-eks", "binary_bucket_region": "us-west-2", "cni_plugin_version": "v0.8.6", - "containerd_version": "1.6.6-1.amzn2", + "containerd_version": "1.6.6-1.amzn2.0.2", "creator": "{{env `USER`}}", - "docker_version": "20.10.17-1.amzn2", + "docker_version": "20.10.17-1.amzn2.0.1", "encrypted": "false", "instance_type": null, "kernel_version": "", From 4b54ee95d42df8a2715add2a32f5150db097fde8 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 21 Oct 2022 11:48:51 -0700 Subject: [PATCH 336/621] Fixes arch reference in README per #1057 (#1058) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 988c67657..21c4b6641 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ PLATFORM=linux # Chose a platform and set the variable #List of all architectures for the selected Kubernetes Version, build date and platform aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/ -ARCH=amd64 #Chose an architecture and set the variable +ARCH=x86_64 #Chose an architecture and set the variable ``` Run the following command to build an Amazon EKS Worker AMI based on the chosen parameters in the previous step ```bash From a0d20adbb4f2e0bcc3d520aecc9bfa2ba0b0f47b Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 27 Oct 2022 14:00:14 -0400 Subject: [PATCH 337/621] Add fmt to make targets (#1063) --- .editorconfig | 8 ++++++++ CONTRIBUTING.md | 9 +++++---- Makefile | 6 ++++++ 3 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..117520dfb --- /dev/null +++ b/.editorconfig @@ -0,0 +1,8 @@ +[*.sh] +indent_style = space +indent_size = 2 +binary_next_line = true +switch_case_indent = true +space_redirects = true +keep_padding = true +function_next_line = false diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 30e2f96d9..0576691bc 100644 --- 
a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -31,10 +31,11 @@ To send us a pull request, please: 1. Fork the repository. 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. -3. Ensure local tests pass. -4. Commit to your fork using clear commit messages. -5. Send us a pull request, answering any default questions in the pull request interface. -6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. +3. Ensure your changes match our style guide (`make fmt`). +4. Ensure local tests pass (`make test`). +5. Commit to your fork using clear commit messages. +6. Send us a pull request, answering any default questions in the pull request interface. +7. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 
diff --git a/Makefile b/Makefile index a4253f36f..c96644855 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,8 @@ PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) +MAKEFILE_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + aws_region ?= $(AWS_DEFAULT_REGION) binary_bucket_region ?= $(AWS_DEFAULT_REGION) arch ?= x86_64 @@ -31,6 +33,10 @@ T_RESET := \e[0m .PHONY: all all: 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI +.PHONY: fmt +fmt: ## Format the source files + shfmt --list $(MAKEFILE_DIR) + .PHONY: test test: ## run the test-harness test/test-harness.sh From c46e7413f674c2bb03fad950d47658b4ef3c9fb1 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 27 Oct 2022 14:00:39 -0400 Subject: [PATCH 338/621] Fix syntax issues (#1062) --- files/bin/imds | 2 +- test/cases/imds-token-refresh.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/files/bin/imds b/files/bin/imds index a65e442b6..c73097b2f 100755 --- a/files/bin/imds +++ b/files/bin/imds @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash set -o errexit set -o pipefail diff --git a/test/cases/imds-token-refresh.sh b/test/cases/imds-token-refresh.sh index 215aef426..cc1334ec6 100755 --- a/test/cases/imds-token-refresh.sh +++ b/test/cases/imds-token-refresh.sh @@ -18,7 +18,7 @@ then exit 1 elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]] then - echo "❌ Test Failed: expected one token to be present after first IMDS call but got '${ls $TOKEN_DIR}'" + echo "❌ Test Failed: expected one token to be present after first IMDS call but got '$(ls $TOKEN_DIR)'" exit 1 fi @@ -46,4 +46,4 @@ elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]] then echo "❌ Test Failed: expected two tokens to be present after third IMDS call but got '$(ls $TOKEN_DIR)'" exit 1 -fi \ No newline at end of file +fi From 
a459e7332592cda651a81f7db2526b5d94e90f05 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 28 Oct 2022 11:25:47 -0400 Subject: [PATCH 339/621] Use shfmt flags instead of editorconfig (#1064) --- .editorconfig | 3 +++ Makefile | 12 +++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.editorconfig b/.editorconfig index 117520dfb..cd9cab171 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,3 +1,6 @@ +# ensure that these rules are equivalent to the flags to shfmt in the Makefile. +# we can't use this file with shfmt directly because there's no way to express +# shebang matching on files without the `sh` extension. [*.sh] indent_style = space indent_size = 2 diff --git a/Makefile b/Makefile index c96644855..095922b28 100644 --- a/Makefile +++ b/Makefile @@ -35,7 +35,17 @@ all: 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI .PHONY: fmt fmt: ## Format the source files - shfmt --list $(MAKEFILE_DIR) + # ensure that these flags are equivalent to the rules in the .editorconfig + shfmt \ + --list \ + --write \ + --language-dialect auto \ + --indent 2 \ + --binary-next-line \ + --case-indent \ + --space-redirects \ + --keep-padding \ + $(MAKEFILE_DIR) .PHONY: test test: ## run the test-harness From 193759f20224bb866fef169b2a93049cc999e758 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Sat, 29 Oct 2022 09:22:24 -0700 Subject: [PATCH 340/621] Updating CHANGELOG.md for AMI release 20221027 (#1067) --- CHANGELOG.md | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3560492e..d6e524ef3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,48 @@ # Changelog +### AMI Release v20221027 +* amazon-eks-gpu-node-1.23-v20221027 +* amazon-eks-gpu-node-1.22-v20221027 +* amazon-eks-gpu-node-1.21-v20221027 +* amazon-eks-gpu-node-1.20-v20221027 +* amazon-eks-arm64-node-1.23-v20221027 +* amazon-eks-arm64-node-1.22-v20221027 +* amazon-eks-arm64-node-1.21-v20221027 +* 
amazon-eks-arm64-node-1.20-v20221027 +* amazon-eks-node-1.23-v20221027 +* amazon-eks-node-1.22-v20221027 +* amazon-eks-node-1.21-v20221027 +* amazon-eks-node-1.20-v20221027 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.23.9-20221027` +* `1.22.12-20221027` +* `1.21.14-20221027` +* `1.20.15-20221027` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ + +AMI details: +* kernel: 5.4.217-126.408.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.3-1.amzn2.0.2 +* cuda: 470.141.03-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +* cuda is updated to 470.141.03-1. +* Linux kernel is updated to 5.4.217-126.408.amzn2. +* runc version is updated to 1.1.3-1.amzn2.0.2 to include [ALAS2DOCKER-2022-020](https://alas.aws.amazon.com/AL2/ALASDOCKER-2022-020.html). [#1055](https://github.com/awslabs/amazon-eks-ami/pull/1055) +* docker version are update to 20.10.17-1.amzn2.0.1, and containerd version are updated to 1.6.6-1.amzn2.0.2 to include [ALASDOCKER-2022-021](https://alas.aws.amazon.com/AL2/ALASDOCKER-2022-021.html). [#1056](https://github.com/awslabs/amazon-eks-ami/pull/1056) +* Increase the kube-api-server QPS from 5/10 to 10/20. [#1030](https://github.com/awslabs/amazon-eks-ami/pull/1030) +* Release AMI in me-central-1 with version 1.21, 1.22, 1.23. 1.20 will not be supported since it will be deprecated soon. 
+ ### AMI Release v20220926 * amazon-eks-gpu-node-1.23-v20220926 * amazon-eks-gpu-node-1.22-v20220926 From 3b6b5231f4ab8cbf669f6d8034864870690d721a Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Sat, 29 Oct 2022 18:21:33 -0700 Subject: [PATCH 341/621] recalling change log v20221027 (#1069) Co-authored-by: Ravi Sinha --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6e524ef3..3f268cc05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -### AMI Release v20221027 +### [Recalled] AMI Release v20221027 * amazon-eks-gpu-node-1.23-v20221027 * amazon-eks-gpu-node-1.22-v20221027 * amazon-eks-gpu-node-1.21-v20221027 From ff27e2440b6a02d51ebcc5fec2ae42d315b31310 Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Mon, 31 Oct 2022 14:15:30 -0400 Subject: [PATCH 342/621] Pin Kernel 5.4 to 5.4.209-116.367 to prevent nodes from going into Unready (#1072) We're investigating issues with later Kernel versions which cause nodes to become Unready. Till those issues are resolved, pinning the Kernel to the last known good version. --- scripts/upgrade_kernel.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index a53c4f6f9..a4cf68f3e 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -13,7 +13,12 @@ fi if [[ $KERNEL_VERSION == "4.14" ]]; then sudo yum update -y kernel elif [[ $KERNEL_VERSION == "5.4" ]]; then - sudo amazon-linux-extras install -y kernel-5.4 + # Pinning Kernel to 5.4.209-116.367 since we're investigating issues with later Kernel versions which cause nodes to become Unready. 
+ # sudo amazon-linux-extras install -y kernel-5.4 + sudo amazon-linux-extras enable kernel-5.4=latest + sudo yum -y install kernel-5.4.209-116.367.amzn2 + sudo yum install -y yum-plugin-versionlock + sudo yum versionlock kernel-5.4* elif [[ $KERNEL_VERSION == "5.10" ]]; then sudo amazon-linux-extras install -y kernel-5.10 else From 165d827c38c548a0c08a7e34441ee80d79c136bc Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 31 Oct 2022 13:35:03 -0700 Subject: [PATCH 343/621] Address shellcheck finding (#1073) --- files/bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 208f06298..7d8f4b0aa 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -265,7 +265,7 @@ get_cpu_millicores_to_reserve() { local cpu_ranges=(0 1000 2000 4000 $total_cpu_on_instance) local cpu_percentage_reserved_for_ranges=(600 100 50 25) cpu_to_reserve="0" - for i in ${!cpu_percentage_reserved_for_ranges[@]}; do + for i in "${!cpu_percentage_reserved_for_ranges[@]}"; do local start_range=${cpu_ranges[$i]} local end_range=${cpu_ranges[(($i+1))]} local percentage_to_reserve_for_range=${cpu_percentage_reserved_for_ranges[$i]} From 0d8d78c0f0010432c0ebef73d41bf88ebbdf4f60 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 31 Oct 2022 15:44:45 -0700 Subject: [PATCH 344/621] Don't keep padding when formatting (#1075) --- .editorconfig | 2 +- Makefile | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.editorconfig b/.editorconfig index cd9cab171..deaf311fc 100644 --- a/.editorconfig +++ b/.editorconfig @@ -7,5 +7,5 @@ indent_size = 2 binary_next_line = true switch_case_indent = true space_redirects = true -keep_padding = true +keep_padding = false function_next_line = false diff --git a/Makefile b/Makefile index 095922b28..32f431ab1 100644 --- a/Makefile +++ b/Makefile @@ -44,7 +44,6 @@ fmt: ## Format the source files --binary-next-line \ --case-indent \ --space-redirects \ - --keep-padding \ $(MAKEFILE_DIR) 
.PHONY: test From 1e89a4483c5bd2c34fcdf89d625fbd56085d83cf Mon Sep 17 00:00:00 2001 From: Kulwant Singh Date: Wed, 2 Nov 2022 15:06:48 +0000 Subject: [PATCH 345/621] Update Windows eks-log-collector for container runtime log collection (#1059) 1. Added check for collection for docker 2. Add support for containerd log collection 3. panic.log file is collected for containerd --- .../windows/eks-log-collector.ps1 | 109 +++++++++++++++--- 1 file changed, 91 insertions(+), 18 deletions(-) diff --git a/log-collector-script/windows/eks-log-collector.ps1 b/log-collector-script/windows/eks-log-collector.ps1 index 2d538376d..d3e03660b 100644 --- a/log-collector-script/windows/eks-log-collector.ps1 +++ b/log-collector-script/windows/eks-log-collector.ps1 @@ -52,11 +52,13 @@ Function create_working_dir{ New-Item -type directory -path $info_system -Force >$null New-Item -type directory -path $info_system\eks -Force >$null New-Item -type directory -path $info_system\docker -Force >$null + New-Item -type directory -path $info_system\containerd -Force >$null New-Item -type directory -path $info_system\firewall -Force >$null New-Item -type directory -path $info_system\kubelet -Force >$null New-Item -type directory -path $info_system\kube-proxy -Force >$null New-Item -type directory -path $info_system\cni -Force >$null New-Item -type directory -path $info_system\docker_log -Force >$null + New-Item -type directory -path $info_system\containerd_log -Force >$null New-Item -type directory -path $info_system\network -Force >$null New-Item -type directory -path $info_system\network\hns -Force >$null Write-Host "OK" -ForegroundColor "green" @@ -69,6 +71,38 @@ Function create_working_dir{ } } +Function check_service_installed_and_running { + <# + .SYNOPSIS + This method checks if the specified service is installed and in running state. 
+ #> + [CmdletBinding()] + Param ( + [Parameter(Mandatory=$true)] + [ValidateNotNullOrEmpty()] + [string]$ServiceName + ) + + Write-Host ("Checking status of service: {0}" -f $ServiceName) + try { + if (-not (Get-Service -Name $ServiceName -ErrorAction SilentlyContinue)) { + Write-Host ("Service {0} not found" -f $ServiceName) + return 0 + } + + if ((Get-Service -Name $ServiceName).Status -eq "Running") { + Write-Host ("Service {0} is running." -f $ServiceName) + return 1 + } + Write-Host ("Service {0} is not running." -f $ServiceName) + return 0 + } + catch { + Write-Error "Unable to check if service is installed and running" + break + } +} + Function get_sysinfo{ try { Write-Host "Collecting System information" @@ -181,18 +215,39 @@ Function get_system_services{ } } -Function get_docker_info{ - try { - Write-Host "Collecting Docker daemon information" - docker info > $info_system\docker\docker-info.txt 2>&1 - docker ps --all --no-trunc > $info_system\docker\docker-ps.txt 2>&1 - docker images > $info_system\docker\docker-images.txt 2>&1 - docker version > $info_system\docker\docker-version.txt 2>&1 - Write-Host "OK" -foregroundcolor "green" +Function get_containerd_info{ + Write-Host "Collecting Containerd information" + if (check_service_installed_and_running "containerd") { + try { + ctr version > $info_system\containerd\containerd-version.txt 2>&1 + ctr namespaces list > $info_system\containerd\containerd-namespaces.txt 2>&1 + ctr --namespace k8s.io images list > $info_system\containerd\containerd-images.txt 2>&1 + ctr --namespace k8s.io containers list > $info_system\containerd\containerd-containers.txt 2>&1 + ctr --namespace k8s.io tasks list > $info_system\containerd\containerd-tasks.txt 2>&1 + ctr --namespace k8s.io plugins list > $info_system\containerd\containerd-plugins.txt 2>&1 + Write-Host "OK" -foregroundcolor "green" + } + catch{ + Write-Error "Unable to collect Containerd information" + Break + } } - catch{ - Write-Error "Unable to collect Docker 
daemon information" - Break +} + +Function get_docker_info{ + Write-Host "Collecting Docker daemon information" + if (check_service_installed_and_running "docker") { + try { + docker info > $info_system\docker\docker-info.txt 2>&1 + docker ps --all --no-trunc > $info_system\docker\docker-ps.txt 2>&1 + docker images > $info_system\docker\docker-images.txt 2>&1 + docker version > $info_system\docker\docker-version.txt 2>&1 + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect Docker daemon information" + Break + } } } @@ -243,14 +298,30 @@ Function get_k8s_info{ } Function get_docker_logs{ - try { - Write-Host "Collecting Docker daemon logs" - Get-EventLog -LogName Application -Source Docker | Sort-Object Time | Export-CSV $info_system/docker_log/docker-daemon.csv - Write-Host "OK" -foregroundcolor "green" + Write-Host "Collecting Docker daemon logs" + if (check_service_installed_and_running "docker") { + try { + Get-EventLog -LogName Application -Source Docker | Sort-Object Time | Export-CSV $info_system/docker_log/docker-daemon.csv + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect Docker daemon logs" + Break + } } - catch { - Write-Error "Unable to collect Docker daemon logs" - Break +} + +Function get_containerd_logs{ + Write-Host "Collecting containerd logs" + if (check_service_installed_and_running "containerd") { + try { + copy C:\ProgramData\containerd\root\panic.log $info_system\containerd_log\ + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect containerd logs" + Break + } } } @@ -312,8 +383,10 @@ Function collect{ get_softwarelist get_system_services get_docker_info + get_containerd_info get_k8s_info get_docker_logs + get_containerd_logs get_eks_logs get_network_info From 0b4b45fc9f8e0b1e9b18f45f85aaf6b35636a1a6 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 3 Nov 2022 10:37:45 -0700 Subject: [PATCH 346/621] Fallback to shfmt in container 
(#1077) --- Makefile | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 32f431ab1..97d3374a6 100644 --- a/Makefile +++ b/Makefile @@ -33,18 +33,22 @@ T_RESET := \e[0m .PHONY: all all: 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI +# ensure that these flags are equivalent to the rules in the .editorconfig +SHFMT_FLAGS := --list \ +--language-dialect auto \ +--indent 2 \ +--binary-next-line \ +--case-indent \ +--space-redirects + +SHFMT_COMMAND := $(shell which shfmt) +ifeq (, $(SHFMT_COMMAND)) +SHFMT_COMMAND = docker run --rm -v $(MAKEFILE_DIR):$(MAKEFILE_DIR) mvdan/shfmt +endif + .PHONY: fmt fmt: ## Format the source files - # ensure that these flags are equivalent to the rules in the .editorconfig - shfmt \ - --list \ - --write \ - --language-dialect auto \ - --indent 2 \ - --binary-next-line \ - --case-indent \ - --space-redirects \ - $(MAKEFILE_DIR) + $(SHFMT_COMMAND) $(SHFMT_FLAGS) --write $(MAKEFILE_DIR) .PHONY: test test: ## run the test-harness From b9bd7f65ee84f1f285bacbcabd3cd145ecee318f Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Thu, 3 Nov 2022 16:49:18 -0500 Subject: [PATCH 347/621] Cleanup messages and imds-tokens (#1080) --- scripts/cleanup.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/cleanup.sh b/scripts/cleanup.sh index a661178b3..b9fff7987 100644 --- a/scripts/cleanup.sh +++ b/scripts/cleanup.sh @@ -25,6 +25,8 @@ sudo rm -rf \ /var/log/cloud-init-output.log \ /var/log/cloud-init.log \ /var/log/secure \ - /var/log/wtmp + /var/log/wtmp \ + /var/log/messages \ + /tmp/imds-tokens sudo touch /etc/machine-id From 1bb12c591dda81443b652e6adde44a5c62f0e4fc Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 3 Nov 2022 15:18:50 -0700 Subject: [PATCH 348/621] Garbage collection for imds tokens (#1081) --- files/bin/imds | 31 +++++++++++++++++++++++++------ test/cases/imds-token-refresh.sh | 30 ++++++++++++++++++++++++++++++ 2 files 
changed, 55 insertions(+), 6 deletions(-) diff --git a/files/bin/imds b/files/bin/imds index c73097b2f..88b372ddf 100755 --- a/files/bin/imds +++ b/files/bin/imds @@ -30,22 +30,41 @@ log "ℹ️ Talking to IMDS at $IMDS_ENDPOINT" TOKEN_DIR=/tmp/imds-tokens mkdir -p $TOKEN_DIR +IMDS_RETRIES=${IMDS_RETRIES:-10} +IMDS_RETRY_DELAY_SECONDS=${IMDS_RETRY_DELAY_SECONDS:-1} + +# default ttl is 15 minutes +IMDS_TOKEN_TTL_SECONDS=${IMDS_TOKEN_TTL_SECONDS:-900} + +# max ttl is 6 hours, see: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html +IMDS_MAX_TOKEN_TTL_SECONDS=${IMDS_MAX_TOKEN_TTL_SECONDS:-21600} + +# cleanup expired tokens +DELETED_TOKENS=0 +for TOKEN_FILE in $(ls $TOKEN_DIR | awk '$0 < '$(($CURRENT_TIME - $IMDS_MAX_TOKEN_TTL_SECONDS))); do + rm $TOKEN_DIR/$TOKEN_FILE + DELETED_TOKENS=$(($DELETED_TOKENS + 1)) +done +if [ "$DELETED_TOKENS" -gt 0 ]; then + log "🗑️ Deleted $DELETED_TOKENS expired IMDS token(s)." +fi + TOKEN_FILE=$(ls $TOKEN_DIR | awk '$0 > '$CURRENT_TIME | sort -n -r | head -n 1) if [ "$TOKEN_FILE" = "" ] then - # default ttl is 15 minutes - IMDS_TOKEN_TTL_SECONDS=${IMDS_TOKEN_TTL_SECONDS:-900} TOKEN_FILE=$(($CURRENT_TIME + $IMDS_TOKEN_TTL_SECONDS)) curl \ --silent \ --show-error \ - --retry 10 \ - --retry-delay 1 \ + --retry $IMDS_RETRIES \ + --retry-delay $IMDS_RETRY_DELAY_SECONDS \ -o $TOKEN_DIR/$TOKEN_FILE \ -H "X-aws-ec2-metadata-token-ttl-seconds: $IMDS_TOKEN_TTL_SECONDS" \ -X PUT \ "http://$IMDS_ENDPOINT/latest/api/token" + # make sure any user can utilize (and clean up) these tokens + chmod a+rwx $TOKEN_DIR/$TOKEN_FILE log "🔑 Retrieved a fresh IMDS token that will expire in $IMDS_TOKEN_TTL_SECONDS seconds." else log "ℹ️ Using cached IMDS token that expires in $(($TOKEN_FILE - $CURRENT_TIME)) seconds." 
@@ -54,8 +73,8 @@ fi curl \ --silent \ --show-error \ - --retry 10 \ - --retry-delay 1 \ + --retry $IMDS_RETRIES \ + --retry-delay $IMDS_RETRY_DELAY_SECONDS \ --write-out '\n' \ -H "X-aws-ec2-metadata-token: $(cat $TOKEN_DIR/$TOKEN_FILE)" \ "http://$IMDS_ENDPOINT/$API_PATH" diff --git a/test/cases/imds-token-refresh.sh b/test/cases/imds-token-refresh.sh index cc1334ec6..0947ec61c 100755 --- a/test/cases/imds-token-refresh.sh +++ b/test/cases/imds-token-refresh.sh @@ -47,3 +47,33 @@ then echo "❌ Test Failed: expected two tokens to be present after third IMDS call but got '$(ls $TOKEN_DIR)'" exit 1 fi + +sleep $(($TTL + 1)) + +# both tokens are now expired, but only one should be garbage-collected with a window of $TTL + +IMDS_MAX_TOKEN_TTL_SECONDS=$TTL imds /latest/meta-data/instance-id || exit_code=$? + +if [[ ${exit_code} -ne 0 ]] +then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]] +then + echo "❌ Test Failed: expected two tokens to be present after first garbage-collection but got '$(ls $TOKEN_DIR)'" + exit 1 +fi + +# the other expired token should be removed with a window of 0 + +IMDS_MAX_TOKEN_TTL_SECONDS=0 imds /latest/meta-data/instance-id || exit_code=$? + +if [[ ${exit_code} -ne 0 ]] +then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]] +then + echo "❌ Test Failed: expected one token to be present after second garbage-collection but got '$(ls $TOKEN_DIR)'" + exit 1 +fi \ No newline at end of file From 6014c4e6872a23f82ca295afa93b033207042876 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 3 Nov 2022 15:24:34 -0700 Subject: [PATCH 349/621] Apply style rules to source files. 
(#1076) --- files/bin/imds | 9 +- files/bootstrap.sh | 654 +++++++++--------- files/max-pods-calculator.sh | 174 +++-- files/pull-sandbox-image.sh | 36 +- .../linux/eks-log-collector.sh | 186 +++-- scripts/cleanup.sh | 38 +- scripts/cleanup_additional_repos.sh | 3 +- scripts/generate-version-info.sh | 3 +- scripts/install-worker.sh | 241 ++++--- scripts/install_additional_repos.sh | 3 +- scripts/upgrade_kernel.sh | 24 +- scripts/validate.sh | 9 +- test/cases/api-qps-k8s-1.21-below.sh | 18 +- test/cases/api-qps-k8s-1.22-above.sh | 18 +- test/cases/container-runtime-defaults.sh | 82 +-- test/cases/imds-token-refresh.sh | 72 +- .../ip-family-service-ipv6-cidr-mismatch.sh | 14 +- test/cases/ipv4-cluster-dns-ip.sh | 20 +- test/cases/ipv6-cluster-dns-ip.sh | 20 +- ...-dns-cluster-ip-given-service-ipv6-cidr.sh | 20 +- .../ipv6-ip-family-and-service-ipv6-cidr.sh | 14 +- test/cases/max-pods-cni-1-11-2-delegation.sh | 14 +- test/cases/max-pods-cni-1-11-2.sh | 12 +- test/cases/max-pods-cni-1-7-5.sh | 14 +- test/entrypoint.sh | 7 +- test/mocks/aws | 23 +- test/mocks/iptables-save | 2 +- test/mocks/kubelet | 4 +- test/mocks/sudo | 2 +- test/mocks/systemctl | 2 +- test/test-harness.sh | 68 +- 31 files changed, 899 insertions(+), 907 deletions(-) diff --git a/files/bin/imds b/files/bin/imds index 88b372ddf..e9f8e749d 100755 --- a/files/bin/imds +++ b/files/bin/imds @@ -6,14 +6,12 @@ set -o nounset IMDS_DEBUG="${IMDS_DEBUG:-false}" function log() { - if [ "$IMDS_DEBUG" = "true" ] - then + if [ "$IMDS_DEBUG" = "true" ]; then echo >&2 "$1" fi } -if [ "$#" -ne 1 ] -then +if [ "$#" -ne 1 ]; then echo >&2 "usage: imds API_PATH" exit 1 fi @@ -51,8 +49,7 @@ fi TOKEN_FILE=$(ls $TOKEN_DIR | awk '$0 > '$CURRENT_TIME | sort -n -r | head -n 1) -if [ "$TOKEN_FILE" = "" ] -then +if [ "$TOKEN_FILE" = "" ]; then TOKEN_FILE=$(($CURRENT_TIME + $IMDS_TOKEN_TTL_SECONDS)) curl \ --silent \ diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 7d8f4b0aa..c7aa93c22 100755 --- a/files/bootstrap.sh 
+++ b/files/bootstrap.sh @@ -5,129 +5,129 @@ set -o nounset set -o errexit err_report() { - echo "Exited with error on line $1" + echo "Exited with error on line $1" } trap 'err_report $LINENO' ERR IFS=$'\n\t' function print_help { - echo "usage: $0 [options] " - echo "Bootstraps an instance into an EKS cluster" - echo "" - echo "-h,--help print this help" - echo "--use-max-pods Sets --max-pods for the kubelet when true. (default: true)" - echo "--b64-cluster-ca The base64 encoded cluster CA content. Only valid when used with --apiserver-endpoint. Bypasses calling \"aws eks describe-cluster\"" - echo "--apiserver-endpoint The EKS cluster API Server endpoint. Only valid when used with --b64-cluster-ca. Bypasses calling \"aws eks describe-cluster\"" - echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints." - echo "--enable-docker-bridge Restores the docker default bridge network. (default: false)" - echo "--aws-api-retry-attempts Number of retry attempts for AWS API call (DescribeCluster) (default: 3)" - echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" - echo "--containerd-config-file File containing the containerd configuration to be used in place of AMI defaults." - echo "--dns-cluster-ip Overrides the IP address to use for DNS queries within the cluster. 
Defaults to 10.100.0.10 or 172.20.0.10 based on the IP address of the primary interface" - echo "--pause-container-account The AWS account (number) to pull the pause container from" - echo "--pause-container-version The tag of the pause container" - echo "--container-runtime Specify a container runtime (default: dockerd)" - echo "--ip-family Specify ip family of the cluster" - echo "--service-ipv6-cidr ipv6 cidr range of the cluster" - echo "--enable-local-outpost Enable support for worker nodes to communicate with the local control plane when running on a disconnected Outpost. (true or false)" - echo "--cluster-id Specify the id of EKS cluster" + echo "usage: $0 [options] " + echo "Bootstraps an instance into an EKS cluster" + echo "" + echo "-h,--help print this help" + echo "--use-max-pods Sets --max-pods for the kubelet when true. (default: true)" + echo "--b64-cluster-ca The base64 encoded cluster CA content. Only valid when used with --apiserver-endpoint. Bypasses calling \"aws eks describe-cluster\"" + echo "--apiserver-endpoint The EKS cluster API Server endpoint. Only valid when used with --b64-cluster-ca. Bypasses calling \"aws eks describe-cluster\"" + echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints." + echo "--enable-docker-bridge Restores the docker default bridge network. (default: false)" + echo "--aws-api-retry-attempts Number of retry attempts for AWS API call (DescribeCluster) (default: 3)" + echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" + echo "--containerd-config-file File containing the containerd configuration to be used in place of AMI defaults." + echo "--dns-cluster-ip Overrides the IP address to use for DNS queries within the cluster. 
Defaults to 10.100.0.10 or 172.20.0.10 based on the IP address of the primary interface" + echo "--pause-container-account The AWS account (number) to pull the pause container from" + echo "--pause-container-version The tag of the pause container" + echo "--container-runtime Specify a container runtime (default: dockerd)" + echo "--ip-family Specify ip family of the cluster" + echo "--service-ipv6-cidr ipv6 cidr range of the cluster" + echo "--enable-local-outpost Enable support for worker nodes to communicate with the local control plane when running on a disconnected Outpost. (true or false)" + echo "--cluster-id Specify the id of EKS cluster" } POSITIONAL=() while [[ $# -gt 0 ]]; do - key="$1" - case $key in - -h|--help) - print_help - exit 1 - ;; - --use-max-pods) - USE_MAX_PODS="$2" - shift - shift - ;; - --b64-cluster-ca) - B64_CLUSTER_CA=$2 - shift - shift - ;; - --apiserver-endpoint) - APISERVER_ENDPOINT=$2 - shift - shift - ;; - --kubelet-extra-args) - KUBELET_EXTRA_ARGS=$2 - shift - shift - ;; - --enable-docker-bridge) - ENABLE_DOCKER_BRIDGE=$2 - shift - shift - ;; - --aws-api-retry-attempts) - API_RETRY_ATTEMPTS=$2 - shift - shift - ;; - --docker-config-json) - DOCKER_CONFIG_JSON=$2 - shift - shift - ;; - --containerd-config-file) - CONTAINERD_CONFIG_FILE=$2 - shift - shift - ;; - --pause-container-account) - PAUSE_CONTAINER_ACCOUNT=$2 - shift - shift - ;; - --pause-container-version) - PAUSE_CONTAINER_VERSION=$2 - shift - shift - ;; - --dns-cluster-ip) - DNS_CLUSTER_IP=$2 - shift - shift - ;; - --container-runtime) - CONTAINER_RUNTIME=$2 - shift - shift - ;; - --ip-family) - IP_FAMILY=$2 - shift - shift - ;; - --service-ipv6-cidr) - SERVICE_IPV6_CIDR=$2 - shift - shift - ;; - --enable-local-outpost) - ENABLE_LOCAL_OUTPOST=$2 - shift - shift - ;; - --cluster-id) - CLUSTER_ID=$2 - shift - shift - ;; - *) # unknown option - POSITIONAL+=("$1") # save it in an array for later - shift # past argument - ;; - esac + key="$1" + case $key in + -h | --help) + 
print_help + exit 1 + ;; + --use-max-pods) + USE_MAX_PODS="$2" + shift + shift + ;; + --b64-cluster-ca) + B64_CLUSTER_CA=$2 + shift + shift + ;; + --apiserver-endpoint) + APISERVER_ENDPOINT=$2 + shift + shift + ;; + --kubelet-extra-args) + KUBELET_EXTRA_ARGS=$2 + shift + shift + ;; + --enable-docker-bridge) + ENABLE_DOCKER_BRIDGE=$2 + shift + shift + ;; + --aws-api-retry-attempts) + API_RETRY_ATTEMPTS=$2 + shift + shift + ;; + --docker-config-json) + DOCKER_CONFIG_JSON=$2 + shift + shift + ;; + --containerd-config-file) + CONTAINERD_CONFIG_FILE=$2 + shift + shift + ;; + --pause-container-account) + PAUSE_CONTAINER_ACCOUNT=$2 + shift + shift + ;; + --pause-container-version) + PAUSE_CONTAINER_VERSION=$2 + shift + shift + ;; + --dns-cluster-ip) + DNS_CLUSTER_IP=$2 + shift + shift + ;; + --container-runtime) + CONTAINER_RUNTIME=$2 + shift + shift + ;; + --ip-family) + IP_FAMILY=$2 + shift + shift + ;; + --service-ipv6-cidr) + SERVICE_IPV6_CIDR=$2 + shift + shift + ;; + --enable-local-outpost) + ENABLE_LOCAL_OUTPOST=$2 + shift + shift + ;; + --cluster-id) + CLUSTER_ID=$2 + shift + shift + ;; + *) # unknown option + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; + esac done set +u @@ -139,10 +139,10 @@ KUBELET_VERSION=$(kubelet --version | grep -Eo '[0-9]\.[0-9]+\.[0-9]+') echo "Using kubelet version $KUBELET_VERSION" function is_greater_than_or_equal_to_version() { - local actual_version="$1" - local compared_version="$2" + local actual_version="$1" + local compared_version="$2" - [ $actual_version = "`echo -e \"$actual_version\n$compared_version\" | sort -V | tail -n1`" ] + [ $actual_version = "$(echo -e "$actual_version\n$compared_version" | sort -V | tail -n1)" ] } # As of Kubernetes version 1.24, we will start defaulting the container runtime to containerd @@ -150,8 +150,8 @@ function is_greater_than_or_equal_to_version() { IS_124_OR_GREATER=false DEFAULT_CONTAINER_RUNTIME=dockerd if is_greater_than_or_equal_to_version 
$KUBELET_VERSION "1.24.0"; then - IS_124_OR_GREATER=true - DEFAULT_CONTAINER_RUNTIME=containerd + IS_124_OR_GREATER=true + DEFAULT_CONTAINER_RUNTIME=containerd fi # Set container runtime related variables @@ -162,8 +162,8 @@ CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-$DEFAULT_CONTAINER_RUNTIME}" echo "Using $CONTAINER_RUNTIME as the container runtime" if $IS_124_OR_GREATER && [ $CONTAINER_RUNTIME != "containerd" ]; then - echo "ERROR: containerd is the only supported container runtime as of Kubernetes version 1.24" - exit 1 + echo "ERROR: containerd is the only supported container runtime as of Kubernetes version 1.24" + exit 1 fi USE_MAX_PODS="${USE_MAX_PODS:-true}" @@ -180,36 +180,49 @@ SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" CLUSTER_ID="${CLUSTER_ID:-}" -function get_pause_container_account_for_region () { - local region="$1" - case "${region}" in +function get_pause_container_account_for_region() { + local region="$1" + case "${region}" in ap-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-800184023465}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-800184023465}" + ;; me-south-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-558608220178}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-558608220178}" + ;; cn-north-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-918309763551}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-918309763551}" + ;; cn-northwest-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-961992271922}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-961992271922}" + ;; us-gov-west-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-013241004608}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-013241004608}" + ;; us-gov-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-151742754352}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-151742754352}" + ;; us-iso-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-725322719131}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-725322719131}" + ;; us-isob-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-187977181151}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-187977181151}" + ;; af-south-1) - echo 
"${PAUSE_CONTAINER_ACCOUNT:-877085696533}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-877085696533}" + ;; eu-south-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-590381155156}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-590381155156}" + ;; ap-southeast-3) - echo "${PAUSE_CONTAINER_ACCOUNT:-296578399912}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-296578399912}" + ;; me-central-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-759879836304}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-759879836304}" + ;; *) - echo "${PAUSE_CONTAINER_ACCOUNT:-602401143452}";; - esac + echo "${PAUSE_CONTAINER_ACCOUNT:-602401143452}" + ;; + esac } # Helper function which calculates the amount of the given resource (either CPU or memory) @@ -230,9 +243,8 @@ get_resource_to_reserve_in_range() { local end_range=$3 local percentage=$4 resources_to_reserve="0" - if (( $total_resource_on_instance > $start_range )); then - resources_to_reserve=$(((($total_resource_on_instance < $end_range ? \ - $total_resource_on_instance : $end_range) - $start_range) * $percentage / 100 / 100)) + if (($total_resource_on_instance > $start_range)); then + resources_to_reserve=$(((($total_resource_on_instance < $end_range ? 
$total_resource_on_instance : $end_range) - $start_range) * $percentage / 100 / 100)) fi echo $resources_to_reserve } @@ -267,33 +279,32 @@ get_cpu_millicores_to_reserve() { cpu_to_reserve="0" for i in "${!cpu_percentage_reserved_for_ranges[@]}"; do local start_range=${cpu_ranges[$i]} - local end_range=${cpu_ranges[(($i+1))]} + local end_range=${cpu_ranges[(($i + 1))]} local percentage_to_reserve_for_range=${cpu_percentage_reserved_for_ranges[$i]} - cpu_to_reserve=$(($cpu_to_reserve + \ - $(get_resource_to_reserve_in_range $total_cpu_on_instance $start_range $end_range $percentage_to_reserve_for_range))) + cpu_to_reserve=$(($cpu_to_reserve + $(get_resource_to_reserve_in_range $total_cpu_on_instance $start_range $end_range $percentage_to_reserve_for_range))) done echo $cpu_to_reserve } if [ -z "$CLUSTER_NAME" ]; then - echo "CLUSTER_NAME is not defined" - exit 1 + echo "CLUSTER_NAME is not defined" + exit 1 fi if [[ ! -z "${IP_FAMILY}" ]]; then IP_FAMILY="$(tr [A-Z] [a-z] <<< "$IP_FAMILY")" - if [[ "${IP_FAMILY}" != "ipv4" ]] && [[ "${IP_FAMILY}" != "ipv6" ]] ; then - echo "Invalid IpFamily. Only ipv4 or ipv6 are allowed" - exit 1 + if [[ "${IP_FAMILY}" != "ipv4" ]] && [[ "${IP_FAMILY}" != "ipv6" ]]; then + echo "Invalid IpFamily. Only ipv4 or ipv6 are allowed" + exit 1 fi fi if [[ ! 
-z "${SERVICE_IPV6_CIDR}" ]]; then - if [[ "${IP_FAMILY}" == "ipv4" ]]; then - echo "ip-family should be ipv6 when service-ipv6-cidr is specified" - exit 1 - fi - IP_FAMILY="ipv6" + if [[ "${IP_FAMILY}" == "ipv4" ]]; then + echo "ip-family should be ipv6 when service-ipv6-cidr is specified" + exit 1 + fi + IP_FAMILY="ipv6" fi AWS_DEFAULT_REGION=$(imds 'latest/dynamic/instance-identity/document' | jq .region -r) @@ -301,8 +312,8 @@ AWS_SERVICES_DOMAIN=$(imds 'latest/meta-data/services/domain') MACHINE=$(uname -m) if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then - echo "Unknown machine architecture '$MACHINE'" >&2 - exit 1 + echo "Unknown machine architecture '$MACHINE'" >&2 + exit 1 fi PAUSE_CONTAINER_ACCOUNT=$(get_pause_container_account_for_region "${AWS_DEFAULT_REGION}") @@ -315,62 +326,62 @@ CA_CERTIFICATE_DIRECTORY=/etc/kubernetes/pki CA_CERTIFICATE_FILE_PATH=$CA_CERTIFICATE_DIRECTORY/ca.crt mkdir -p $CA_CERTIFICATE_DIRECTORY if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then - DESCRIBE_CLUSTER_RESULT="/tmp/describe_cluster_result.txt" - - # Retry the DescribeCluster API for API_RETRY_ATTEMPTS - for attempt in `seq 0 $API_RETRY_ATTEMPTS`; do - rc=0 - if [[ $attempt -gt 0 ]]; then - echo "Attempt $attempt of $API_RETRY_ATTEMPTS" - fi - - aws eks wait cluster-active \ - --region=${AWS_DEFAULT_REGION} \ - --name=${CLUSTER_NAME} - - aws eks describe-cluster \ - --region=${AWS_DEFAULT_REGION} \ - --name=${CLUSTER_NAME} \ - --output=text \ - --query 'cluster.{certificateAuthorityData: certificateAuthority.data, endpoint: endpoint, serviceIpv4Cidr: kubernetesNetworkConfig.serviceIpv4Cidr, serviceIpv6Cidr: kubernetesNetworkConfig.serviceIpv6Cidr, clusterIpFamily: kubernetesNetworkConfig.ipFamily, outpostArn: outpostConfig.outpostArns[0], id: id}' > $DESCRIBE_CLUSTER_RESULT || rc=$? 
- if [[ $rc -eq 0 ]]; then - break - fi - if [[ $attempt -eq $API_RETRY_ATTEMPTS ]]; then - exit $rc - fi - jitter=$((1 + RANDOM % 10)) - sleep_sec="$(( $(( 5 << $((1+$attempt)) )) + $jitter))" - sleep $sleep_sec - done - B64_CLUSTER_CA=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $1}') - APISERVER_ENDPOINT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $3}') - CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $4}') - OUTPOST_ARN=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $5}') - SERVICE_IPV4_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $6}') - SERVICE_IPV6_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $7}') - - if [[ -z "${IP_FAMILY}" ]]; then - IP_FAMILY=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $2}') - fi + DESCRIBE_CLUSTER_RESULT="/tmp/describe_cluster_result.txt" - # Automatically detect local cluster in outpost - if [[ -z "${OUTPOST_ARN}" ]] || [[ "${OUTPOST_ARN}" == "None" ]]; then - IS_LOCAL_OUTPOST_DETECTED=false - else - IS_LOCAL_OUTPOST_DETECTED=true + # Retry the DescribeCluster API for API_RETRY_ATTEMPTS + for attempt in $(seq 0 $API_RETRY_ATTEMPTS); do + rc=0 + if [[ $attempt -gt 0 ]]; then + echo "Attempt $attempt of $API_RETRY_ATTEMPTS" fi - # If the cluster id is returned from describe cluster, let us use it no matter whether cluster id is passed from option - if [[ ! 
-z "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" ]] && [[ "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" != "None" ]]; then - CLUSTER_ID=${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT} + aws eks wait cluster-active \ + --region=${AWS_DEFAULT_REGION} \ + --name=${CLUSTER_NAME} + + aws eks describe-cluster \ + --region=${AWS_DEFAULT_REGION} \ + --name=${CLUSTER_NAME} \ + --output=text \ + --query 'cluster.{certificateAuthorityData: certificateAuthority.data, endpoint: endpoint, serviceIpv4Cidr: kubernetesNetworkConfig.serviceIpv4Cidr, serviceIpv6Cidr: kubernetesNetworkConfig.serviceIpv6Cidr, clusterIpFamily: kubernetesNetworkConfig.ipFamily, outpostArn: outpostConfig.outpostArns[0], id: id}' > $DESCRIBE_CLUSTER_RESULT || rc=$? + if [[ $rc -eq 0 ]]; then + break fi + if [[ $attempt -eq $API_RETRY_ATTEMPTS ]]; then + exit $rc + fi + jitter=$((1 + RANDOM % 10)) + sleep_sec="$(($((5 << $((1 + $attempt)))) + $jitter))" + sleep $sleep_sec + done + B64_CLUSTER_CA=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $1}') + APISERVER_ENDPOINT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $3}') + CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $4}') + OUTPOST_ARN=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $5}') + SERVICE_IPV4_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $6}') + SERVICE_IPV6_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $7}') + + if [[ -z "${IP_FAMILY}" ]]; then + IP_FAMILY=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $2}') + fi + + # Automatically detect local cluster in outpost + if [[ -z "${OUTPOST_ARN}" ]] || [[ "${OUTPOST_ARN}" == "None" ]]; then + IS_LOCAL_OUTPOST_DETECTED=false + else + IS_LOCAL_OUTPOST_DETECTED=true + fi + + # If the cluster id is returned from describe cluster, let us use it no matter whether cluster id is passed from option + if [[ ! 
-z "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" ]] && [[ "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" != "None" ]]; then + CLUSTER_ID=${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT} + fi fi if [[ -z "${IP_FAMILY}" ]] || [[ "${IP_FAMILY}" == "None" ]]; then - ### this can happen when the ipFamily field is not found in describeCluster response - ### or B64_CLUSTER_CA and APISERVER_ENDPOINT are defined but IPFamily isn't - IP_FAMILY="ipv4" + ### this can happen when the ipFamily field is not found in describeCluster response + ### or B64_CLUSTER_CA and APISERVER_ENDPOINT are defined but IPFamily isn't + IP_FAMILY="ipv4" fi echo $B64_CLUSTER_CA | base64 -d > $CA_CERTIFICATE_FILE_PATH @@ -379,11 +390,11 @@ sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig sed -i s,AWS_REGION,$AWS_DEFAULT_REGION,g /var/lib/kubelet/kubeconfig if [[ -z "$ENABLE_LOCAL_OUTPOST" ]]; then - # Only when "--enable-local-outpost" option is not set explicity on calling bootstrap.sh, it will be assigned with - # - the result of auto-detectection through describe-cluster - # - or "false" when describe-cluster is bypassed. - # This also means if "--enable-local-outpost" option is set explicity, it will override auto-detection result - ENABLE_LOCAL_OUTPOST="${IS_LOCAL_OUTPOST_DETECTED:-false}" + # Only when "--enable-local-outpost" option is not set explicity on calling bootstrap.sh, it will be assigned with + # - the result of auto-detectection through describe-cluster + # - or "false" when describe-cluster is bypassed. + # This also means if "--enable-local-outpost" option is set explicity, it will override auto-detection result + ENABLE_LOCAL_OUTPOST="${IS_LOCAL_OUTPOST_DETECTED:-false}" fi ### To support worker nodes to continue to communicate and connect to local cluster even when the Outpost @@ -395,32 +406,31 @@ fi ### worker node can be authentiacated through X.509 certificate which works for both connected and #### disconnected state. 
if [[ "${ENABLE_LOCAL_OUTPOST}" == "true" ]]; then - ### append to /etc/hosts file with shuffled mappings of "IP address to API server domain name" - DOMAIN_NAME=$(echo "$APISERVER_ENDPOINT" | awk -F/ '{print $3}' | awk -F: '{print $1}') - getent hosts "$DOMAIN_NAME" | shuf >> /etc/hosts - - ### kubelet bootstrap kubeconfig uses aws-iam-authenticator with cluster id to authenticate to cluster - ### - if "aws eks describe-cluster" is bypassed, for local outpost, the value of CLUSTER_NAME parameter will be cluster id. - ### - otherwise, the cluster id will use the id returned by "aws eks describe-cluster". - if [[ -z "${CLUSTER_ID}" ]]; then - echo "Cluster ID is required when local outpost support is enabled" - exit 1 - else - sed -i s,CLUSTER_NAME,$CLUSTER_ID,g /var/lib/kubelet/kubeconfig + ### append to /etc/hosts file with shuffled mappings of "IP address to API server domain name" + DOMAIN_NAME=$(echo "$APISERVER_ENDPOINT" | awk -F/ '{print $3}' | awk -F: '{print $1}') + getent hosts "$DOMAIN_NAME" | shuf >> /etc/hosts + + ### kubelet bootstrap kubeconfig uses aws-iam-authenticator with cluster id to authenticate to cluster + ### - if "aws eks describe-cluster" is bypassed, for local outpost, the value of CLUSTER_NAME parameter will be cluster id. + ### - otherwise, the cluster id will use the id returned by "aws eks describe-cluster". 
+ if [[ -z "${CLUSTER_ID}" ]]; then + echo "Cluster ID is required when local outpost support is enabled" + exit 1 + else + sed -i s,CLUSTER_NAME,$CLUSTER_ID,g /var/lib/kubelet/kubeconfig - ### use aws-iam-authenticator as bootstrap auth and download X.509 cert used in kubelet kubeconfig - mv /var/lib/kubelet/kubeconfig /var/lib/kubelet/bootstrap-kubeconfig - KUBELET_EXTRA_ARGS="--bootstrap-kubeconfig /var/lib/kubelet/bootstrap-kubeconfig $KUBELET_EXTRA_ARGS" - fi + ### use aws-iam-authenticator as bootstrap auth and download X.509 cert used in kubelet kubeconfig + mv /var/lib/kubelet/kubeconfig /var/lib/kubelet/bootstrap-kubeconfig + KUBELET_EXTRA_ARGS="--bootstrap-kubeconfig /var/lib/kubelet/bootstrap-kubeconfig $KUBELET_EXTRA_ARGS" + fi else - sed -i s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig + sed -i s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig fi ### kubelet.service configuration MAC=$(imds 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$//') - if [[ -z "${DNS_CLUSTER_IP}" ]]; then if [[ "${IP_FAMILY}" == "ipv6" ]]; then if [[ -z "${SERVICE_IPV6_CIDR}" ]]; then @@ -432,14 +442,14 @@ if [[ -z "${DNS_CLUSTER_IP}" ]]; then if [[ "${IP_FAMILY}" == "ipv4" ]]; then if [[ ! -z "${SERVICE_IPV4_CIDR}" ]] && [[ "${SERVICE_IPV4_CIDR}" != "None" ]]; then - #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. (x.y.z.10) - DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 + #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. 
(x.y.z.10) + DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 else - TEN_RANGE=$(imds "latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks" | grep -c '^10\..*' || true ) - DNS_CLUSTER_IP=10.100.0.10 - if [[ "$TEN_RANGE" != "0" ]]; then - DNS_CLUSTER_IP=172.20.0.10 - fi + TEN_RANGE=$(imds "latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks" | grep -c '^10\..*' || true) + DNS_CLUSTER_IP=10.100.0.10 + if [[ "$TEN_RANGE" != "0" ]]; then + DNS_CLUSTER_IP=172.20.0.10 + fi fi fi else @@ -450,19 +460,18 @@ KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json echo "$(jq ".clusterDNS=[\"$DNS_CLUSTER_IP\"]" $KUBELET_CONFIG)" > $KUBELET_CONFIG if [[ "${IP_FAMILY}" == "ipv4" ]]; then - INTERNAL_IP=$(imds 'latest/meta-data/local-ipv4') + INTERNAL_IP=$(imds 'latest/meta-data/local-ipv4') else - INTERNAL_IP_URI=latest/meta-data/network/interfaces/macs/$MAC/ipv6s - INTERNAL_IP=$(imds $INTERNAL_IP_URI) + INTERNAL_IP_URI=latest/meta-data/network/interfaces/macs/$MAC/ipv6s + INTERNAL_IP=$(imds $INTERNAL_IP_URI) fi INSTANCE_TYPE=$(imds 'latest/meta-data/instance-type') if is_greater_than_or_equal_to_version $KUBELET_VERSION "1.22.0"; then - # for K8s versions that suport API Priority & Fairness, increase our API server QPS - echo $(jq ".kubeAPIQPS=( .kubeAPIQPS // 10)|.kubeAPIBurst=( .kubeAPIBurst // 20)" $KUBELET_CONFIG) > $KUBELET_CONFIG + # for K8s versions that suport API Priority & Fairness, increase our API server QPS + echo $(jq ".kubeAPIQPS=( .kubeAPIQPS // 10)|.kubeAPIBurst=( .kubeAPIBurst // 20)" $KUBELET_CONFIG) > $KUBELET_CONFIG fi - # Sets kubeReserved and evictionHard in /etc/kubernetes/kubelet/kubelet-config.json for worker nodes. The following two function # calls calculate the CPU and memory resources to reserve for kubeReserved based on the instance type of the worker node. 
# Note that allocatable memory and CPU resources on worker nodes is calculated by the Kubernetes scheduler @@ -474,11 +483,11 @@ set +o pipefail MAX_PODS=$(cat $MAX_PODS_FILE | awk "/^${INSTANCE_TYPE:-unset}/"' { print $2 }') set -o pipefail if [ -z "$MAX_PODS" ] || [ -z "$INSTANCE_TYPE" ]; then - echo "No entry for type '$INSTANCE_TYPE' in $MAX_PODS_FILE. Will attempt to auto-discover value." - # When determining the value of maxPods, we're using the legacy calculation by default since it's more restrictive than - # the PrefixDelegation based alternative and is likely to be in-use by more customers. - # The legacy numbers also maintain backwards compatibility when used to calculate `kubeReserved.memory` - MAX_PODS=$(/etc/eks/max-pods-calculator.sh --instance-type-from-imds --cni-version 1.10.0 --show-max-allowed) + echo "No entry for type '$INSTANCE_TYPE' in $MAX_PODS_FILE. Will attempt to auto-discover value." + # When determining the value of maxPods, we're using the legacy calculation by default since it's more restrictive than + # the PrefixDelegation based alternative and is likely to be in-use by more customers. + # The legacy numbers also maintain backwards compatibility when used to calculate `kubeReserved.memory` + MAX_PODS=$(/etc/eks/max-pods-calculator.sh --instance-type-from-imds --cni-version 1.10.0 --show-max-allowed) fi # calculates the amount of each resource to reserve @@ -487,115 +496,114 @@ cpu_millicores_to_reserve=$(get_cpu_millicores_to_reserve) # writes kubeReserved and evictionHard to the kubelet-config using the amount of CPU and memory to be reserved echo "$(jq '. += {"evictionHard": {"memory.available": "100Mi", "nodefs.available": "10%", "nodefs.inodesFree": "5%"}}' $KUBELET_CONFIG)" > $KUBELET_CONFIG echo "$(jq --arg mebibytes_to_reserve "${mebibytes_to_reserve}Mi" --arg cpu_millicores_to_reserve "${cpu_millicores_to_reserve}m" \ - '. 
+= {kubeReserved: {"cpu": $cpu_millicores_to_reserve, "ephemeral-storage": "1Gi", "memory": $mebibytes_to_reserve}}' $KUBELET_CONFIG)" > $KUBELET_CONFIG + '. += {kubeReserved: {"cpu": $cpu_millicores_to_reserve, "ephemeral-storage": "1Gi", "memory": $mebibytes_to_reserve}}' $KUBELET_CONFIG)" > $KUBELET_CONFIG if [[ "$USE_MAX_PODS" = "true" ]]; then - echo "$(jq ".maxPods=$MAX_PODS" $KUBELET_CONFIG)" > $KUBELET_CONFIG + echo "$(jq ".maxPods=$MAX_PODS" $KUBELET_CONFIG)" > $KUBELET_CONFIG fi mkdir -p /etc/systemd/system/kubelet.service.d -cat < /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf +cat << EOF > /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf [Service] Environment='KUBELET_ARGS=--node-ip=$INTERNAL_IP --pod-infra-container-image=$PAUSE_CONTAINER --v=2' EOF if [[ -n "$KUBELET_EXTRA_ARGS" ]]; then - cat < /etc/systemd/system/kubelet.service.d/30-kubelet-extra-args.conf + cat << EOF > /etc/systemd/system/kubelet.service.d/30-kubelet-extra-args.conf [Service] Environment='KUBELET_EXTRA_ARGS=$KUBELET_EXTRA_ARGS' EOF fi if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then - if $ENABLE_DOCKER_BRIDGE; then - echo "WARNING: Flag --enable-docker-bridge was set but will be ignored as it's not relevant to containerd" - fi + if $ENABLE_DOCKER_BRIDGE; then + echo "WARNING: Flag --enable-docker-bridge was set but will be ignored as it's not relevant to containerd" + fi - if [ ! -z "$DOCKER_CONFIG_JSON" ]; then - echo "WARNING: Flag --docker-config-json was set but will be ignored as it's not relevant to containerd" - fi + if [ ! 
-z "$DOCKER_CONFIG_JSON" ]; then + echo "WARNING: Flag --docker-config-json was set but will be ignored as it's not relevant to containerd" + fi - sudo mkdir -p /etc/containerd - sudo mkdir -p /etc/cni/net.d - mkdir -p /etc/systemd/system/containerd.service.d - cat < /etc/systemd/system/containerd.service.d/10-compat-symlink.conf + sudo mkdir -p /etc/containerd + sudo mkdir -p /etc/cni/net.d + mkdir -p /etc/systemd/system/containerd.service.d + cat << EOF > /etc/systemd/system/containerd.service.d/10-compat-symlink.conf [Service] ExecStartPre=/bin/ln -sf /run/containerd/containerd.sock /run/dockershim.sock EOF - if [[ -n "$CONTAINERD_CONFIG_FILE" ]]; then - sudo cp -v $CONTAINERD_CONFIG_FILE /etc/eks/containerd/containerd-config.toml - fi - echo "$(jq '.cgroupDriver="systemd"' $KUBELET_CONFIG)" > $KUBELET_CONFIG - sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml - sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml - sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service - sudo cp -v /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service - sudo chown root:root /etc/systemd/system/kubelet.service - sudo chown root:root /etc/systemd/system/sandbox-image.service - systemctl daemon-reload - systemctl enable containerd - systemctl restart containerd - systemctl enable sandbox-image - systemctl start sandbox-image + if [[ -n "$CONTAINERD_CONFIG_FILE" ]]; then + sudo cp -v $CONTAINERD_CONFIG_FILE /etc/eks/containerd/containerd-config.toml + fi + echo "$(jq '.cgroupDriver="systemd"' $KUBELET_CONFIG)" > $KUBELET_CONFIG + sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml + sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml + sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service + sudo cp -v 
/etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service + sudo chown root:root /etc/systemd/system/kubelet.service + sudo chown root:root /etc/systemd/system/sandbox-image.service + systemctl daemon-reload + systemctl enable containerd + systemctl restart containerd + systemctl enable sandbox-image + systemctl start sandbox-image elif [[ "$CONTAINER_RUNTIME" = "dockerd" ]]; then - mkdir -p /etc/docker - bash -c "/sbin/iptables-save > /etc/sysconfig/iptables" - cp -v /etc/eks/iptables-restore.service /etc/systemd/system/iptables-restore.service - sudo chown root:root /etc/systemd/system/iptables-restore.service - systemctl daemon-reload - systemctl enable iptables-restore - - if [[ -n "$DOCKER_CONFIG_JSON" ]]; then - echo "$DOCKER_CONFIG_JSON" > /etc/docker/daemon.json - fi - if [[ "$ENABLE_DOCKER_BRIDGE" = "true" ]]; then - # Enabling the docker bridge network. We have to disable live-restore as it - # prevents docker from recreating the default bridge network on restart - echo "$(jq '.bridge="docker0" | ."live-restore"=false' /etc/docker/daemon.json)" > /etc/docker/daemon.json - fi - systemctl daemon-reload - systemctl enable docker - systemctl restart docker + mkdir -p /etc/docker + bash -c "/sbin/iptables-save > /etc/sysconfig/iptables" + cp -v /etc/eks/iptables-restore.service /etc/systemd/system/iptables-restore.service + sudo chown root:root /etc/systemd/system/iptables-restore.service + systemctl daemon-reload + systemctl enable iptables-restore + + if [[ -n "$DOCKER_CONFIG_JSON" ]]; then + echo "$DOCKER_CONFIG_JSON" > /etc/docker/daemon.json + fi + if [[ "$ENABLE_DOCKER_BRIDGE" = "true" ]]; then + # Enabling the docker bridge network. 
We have to disable live-restore as it + # prevents docker from recreating the default bridge network on restart + echo "$(jq '.bridge="docker0" | ."live-restore"=false' /etc/docker/daemon.json)" > /etc/docker/daemon.json + fi + systemctl daemon-reload + systemctl enable docker + systemctl restart docker else - echo "Container runtime ${CONTAINER_RUNTIME} is not supported." - exit 1 + echo "Container runtime ${CONTAINER_RUNTIME} is not supported." + exit 1 fi - systemctl enable kubelet systemctl start kubelet # gpu boost clock -if command -v nvidia-smi &>/dev/null ; then - echo "nvidia-smi found" - - nvidia-smi -q > /tmp/nvidia-smi-check - if [[ "$?" == "0" ]]; then - sudo nvidia-smi -pm 1 # set persistence mode - sudo nvidia-smi --auto-boost-default=0 - - GPUNAME=$(nvidia-smi -L | head -n1) - echo $GPUNAME - - # set application clock to maximum - if [[ $GPUNAME == *"A100"* ]]; then - nvidia-smi -ac 1215,1410 - elif [[ $GPUNAME == *"V100"* ]]; then - nvidia-smi -ac 877,1530 - elif [[ $GPUNAME == *"K80"* ]]; then - nvidia-smi -ac 2505,875 - elif [[ $GPUNAME == *"T4"* ]]; then - nvidia-smi -ac 5001,1590 - elif [[ $GPUNAME == *"M60"* ]]; then - nvidia-smi -ac 2505,1177 - else - echo "unsupported gpu" - fi - else - cat /tmp/nvidia-smi-check - fi +if command -v nvidia-smi &> /dev/null; then + echo "nvidia-smi found" + + nvidia-smi -q > /tmp/nvidia-smi-check + if [[ "$?" 
== "0" ]]; then + sudo nvidia-smi -pm 1 # set persistence mode + sudo nvidia-smi --auto-boost-default=0 + + GPUNAME=$(nvidia-smi -L | head -n1) + echo $GPUNAME + + # set application clock to maximum + if [[ $GPUNAME == *"A100"* ]]; then + nvidia-smi -ac 1215,1410 + elif [[ $GPUNAME == *"V100"* ]]; then + nvidia-smi -ac 877,1530 + elif [[ $GPUNAME == *"K80"* ]]; then + nvidia-smi -ac 2505,875 + elif [[ $GPUNAME == *"T4"* ]]; then + nvidia-smi -ac 5001,1590 + elif [[ $GPUNAME == *"M60"* ]]; then + nvidia-smi -ac 2505,1177 + else + echo "unsupported gpu" + fi + else + cat /tmp/nvidia-smi-check + fi else - echo "nvidia-smi not found" + echo "nvidia-smi not found" fi diff --git a/files/max-pods-calculator.sh b/files/max-pods-calculator.sh index a9bb7122b..c52c5d8d5 100755 --- a/files/max-pods-calculator.sh +++ b/files/max-pods-calculator.sh @@ -5,68 +5,68 @@ set -o nounset set -o errexit err_report() { - echo "Exited with error on line $1" + echo "Exited with error on line $1" } trap 'err_report $LINENO' ERR function print_help { - echo "usage: $0 [options]" - echo "Calculates maxPods value to be used when starting up the kubelet." - echo "-h,--help print this help." - echo "--instance-type Specify the instance type to calculate max pods value." - echo "--instance-type-from-imds Use this flag if the instance type should be fetched from IMDS." - echo "--cni-version Specify the version of the CNI (example - 1.7.5)." - echo "--cni-custom-networking-enabled Use this flag to indicate if CNI custom networking mode has been enabled." - echo "--cni-prefix-delegation-enabled Use this flag to indicate if CNI prefix delegation has been enabled." - echo "--cni-max-eni specify how many ENIs should be used for prefix delegation. Defaults to using all ENIs per instance." - echo "--show-max-allowed Use this flag to show max number of Pods allowed to run in Worker Node. 
Otherwise the script will show the recommended value" + echo "usage: $0 [options]" + echo "Calculates maxPods value to be used when starting up the kubelet." + echo "-h,--help print this help." + echo "--instance-type Specify the instance type to calculate max pods value." + echo "--instance-type-from-imds Use this flag if the instance type should be fetched from IMDS." + echo "--cni-version Specify the version of the CNI (example - 1.7.5)." + echo "--cni-custom-networking-enabled Use this flag to indicate if CNI custom networking mode has been enabled." + echo "--cni-prefix-delegation-enabled Use this flag to indicate if CNI prefix delegation has been enabled." + echo "--cni-max-eni specify how many ENIs should be used for prefix delegation. Defaults to using all ENIs per instance." + echo "--show-max-allowed Use this flag to show max number of Pods allowed to run in Worker Node. Otherwise the script will show the recommended value" } POSITIONAL=() while [[ $# -gt 0 ]]; do - key="$1" - case $key in - -h|--help) - print_help - exit 1 - ;; - --instance-type) - INSTANCE_TYPE=$2 - shift - shift - ;; - --instance-type-from-imds) - INSTANCE_TYPE_FROM_IMDS=true - shift - ;; - --cni-version) - CNI_VERSION=$2 - shift - shift - ;; - --cni-custom-networking-enabled) - CNI_CUSTOM_NETWORKING_ENABLED=true - shift - ;; - --cni-prefix-delegation-enabled) - CNI_PREFIX_DELEGATION_ENABLED=true - shift - ;; - --cni-max-eni) - CNI_MAX_ENI=$2 - shift - shift - ;; - --show-max-allowed) - SHOW_MAX_ALLOWED=true - shift - ;; - *) # unknown option - POSITIONAL+=("$1") # save it in an array for later - shift # past argument - ;; - esac + key="$1" + case $key in + -h | --help) + print_help + exit 1 + ;; + --instance-type) + INSTANCE_TYPE=$2 + shift + shift + ;; + --instance-type-from-imds) + INSTANCE_TYPE_FROM_IMDS=true + shift + ;; + --cni-version) + CNI_VERSION=$2 + shift + shift + ;; + --cni-custom-networking-enabled) + CNI_CUSTOM_NETWORKING_ENABLED=true + shift + ;; + 
--cni-prefix-delegation-enabled) + CNI_PREFIX_DELEGATION_ENABLED=true + shift + ;; + --cni-max-eni) + CNI_MAX_ENI=$2 + shift + shift + ;; + --show-max-allowed) + SHOW_MAX_ALLOWED=true + shift + ;; + *) # unknown option + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; + esac done CNI_VERSION="${CNI_VERSION:-}" @@ -81,83 +81,79 @@ PREFIX_DELEGATION_SUPPORTED=false IPS_PER_PREFIX=16 if [ "$INSTANCE_TYPE_FROM_IMDS" = true ]; then - export AWS_DEFAULT_REGION=$(imds /latest/dynamic/instance-identity/document | jq .region -r) - INSTANCE_TYPE=$(imds /latest/meta-data/instance-type) -elif [ -z "$INSTANCE_TYPE" ]; - # There's no reasonable default for an instanceType so force one to be provided to the script. - then echo "You must specify an instance type to calculate max pods value." - exit 1 + export AWS_DEFAULT_REGION=$(imds /latest/dynamic/instance-identity/document | jq .region -r) + INSTANCE_TYPE=$(imds /latest/meta-data/instance-type) +elif [ -z "$INSTANCE_TYPE" ]; then # There's no reasonable default for an instanceType so force one to be provided to the script. + echo "You must specify an instance type to calculate max pods value." + exit 1 fi -if [ -z "$CNI_VERSION" ]; - then echo "You must specify a CNI Version to use. Example - 1.7.5" - exit 1 +if [ -z "$CNI_VERSION" ]; then + echo "You must specify a CNI Version to use. 
Example - 1.7.5" + exit 1 fi calculate_max_ip_addresses_prefix_delegation() { - enis=$1 - instance_max_eni_ips=$2 - echo $(($enis * (($instance_max_eni_ips - 1) * $IPS_PER_PREFIX ) + 2)) + enis=$1 + instance_max_eni_ips=$2 + echo $(($enis * (($instance_max_eni_ips - 1) * $IPS_PER_PREFIX) + 2)) } calculate_max_ip_addresses_secondary_ips() { - enis=$1 - instance_max_eni_ips=$2 - echo $(($enis * ($instance_max_eni_ips - 1) + 2)) + enis=$1 + instance_max_eni_ips=$2 + echo $(($enis * ($instance_max_eni_ips - 1) + 2)) } min_number() { - printf "%s\n" "$@" | sort -g | head -n1 + printf "%s\n" "$@" | sort -g | head -n1 } - VERSION_SPLIT=(${CNI_VERSION//./ }) CNI_MAJOR_VERSION="${VERSION_SPLIT[0]}" CNI_MINOR_VERSION="${VERSION_SPLIT[1]}" if [[ "$CNI_MAJOR_VERSION" -gt 1 ]] || ([[ "$CNI_MAJOR_VERSION" = 1 ]] && [[ "$CNI_MINOR_VERSION" -gt 8 ]]); then - PREFIX_DELEGATION_SUPPORTED=true + PREFIX_DELEGATION_SUPPORTED=true fi DESCRIBE_INSTANCES_RESULT=$(aws ec2 describe-instance-types --instance-type "${INSTANCE_TYPE}" --query 'InstanceTypes[0].{Hypervisor: Hypervisor, EniCount: NetworkInfo.MaximumNetworkInterfaces, PodsPerEniCount: NetworkInfo.Ipv4AddressesPerInterface, CpuCount: VCpuInfo.DefaultVCpus}' --output json) -HYPERVISOR_TYPE=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.Hypervisor' ) +HYPERVISOR_TYPE=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.Hypervisor') IS_NITRO=false if [[ "$HYPERVISOR_TYPE" == "nitro" ]]; then - IS_NITRO=true + IS_NITRO=true fi -INSTANCE_MAX_ENIS=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.EniCount' ) -INSTANCE_MAX_ENIS_IPS=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.PodsPerEniCount' ) +INSTANCE_MAX_ENIS=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.EniCount') +INSTANCE_MAX_ENIS_IPS=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.PodsPerEniCount') -if [ -z "$CNI_MAX_ENI" ] ; then - enis_for_pods=$INSTANCE_MAX_ENIS +if [ -z "$CNI_MAX_ENI" ]; then + enis_for_pods=$INSTANCE_MAX_ENIS else - enis_for_pods="$(min_number $CNI_MAX_ENI $INSTANCE_MAX_ENIS)" + 
enis_for_pods="$(min_number $CNI_MAX_ENI $INSTANCE_MAX_ENIS)" fi -if [ "$CNI_CUSTOM_NETWORKING_ENABLED" = true ] ; then - enis_for_pods=$((enis_for_pods-1)) +if [ "$CNI_CUSTOM_NETWORKING_ENABLED" = true ]; then + enis_for_pods=$((enis_for_pods - 1)) fi - if [ "$IS_NITRO" = true ] && [ "$CNI_PREFIX_DELEGATION_ENABLED" = true ] && [ "$PREFIX_DELEGATION_SUPPORTED" = true ]; then - max_pods=$(calculate_max_ip_addresses_prefix_delegation $enis_for_pods $INSTANCE_MAX_ENIS_IPS) + max_pods=$(calculate_max_ip_addresses_prefix_delegation $enis_for_pods $INSTANCE_MAX_ENIS_IPS) else - max_pods=$(calculate_max_ip_addresses_secondary_ips $enis_for_pods $INSTANCE_MAX_ENIS_IPS) + max_pods=$(calculate_max_ip_addresses_secondary_ips $enis_for_pods $INSTANCE_MAX_ENIS_IPS) fi # Limit the total number of pods that can be launched on any instance type based on the vCPUs on that instance type. MAX_POD_CEILING_FOR_LOW_CPU=110 MAX_POD_CEILING_FOR_HIGH_CPU=250 -CPU_COUNT=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.CpuCount' ) +CPU_COUNT=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.CpuCount') -if [ "$SHOW_MAX_ALLOWED" = true ] ; then +if [ "$SHOW_MAX_ALLOWED" = true ]; then echo $max_pods exit 0 fi -if [ "$CPU_COUNT" -gt 30 ] ; then - echo $(min_number $MAX_POD_CEILING_FOR_HIGH_CPU $max_pods) +if [ "$CPU_COUNT" -gt 30 ]; then + echo $(min_number $MAX_POD_CEILING_FOR_HIGH_CPU $max_pods) else - echo $(min_number $MAX_POD_CEILING_FOR_LOW_CPU $max_pods) + echo $(min_number $MAX_POD_CEILING_FOR_LOW_CPU $max_pods) fi - diff --git a/files/pull-sandbox-image.sh b/files/pull-sandbox-image.sh index 588f155f2..270be7d32 100644 --- a/files/pull-sandbox-image.sh +++ b/files/pull-sandbox-image.sh @@ -6,22 +6,22 @@ region=$(echo "$sandbox_image" | cut -f4 -d ".") ecr_password=$(aws ecr get-login-password --region $region) API_RETRY_ATTEMPTS=5 -for attempt in `seq 0 $API_RETRY_ATTEMPTS`; do - rc=0 - if [[ $attempt -gt 0 ]]; then - echo "Attempt $attempt of $API_RETRY_ATTEMPTS" - fi - ### pull sandbox image 
from ecr - ### username will always be constant i.e; AWS - sudo ctr --namespace k8s.io image pull $sandbox_image --user AWS:$ecr_password - rc=$?; - if [[ $rc -eq 0 ]]; then - break - fi - if [[ $attempt -eq $API_RETRY_ATTEMPTS ]]; then - exit $rc - fi - jitter=$((1 + RANDOM % 10)) - sleep_sec="$(( $(( 5 << $((1+$attempt)) )) + $jitter))" - sleep $sleep_sec +for attempt in $(seq 0 $API_RETRY_ATTEMPTS); do + rc=0 + if [[ $attempt -gt 0 ]]; then + echo "Attempt $attempt of $API_RETRY_ATTEMPTS" + fi + ### pull sandbox image from ecr + ### username will always be constant i.e; AWS + sudo ctr --namespace k8s.io image pull $sandbox_image --user AWS:$ecr_password + rc=$? + if [[ $rc -eq 0 ]]; then + break + fi + if [[ $attempt -eq $API_RETRY_ATTEMPTS ]]; then + exit $rc + fi + jitter=$((1 + RANDOM % 10)) + sleep_sec="$(($((5 << $((1 + $attempt)))) + $jitter))" + sleep $sleep_sec done diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index a0a3cafab..b538dd97a 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -57,18 +57,18 @@ COMMON_DIRECTORIES=( var_log networking sandbox-image # eks - ipamd # eks - sysctls # eks - kubelet # eks - cni # eks + ipamd # eks + sysctls # eks + kubelet # eks + cni # eks ) COMMON_LOGS=( syslog messages aws-routed-eni # eks - containers # eks - pods # eks + containers # eks + pods # eks cloud-init.log cloud-init-output.log kube-proxy.log @@ -151,7 +151,7 @@ is_root() { check_required_utils() { for utils in ${REQUIRED_UTILS[*]}; do # If exit code of "command -v" not equal to 0, fail - if ! command -v "${utils}" >/dev/null 2>&1; then + if ! command -v "${utils}" > /dev/null 2>&1; then echo -e "\nApplication \"${utils}\" is missing, please install \"${utils}\" as this script requires it." 
fi done @@ -167,13 +167,13 @@ log_parameters() { } systemd_check() { - if command -v systemctl >/dev/null 2>&1; then - INIT_TYPE="systemd" - if command -v snap >/dev/null 2>&1; then + if command -v systemctl > /dev/null 2>&1; then + INIT_TYPE="systemd" + if command -v snap > /dev/null 2>&1; then INIT_TYPE="snap" fi else - INIT_TYPE="other" + INIT_TYPE="other" fi } @@ -214,14 +214,14 @@ is_diskfull() { # If "result" is less than or equal to "threshold", fail. if [[ "${result}" -le "${threshold}" ]]; then - die "Free space on root volume is less than or equal to $((threshold>>10))MB, please ensure adequate disk space to collect and store the log files." + die "Free space on root volume is less than or equal to $((threshold >> 10))MB, please ensure adequate disk space to collect and store the log files." fi } cleanup() { #guard rails to avoid accidental deletion of unknown data if [[ "${COLLECT_DIR}" == "/tmp/eks-log-collector" ]]; then - rm --recursive --force "${COLLECT_DIR}" >/dev/null 2>&1 + rm --recursive --force "${COLLECT_DIR}" > /dev/null 2>&1 else echo "Unable to Cleanup as {COLLECT_DIR} variable is modified. Please cleanup manually!" fi @@ -287,25 +287,25 @@ get_mounts_info() { pvs > "${COLLECT_DIR}"/storage/pvs.txt vgs > "${COLLECT_DIR}"/storage/vgs.txt mount -t xfs | awk '{print $1}' | xargs -I{} -- sh -c "xfs_info {}; xfs_db -r -c 'freesp -s' {}" > "${COLLECT_DIR}"/storage/xfs.txt - mount | grep ^overlay | sed 's/.*upperdir=//' | sed 's/,.*//' | xargs -n 1 timeout 75 du -sh | grep -v ^0 > "${COLLECT_DIR}"/storage/pod_local_storage.txt + mount | grep ^overlay | sed 's/.*upperdir=//' | sed 's/,.*//' | xargs -n 1 timeout 75 du -sh | grep -v ^0 > "${COLLECT_DIR}"/storage/pod_local_storage.txt ok } get_selinux_info() { try "collect SELinux status" - if ! 
command -v getenforce >/dev/null 2>&1; then - echo -e "SELinux mode:\n\t Not installed" > "${COLLECT_DIR}"/system/selinux.txt - else - echo -e "SELinux mode:\n\t $(getenforce)" > "${COLLECT_DIR}"/system/selinux.txt + if ! command -v getenforce > /dev/null 2>&1; then + echo -e "SELinux mode:\n\t Not installed" > "${COLLECT_DIR}"/system/selinux.txt + else + echo -e "SELinux mode:\n\t $(getenforce)" > "${COLLECT_DIR}"/system/selinux.txt fi ok } get_iptables_info() { - if ! command -v iptables >/dev/null 2>&1; then - echo "IPtables not installed" |tee -a iptables.txt + if ! command -v iptables > /dev/null 2>&1; then + echo "IPtables not installed" | tee -a iptables.txt else try "collect iptables information" iptables --wait 1 --numeric --verbose --list --table mangle | tee "${COLLECT_DIR}"/networking/iptables-mangle.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-mangle.txt @@ -323,29 +323,29 @@ get_common_logs() { for entry in ${COMMON_LOGS[*]}; do if [[ -e "/var/log/${entry}" ]]; then - if [[ "${entry}" == "messages" ]]; then - tail -c 100M /var/log/messages > "${COLLECT_DIR}"/var_log/messages - continue - fi - if [[ "${entry}" == "containers" ]]; then - cp --force --dereference --recursive /var/log/containers/aws-node* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/containers/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/containers/coredns-* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/containers/kube-proxy* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/containers/ebs-csi* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/containers/efs-csi* "${COLLECT_DIR}"/var_log/ 2>/dev/null - continue - fi - if [[ "${entry}" == "pods" ]]; then - cp --force 
--dereference --recursive /var/log/pods/kube-system_aws-node* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/pods/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/pods/kube-system_coredns* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/pods/kube-system_kube-proxy* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/pods/kube-system_ebs-csi-* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/pods/kube-system_efs-csi-* "${COLLECT_DIR}"/var_log/ 2>/dev/null - continue - fi - cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/var_log/ 2>/dev/null + if [[ "${entry}" == "messages" ]]; then + tail -c 100M /var/log/messages > "${COLLECT_DIR}"/var_log/messages + continue + fi + if [[ "${entry}" == "containers" ]]; then + cp --force --dereference --recursive /var/log/containers/aws-node* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/coredns-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/kube-proxy* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/ebs-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/efs-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null + continue + fi + if [[ "${entry}" == "pods" ]]; then + cp --force --dereference --recursive /var/log/pods/kube-system_aws-node* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_coredns* 
"${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_kube-proxy* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_ebs-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_efs-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + continue + fi + cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/var_log/ 2> /dev/null fi done @@ -356,7 +356,7 @@ get_kernel_info() { try "collect kernel logs" if [[ -e "/var/log/dmesg" ]]; then - cp --force /var/log/dmesg "${COLLECT_DIR}/kernel/dmesg.boot" + cp --force /var/log/dmesg "${COLLECT_DIR}/kernel/dmesg.boot" fi dmesg > "${COLLECT_DIR}/kernel/dmesg.current" dmesg --ctime > "${COLLECT_DIR}/kernel/dmesg.human.current" @@ -369,7 +369,7 @@ get_docker_logs() { try "collect Docker daemon logs" case "${INIT_TYPE}" in - systemd|snap) + systemd | snap) journalctl --unit=docker --since "${DAYS_10}" > "${COLLECT_DIR}"/docker/docker.log ;; other) @@ -392,29 +392,29 @@ get_k8s_info() { if [[ -n "${KUBECONFIG:-}" ]]; then command -v kubectl > /dev/null && kubectl get --kubeconfig="${KUBECONFIG}" svc > "${COLLECT_DIR}"/kubelet/svc.log - command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml elif [[ -f /etc/eksctl/kubeconfig.yaml ]]; then KUBECONFIG="/etc/eksctl/kubeconfig.yaml" command -v kubectl > /dev/null && kubectl get --kubeconfig="${KUBECONFIG}" svc > "${COLLECT_DIR}"/kubelet/svc.log - command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > 
"${COLLECT_DIR}"/kubelet/kubeconfig.yaml elif [[ -f /etc/systemd/system/kubelet.service ]]; then KUBECONFIG=$(grep kubeconfig /etc/systemd/system/kubelet.service | awk '{print $2}') command -v kubectl > /dev/null && kubectl get --kubeconfig="${KUBECONFIG}" svc > "${COLLECT_DIR}"/kubelet/svc.log - command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml elif [[ -f /var/lib/kubelet/kubeconfig ]]; then KUBECONFIG="/var/lib/kubelet/kubeconfig" command -v kubectl > /dev/null && kubectl get --kubeconfig=${KUBECONFIG} svc > "${COLLECT_DIR}"/kubelet/svc.log - command -v kubectl > /dev/null && kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + command -v kubectl > /dev/null && kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml else echo "======== Unable to find KUBECONFIG, IGNORING POD DATA =========" >> "${COLLECT_DIR}"/kubelet/svc.log fi # Try to copy the kubeconfig file if kubectl command doesn't exist - [[ (! -f "${COLLECT_DIR}/kubelet/kubeconfig.yaml") && ( -n ${KUBECONFIG}) ]] && cp ${KUBECONFIG} "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + [[ (! 
-f "${COLLECT_DIR}/kubelet/kubeconfig.yaml") && (-n ${KUBECONFIG}) ]] && cp ${KUBECONFIG} "${COLLECT_DIR}"/kubelet/kubeconfig.yaml case "${INIT_TYPE}" in systemd) @@ -442,14 +442,14 @@ get_ipamd_info() { curl --max-time 3 --silent http://localhost:61679/v1/"${entry}" >> "${COLLECT_DIR}"/ipamd/"${entry}".json done else - echo "Ignoring IPAM introspection stats as mentioned"| tee -a "${COLLECT_DIR}"/ipamd/ipam_introspection_ignore.txt + echo "Ignoring IPAM introspection stats as mentioned" | tee -a "${COLLECT_DIR}"/ipamd/ipam_introspection_ignore.txt fi if [[ "${ignore_metrics}" == "false" ]]; then try "collect L-IPAMD prometheus metrics" curl --max-time 3 --silent http://localhost:61678/metrics > "${COLLECT_DIR}"/ipamd/metrics.json 2>&1 else - echo "Ignoring Prometheus Metrics collection as mentioned"| tee -a "${COLLECT_DIR}"/ipamd/ipam_metrics_ignore.txt + echo "Ignoring Prometheus Metrics collection as mentioned" | tee -a "${COLLECT_DIR}"/ipamd/ipam_metrics_ignore.txt fi try "collect L-IPAMD checkpoint" @@ -460,7 +460,7 @@ get_ipamd_info() { get_multus_info() { try "collect Multus logs if they exist" - cp --force --dereference --recursive /var/log/pods/kube-system_kube-multus* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_kube-multus* "${COLLECT_DIR}"/var_log/ 2> /dev/null ok } @@ -468,7 +468,7 @@ get_multus_info() { get_sysctls_info() { try "collect sysctls information" # dump all sysctls - sysctl --all >> "${COLLECT_DIR}"/sysctls/sysctl_all.txt 2>/dev/null + sysctl --all >> "${COLLECT_DIR}"/sysctls/sysctl_all.txt 2> /dev/null ok } @@ -500,7 +500,7 @@ get_networking_info() { API_SERVER=$(grep server: "${COLLECT_DIR}"/kubelet/kubeconfig.yaml | sed 's/.*server: //') CA_CRT=$(grep certificate-authority: "${COLLECT_DIR}"/kubelet/kubeconfig.yaml | sed 's/.*certificate-authority: //') for i in $(seq 5); do - echo -e "curling ${API_SERVER} ($i of 5) $(date --utc +%FT%T.%3N%Z)\n\n" >> 
${COLLECT_DIR}"/networking/curl_api_server.txt" + echo -e "curling ${API_SERVER} ($i of 5) $(date --utc +%FT%T.%3N%Z)\n\n" >> ${COLLECT_DIR}"/networking/curl_api_server.txt" timeout 75 curl -v --cacert "${CA_CRT}" "${API_SERVER}"/livez?verbose >> ${COLLECT_DIR}"/networking/curl_api_server.txt" 2>&1 done fi @@ -512,17 +512,17 @@ get_networking_info() { get_cni_config() { try "collect CNI configuration information" - if [[ -e "/etc/cni/net.d/" ]]; then - cp --force --recursive --dereference /etc/cni/net.d/* "${COLLECT_DIR}"/cni/ - fi + if [[ -e "/etc/cni/net.d/" ]]; then + cp --force --recursive --dereference /etc/cni/net.d/* "${COLLECT_DIR}"/cni/ + fi ok } get_pkgtype() { - if [[ "$(command -v rpm )" ]]; then + if [[ "$(command -v rpm)" ]]; then PACKAGE_TYPE=rpm - elif [[ "$(command -v dpkg )" ]]; then + elif [[ "$(command -v dpkg)" ]]; then PACKAGE_TYPE=deb else PACKAGE_TYPE='unknown' @@ -551,7 +551,7 @@ get_system_services() { try "collect active system services" case "${INIT_TYPE}" in - systemd|snap) + systemd | snap) systemctl list-units > "${COLLECT_DIR}"/system/services.txt 2>&1 ;; other) @@ -575,36 +575,36 @@ get_system_services() { } get_containerd_info() { - try "Collect Containerd daemon information" + try "Collect Containerd daemon information" - if [[ "$(pgrep -o containerd)" -ne 0 ]]; then - timeout 75 containerd config dump > "${COLLECT_DIR}"/containerd/containerd-config.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - timeout 75 journalctl -u containerd > "${COLLECT_DIR}"/containerd/containerd-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - else - warning "The Containerd daemon is not running." 
- fi + if [[ "$(pgrep -o containerd)" -ne 0 ]]; then + timeout 75 containerd config dump > "${COLLECT_DIR}"/containerd/containerd-config.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 journalctl -u containerd > "${COLLECT_DIR}"/containerd/containerd-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + else + warning "The Containerd daemon is not running." + fi - ok + ok - try "Collect Containerd running information" - if ! command -v ctr >/dev/null 2>&1; then - warning "ctr not installed" - else - timeout 75 ctr version > "${COLLECT_DIR}"/containerd/containerd-version.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - timeout 75 ctr namespaces list > "${COLLECT_DIR}"/containerd/containerd-namespaces.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - timeout 75 ctr --namespace k8s.io images list > "${COLLECT_DIR}"/containerd/containerd-images.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - timeout 75 ctr --namespace k8s.io containers list > "${COLLECT_DIR}"/containerd/containerd-containers.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - timeout 75 ctr --namespace k8s.io tasks list > "${COLLECT_DIR}"/containerd/containerd-tasks.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - timeout 75 ctr --namespace k8s.io plugins list > "${COLLECT_DIR}"/containerd/containerd-plugins.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - fi + try "Collect Containerd running information" + if ! 
command -v ctr > /dev/null 2>&1; then + warning "ctr not installed" + else + timeout 75 ctr version > "${COLLECT_DIR}"/containerd/containerd-version.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr namespaces list > "${COLLECT_DIR}"/containerd/containerd-namespaces.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io images list > "${COLLECT_DIR}"/containerd/containerd-images.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io containers list > "${COLLECT_DIR}"/containerd/containerd-containers.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io tasks list > "${COLLECT_DIR}"/containerd/containerd-tasks.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io plugins list > "${COLLECT_DIR}"/containerd/containerd-plugins.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + fi - ok + ok } get_sandboxImage_info() { - try "Collect sandbox-image daemon information" - timeout 75 journalctl -u sandbox-image > "${COLLECT_DIR}"/sandbox-image/sandbox-image-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"sandbox-image info output \" " - ok + try "Collect sandbox-image daemon information" + timeout 75 journalctl -u sandbox-image > "${COLLECT_DIR}"/sandbox-image/sandbox-image-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"sandbox-image info output \" " + ok } get_docker_info() { @@ -626,28 +626,26 @@ get_docker_info() { get_cpu_throttled_processes() { try "Collect CPU Throttled Process Information" readonly THROTTLE_LOG="${COLLECT_DIR}"/system/cpu_throttling.txt - command find /sys/fs/cgroup -iname "cpu.stat" -print0 | while IFS= read -r -d '' cs - do + command find /sys/fs/cgroup -iname "cpu.stat" -print0 | while IFS= read -r -d '' cs; do # look for a non-zero nr_throttled value if grep -q 
"nr_throttled [1-9]" "${cs}"; then pids=${cs/cpu.stat/cgroup.procs} lines=$(wc -l < "${pids}") # ignore if no PIDs are listed - if [ "${lines}" -eq "0" ] ; then + if [ "${lines}" -eq "0" ]; then continue fi echo "$cs" >> "${THROTTLE_LOG}" cat "${cs}" >> "${THROTTLE_LOG}" - while IFS= read -r pid - do + while IFS= read -r pid; do command ps ax | grep "^${pid}" >> "${THROTTLE_LOG}" - done < "${pids}" - echo "" >> "${THROTTLE_LOG}" - fi + done < "${pids}" + echo "" >> "${THROTTLE_LOG}" + fi done if [ ! -e "${THROTTLE_LOG}" ]; then - echo "No CPU Throttling Found" >> "${THROTTLE_LOG}" + echo "No CPU Throttling Found" >> "${THROTTLE_LOG}" fi ok } @@ -658,7 +656,7 @@ get_io_throttled_processes() { command echo -e "PID Name Block IO Delay (centisconds)" > ${IO_THROTTLE_LOG} # column 42 is Aggregated block I/O delays, measured in centiseconds so we capture the non-zero block # I/O delays. - command cut -d" " -f 1,2,42 /proc/[0-9]*/stat | sort -n -k+3 -r | grep -v 0$ >> ${IO_THROTTLE_LOG} + command cut -d" " -f 1,2,42 /proc/[0-9]*/stat | sort -n -k+3 -r | grep -v 0$ >> ${IO_THROTTLE_LOG} ok } diff --git a/scripts/cleanup.sh b/scripts/cleanup.sh index b9fff7987..24861c3e9 100644 --- a/scripts/cleanup.sh +++ b/scripts/cleanup.sh @@ -9,24 +9,24 @@ sudo rm -rf /tmp/worker # Clean up files to reduce confusion during debug sudo rm -rf \ - /etc/hostname \ - /etc/machine-id \ - /etc/resolv.conf \ - /etc/ssh/ssh_host* \ - /home/ec2-user/.ssh/authorized_keys \ - /root/.ssh/authorized_keys \ - /var/lib/cloud/data \ - /var/lib/cloud/instance \ - /var/lib/cloud/instances \ - /var/lib/cloud/sem \ - /var/lib/dhclient/* \ - /var/lib/dhcp/dhclient.* \ - /var/lib/yum/history \ - /var/log/cloud-init-output.log \ - /var/log/cloud-init.log \ - /var/log/secure \ - /var/log/wtmp \ - /var/log/messages \ - /tmp/imds-tokens + /etc/hostname \ + /etc/machine-id \ + /etc/resolv.conf \ + /etc/ssh/ssh_host* \ + /home/ec2-user/.ssh/authorized_keys \ + /root/.ssh/authorized_keys \ + /var/lib/cloud/data \ + 
/var/lib/cloud/instance \ + /var/lib/cloud/instances \ + /var/lib/cloud/sem \ + /var/lib/dhclient/* \ + /var/lib/dhcp/dhclient.* \ + /var/lib/yum/history \ + /var/log/cloud-init-output.log \ + /var/log/cloud-init.log \ + /var/log/secure \ + /var/log/wtmp \ + /var/log/messages \ + /tmp/imds-tokens sudo touch /etc/machine-id diff --git a/scripts/cleanup_additional_repos.sh b/scripts/cleanup_additional_repos.sh index e2665b484..79179d674 100644 --- a/scripts/cleanup_additional_repos.sh +++ b/scripts/cleanup_additional_repos.sh @@ -9,7 +9,6 @@ if [ -z "${ADDITIONAL_YUM_REPOS}" ]; then exit 0 fi - AWK_CMD=' BEGIN {RS=";";FS=","} { @@ -24,4 +23,4 @@ BEGIN {RS=";";FS=","} } {cmd="rm -f " Repo; system(cmd)} ' -sudo awk "$AWK_CMD" <<< "${ADDITIONAL_YUM_REPOS}" \ No newline at end of file +sudo awk "$AWK_CMD" <<< "${ADDITIONAL_YUM_REPOS}" diff --git a/scripts/generate-version-info.sh b/scripts/generate-version-info.sh index 889a758ef..22ef83a45 100644 --- a/scripts/generate-version-info.sh +++ b/scripts/generate-version-info.sh @@ -5,8 +5,7 @@ set -o errexit set -o pipefail -if [ "$#" -ne 1 ] -then +if [ "$#" -ne 1 ]; then echo "usage: $0 OUTPUT_FILE" exit 1 fi diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index da0de5789..f329cd996 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -11,14 +11,14 @@ TEMPLATE_DIR=${TEMPLATE_DIR:-/tmp/worker} ### Validate Required Arguments ################################################ ################################################################################ validate_env_set() { - ( - set +o nounset - - if [ -z "${!1}" ]; then - echo "Packer variable '$1' was not set. Aborting" - exit 1 - fi - ) + ( + set +o nounset + + if [ -z "${!1}" ]; then + echo "Packer variable '$1' was not set. 
Aborting" + exit 1 + fi + ) } validate_env_set BINARY_BUCKET_NAME @@ -37,12 +37,12 @@ validate_env_set PULL_CNI_FROM_GITHUB MACHINE=$(uname -m) if [ "$MACHINE" == "x86_64" ]; then - ARCH="amd64" + ARCH="amd64" elif [ "$MACHINE" == "aarch64" ]; then - ARCH="arm64" + ARCH="arm64" else - echo "Unknown machine architecture '$MACHINE'" >&2 - exit 1 + echo "Unknown machine architecture '$MACHINE'" >&2 + exit 1 fi ################################################################################ @@ -61,20 +61,20 @@ sudo yum update -y # Install necessary packages sudo yum install -y \ - aws-cfn-bootstrap \ - awscli \ - chrony \ - conntrack \ - curl \ - ec2-instance-connect \ - ipvsadm \ - jq \ - nfs-utils \ - socat \ - unzip \ - wget \ - yum-plugin-versionlock \ - yum-utils + aws-cfn-bootstrap \ + awscli \ + chrony \ + conntrack \ + curl \ + ec2-instance-connect \ + ipvsadm \ + jq \ + nfs-utils \ + socat \ + unzip \ + wget \ + yum-plugin-versionlock \ + yum-utils # Remove any old kernel versions. `--count=1` here means "only leave 1 kernel version installed" sudo package-cleanup --oldkernels --count=1 -y @@ -90,18 +90,18 @@ if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils - sudo chkconfig chronyd on # Make sure that chronyd syncs RTC clock to the kernel. -cat < $TEMPLATE_DIR/kubelet-config.json + KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED=$(cat $TEMPLATE_DIR/kubelet-config.json | jq '.featureGates += {CSIServiceAccountToken: true}') + echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $TEMPLATE_DIR/kubelet-config.json fi if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! 
$KUBERNETES_VERSION =~ "1.21"* ]]; then - # enable CredentialProviders feature flags in kubelet service file - IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' - sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet.service - # enable KubeletCredentialProviders features in kubelet configuration - KUBELET_CREDENTIAL_PROVIDERS_FEATURES=$(cat $TEMPLATE_DIR/kubelet-config.json | jq '.featureGates += {KubeletCredentialProviders: true}') - printf "%s" "$KUBELET_CREDENTIAL_PROVIDERS_FEATURES" > "$TEMPLATE_DIR/kubelet-config.json" + # enable CredentialProviders feature flags in kubelet service file + IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' + sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet.service + # enable KubeletCredentialProviders features in kubelet configuration + KUBELET_CREDENTIAL_PROVIDERS_FEATURES=$(cat $TEMPLATE_DIR/kubelet-config.json | jq '.featureGates += {KubeletCredentialProviders: true}') + printf "%s" "$KUBELET_CREDENTIAL_PROVIDERS_FEATURES" > "$TEMPLATE_DIR/kubelet-config.json" fi sudo mv $TEMPLATE_DIR/kubelet.service /etc/systemd/system/kubelet.service @@ -293,7 +292,6 @@ sudo chown root:root /etc/systemd/system/kubelet.service sudo mv $TEMPLATE_DIR/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json sudo chown root:root /etc/kubernetes/kubelet/kubelet-config.json - sudo systemctl daemon-reload # Disable the kubelet until the proper dropins have been configured sudo systemctl disable kubelet @@ -311,28 +309,28 @@ sudo chmod +x /etc/eks/max-pods-calculator.sh SONOBUOY_E2E_REGISTRY="${SONOBUOY_E2E_REGISTRY:-}" if [[ -n "$SONOBUOY_E2E_REGISTRY" ]]; then - 
sudo mv $TEMPLATE_DIR/sonobuoy-e2e-registry-config /etc/eks/sonobuoy-e2e-registry-config - sudo sed -i s,SONOBUOY_E2E_REGISTRY,$SONOBUOY_E2E_REGISTRY,g /etc/eks/sonobuoy-e2e-registry-config + sudo mv $TEMPLATE_DIR/sonobuoy-e2e-registry-config /etc/eks/sonobuoy-e2e-registry-config + sudo sed -i s,SONOBUOY_E2E_REGISTRY,$SONOBUOY_E2E_REGISTRY,g /etc/eks/sonobuoy-e2e-registry-config fi ################################################################################ ### ECR CREDENTIAL PROVIDER #################################################### ################################################################################ if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! $KUBERNETES_VERSION =~ "1.21"* ]]; then - ECR_BINARY="ecr-credential-provider" - if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then - echo "AWS cli present - using it to copy ecr-credential-provider binaries from s3." - aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$ECR_BINARY . - else - echo "AWS cli missing - using wget to fetch ecr-credential-provider binaries from s3. Note: This won't work for private bucket." - sudo wget "$S3_URL_BASE/$ECR_BINARY" - fi - sudo chmod +x $ECR_BINARY - sudo mkdir -p /etc/eks/ecr-credential-provider - sudo mv $ECR_BINARY /etc/eks/ecr-credential-provider - - # copying credential provider config file to eks folder - sudo mv $TEMPLATE_DIR/ecr-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config + ECR_BINARY="ecr-credential-provider" + if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then + echo "AWS cli present - using it to copy ecr-credential-provider binaries from s3." + aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$ECR_BINARY . + else + echo "AWS cli missing - using wget to fetch ecr-credential-provider binaries from s3. Note: This won't work for private bucket." 
+ sudo wget "$S3_URL_BASE/$ECR_BINARY" + fi + sudo chmod +x $ECR_BINARY + sudo mkdir -p /etc/eks/ecr-credential-provider + sudo mv $ECR_BINARY /etc/eks/ecr-credential-provider + + # copying credential provider config file to eks folder + sudo mv $TEMPLATE_DIR/ecr-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config fi ################################################################################ @@ -346,7 +344,7 @@ sudo yum install -y amazon-ssm-agent ################################################################################ BASE_AMI_ID=$(imds /latest/meta-data/ami-id) -cat < /tmp/release +cat << EOF > /tmp/release BASE_AMI_ID="$BASE_AMI_ID" BUILD_TIME="$(date)" BUILD_KERNEL="$(uname -r)" @@ -359,7 +357,7 @@ sudo chown -R root:root /etc/eks ### Stuff required by "protectKernelDefaults=true" ############################# ################################################################################ -cat < Should use default API server QPS for K8s 1.21-" exit_code=0 export KUBELET_VERSION=v1.21.0-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi # values should not be set @@ -21,11 +21,11 @@ expected_api_burst="null" actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then - echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" - exit 1 + echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" + exit 1 fi if [[ ${actual_api_burst} != ${expected_api_burst} ]]; then - echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" - exit 1 + echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" + exit 1 fi diff --git a/test/cases/api-qps-k8s-1.22-above.sh b/test/cases/api-qps-k8s-1.22-above.sh index 103868c93..cbc242ac2 100755 --- a/test/cases/api-qps-k8s-1.22-above.sh +++ b/test/cases/api-qps-k8s-1.22-above.sh @@ -5,13 +5,13 @@ echo "--> Should increase API server QPS for K8s 1.22+" exit_code=0 export KUBELET_VERSION=v1.22.0-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi expected_api_qps="10" @@ -20,11 +20,11 @@ expected_api_burst="20" actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then - echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" - exit 1 + echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" + exit 1 fi if [[ ${actual_api_burst} != ${expected_api_burst} ]]; then - echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" - exit 1 + echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" + exit 1 fi diff --git a/test/cases/container-runtime-defaults.sh b/test/cases/container-runtime-defaults.sh index 03333a190..d4aaf407e 100755 --- a/test/cases/container-runtime-defaults.sh +++ b/test/cases/container-runtime-defaults.sh @@ -8,91 +8,91 @@ echo "--> Should allow dockerd as container runtime when below k8s version 1.24" # This variable is used to override the default value in the kubelet mock export KUBELET_VERSION=v1.20.15-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --container-runtime dockerd \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime dockerd \ + test || exit_code=$? 
if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 fi echo "--> Should allow containerd as container runtime when below k8s version 1.24" export KUBELET_VERSION=v1.20.15-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --container-runtime containerd \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime containerd \ + test || exit_code=$? if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 fi echo "--> Should have default container runtime when below k8s version 1.24" export KUBELET_VERSION=v1.20.15-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 fi echo "--> Should not allow dockerd as container runtime when at or above k8s version 1.24" export KUBELET_VERSION=v1.24.15-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --container-runtime dockerd \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime dockerd \ + test || exit_code=$? 
echo "EXIT CODE $exit_code" if [[ ${exit_code} -eq 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi exit_code=0 echo "--> Should allow containerd as container runtime when at or above k8s version 1.24" export KUBELET_VERSION=v1.24.15-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --container-runtime containerd \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime containerd \ + test || exit_code=$? if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 fi echo "--> Should have default container runtime when at or above k8s version 1.24" export KUBELET_VERSION=v1.24.15-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 fi echo "--> Should ignore docker-specific flags when at or above k8s version 1.24" export KUBELET_VERSION=v1.24.15-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --enable-docker-bridge true \ - --docker-config-json "{\"some\":\"json\"}" \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --enable-docker-bridge true \ + --docker-config-json "{\"some\":\"json\"}" \ + test || exit_code=$? 
if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 fi diff --git a/test/cases/imds-token-refresh.sh b/test/cases/imds-token-refresh.sh index 0947ec61c..1f4ca7039 100755 --- a/test/cases/imds-token-refresh.sh +++ b/test/cases/imds-token-refresh.sh @@ -12,40 +12,34 @@ export IMDS_TOKEN_TTL_SECONDS=$TTL export IMDS_DEBUG=true imds /latest/meta-data/instance-id || exit_code=$? -if [[ ${exit_code} -ne 0 ]] -then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]] -then - echo "❌ Test Failed: expected one token to be present after first IMDS call but got '$(ls $TOKEN_DIR)'" - exit 1 +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]]; then + echo "❌ Test Failed: expected one token to be present after first IMDS call but got '$(ls $TOKEN_DIR)'" + exit 1 fi imds /latest/meta-data/instance-id || exit_code=$? -if [[ ${exit_code} -ne 0 ]] -then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]] -then - echo "❌ Test Failed: expected one token to be present after second IMDS call but got '$(ls $TOKEN_DIR)'" - exit 1 +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]]; then + echo "❌ Test Failed: expected one token to be present after second IMDS call but got '$(ls $TOKEN_DIR)'" + exit 1 fi sleep $(($TTL + 1)) imds /latest/meta-data/instance-id || exit_code=$? 
-if [[ ${exit_code} -ne 0 ]] -then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]] -then - echo "❌ Test Failed: expected two tokens to be present after third IMDS call but got '$(ls $TOKEN_DIR)'" - exit 1 +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]]; then + echo "❌ Test Failed: expected two tokens to be present after third IMDS call but got '$(ls $TOKEN_DIR)'" + exit 1 fi sleep $(($TTL + 1)) @@ -54,26 +48,22 @@ sleep $(($TTL + 1)) IMDS_MAX_TOKEN_TTL_SECONDS=$TTL imds /latest/meta-data/instance-id || exit_code=$? -if [[ ${exit_code} -ne 0 ]] -then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]] -then - echo "❌ Test Failed: expected two tokens to be present after first garbage-collection but got '$(ls $TOKEN_DIR)'" - exit 1 +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]]; then + echo "❌ Test Failed: expected two tokens to be present after first garbage-collection but got '$(ls $TOKEN_DIR)'" + exit 1 fi # the other expired token should be removed with a window of 0 IMDS_MAX_TOKEN_TTL_SECONDS=0 imds /latest/meta-data/instance-id || exit_code=$? 
-if [[ ${exit_code} -ne 0 ]] -then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]] -then - echo "❌ Test Failed: expected one token to be present after second garbage-collection but got '$(ls $TOKEN_DIR)'" - exit 1 -fi \ No newline at end of file +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]]; then + echo "❌ Test Failed: expected one token to be present after second garbage-collection but got '$(ls $TOKEN_DIR)'" + exit 1 +fi diff --git a/test/cases/ip-family-service-ipv6-cidr-mismatch.sh b/test/cases/ip-family-service-ipv6-cidr-mismatch.sh index f39ed8807..fe835ee03 100755 --- a/test/cases/ip-family-service-ipv6-cidr-mismatch.sh +++ b/test/cases/ip-family-service-ipv6-cidr-mismatch.sh @@ -4,13 +4,13 @@ set -euo pipefail echo "-> Should fail validation - ip-family mismatch" exit_code=0 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --ip-family ipv4 \ - --service-ipv6-cidr 192.168.0.1/24 \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv4 \ + --service-ipv6-cidr 192.168.0.1/24 \ + test || exit_code=$? 
if [[ ${exit_code} -eq 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi diff --git a/test/cases/ipv4-cluster-dns-ip.sh b/test/cases/ipv4-cluster-dns-ip.sh index 03074fc07..b0e05355b 100755 --- a/test/cases/ipv4-cluster-dns-ip.sh +++ b/test/cases/ipv4-cluster-dns-ip.sh @@ -5,19 +5,19 @@ echo "--> Should return IPv4 DNS Cluster IP when given dns-cluster-ip" exit_code=0 expected_cluster_dns="192.168.0.1" /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --ip-family ipv4 \ - --dns-cluster-ip "${expected_cluster_dns}" \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv4 \ + --dns-cluster-ip "${expected_cluster_dns}" \ + test || exit_code=$? if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi actual_cluster_dns=$(jq -r '.clusterDNS[0]' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then - echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" - exit 1 -fi \ No newline at end of file + echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" + exit 1 +fi diff --git a/test/cases/ipv6-cluster-dns-ip.sh b/test/cases/ipv6-cluster-dns-ip.sh index 0f7d7451d..bfb5553c9 100755 --- a/test/cases/ipv6-cluster-dns-ip.sh +++ b/test/cases/ipv6-cluster-dns-ip.sh @@ -5,19 +5,19 @@ echo "-> Should return ipv6 DNS Cluster IP when given dns-cluster-ip" exit_code=0 expected_cluster_dns="fe80::2a" /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --ip-family ipv6 \ - 
--dns-cluster-ip "${expected_cluster_dns}" \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv6 \ + --dns-cluster-ip "${expected_cluster_dns}" \ + test || exit_code=$? if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi actual_cluster_dns=$(jq -r '.clusterDNS[0]' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then - echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" - exit 1 -fi \ No newline at end of file + echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" + exit 1 +fi diff --git a/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh b/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh index f503f01ad..ec84ee391 100755 --- a/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh +++ b/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh @@ -5,20 +5,20 @@ echo "-> Should return IPv6 DNS cluster IP when given service-ipv6-cidr" exit_code=0 TEMP_DIR=$(mktemp -d) /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --ip-family ipv6 \ - --service-ipv6-cidr fe80::1 \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv6 \ + --service-ipv6-cidr fe80::1 \ + test || exit_code=$? 
if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi expected_cluster_dns="fe80::1a" actual_cluster_dns=$(jq -r '.clusterDNS[0]' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then - echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" - exit 1 -fi \ No newline at end of file + echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" + exit 1 +fi diff --git a/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh b/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh index e984a223e..d93561585 100755 --- a/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh +++ b/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh @@ -4,12 +4,12 @@ set -euo pipefail echo "-> Should fail w/ \"service-ipv6-cidr must be provided when ip-family is specified as ipv6\"" exit_code=0 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --ip-family ipv6 \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv6 \ + test || exit_code=$? 
if [[ ${exit_code} -eq 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -fi \ No newline at end of file + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi diff --git a/test/cases/max-pods-cni-1-11-2-delegation.sh b/test/cases/max-pods-cni-1-11-2-delegation.sh index dca43f5e6..5f64100fa 100755 --- a/test/cases/max-pods-cni-1-11-2-delegation.sh +++ b/test/cases/max-pods-cni-1-11-2-delegation.sh @@ -4,18 +4,18 @@ set -euo pipefail echo "-> Should calc max-pods successfully for VPC CNI 1.11.2" exit_code=0 out=$(/etc/eks/max-pods-calculator.sh \ - --instance-type m5.8xlarge \ - --cni-version 1.11.2 \ - --cni-prefix-delegation-enabled || exit_code=$?) + --instance-type m5.8xlarge \ + --cni-version 1.11.2 \ + --cni-prefix-delegation-enabled || exit_code=$?) echo $out if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi expected_max_pods="250" actual_max_pods=$(grep -o '[0-9]\+' <<< ${out}) if [[ ${actual_max_pods} -ne ${expected_max_pods} ]]; then - echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.11.2 to be '${expected_max_pods}', but got '${actual_max_pods}'" - exit 1 + echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.11.2 to be '${expected_max_pods}', but got '${actual_max_pods}'" + exit 1 fi diff --git a/test/cases/max-pods-cni-1-11-2.sh b/test/cases/max-pods-cni-1-11-2.sh index ec47c0730..c1311b707 100755 --- a/test/cases/max-pods-cni-1-11-2.sh +++ b/test/cases/max-pods-cni-1-11-2.sh @@ -4,17 +4,17 @@ set -euo pipefail echo "-> Should calc max-pods successfully for m5.8xlarge VPC CNI 1.11.2" exit_code=0 out=$(/etc/eks/max-pods-calculator.sh \ - --instance-type m5.8xlarge \ - --cni-version 1.11.2 || exit_code=$?) + --instance-type m5.8xlarge \ + --cni-version 1.11.2 || exit_code=$?) 
echo $out if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi expected_max_pods="234" actual_max_pods=$(grep -o '[0-9]\+' <<< ${out}) if [[ ${actual_max_pods} -ne ${expected_max_pods} ]]; then - echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.11.2 to be '${expected_max_pods}', but got '${actual_max_pods}'" - exit 1 + echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.11.2 to be '${expected_max_pods}', but got '${actual_max_pods}'" + exit 1 fi diff --git a/test/cases/max-pods-cni-1-7-5.sh b/test/cases/max-pods-cni-1-7-5.sh index 619767256..262ca81ed 100755 --- a/test/cases/max-pods-cni-1-7-5.sh +++ b/test/cases/max-pods-cni-1-7-5.sh @@ -5,17 +5,17 @@ echo "-> Should calc max-pods successfully for VPC CNI 1.7.5" exit_code=0 export IMDS_DEBUG=true out=$(/etc/eks/max-pods-calculator.sh \ - --instance-type-from-imds \ - --cni-version 1.7.5 || exit_code=$?) + --instance-type-from-imds \ + --cni-version 1.7.5 || exit_code=$?) 
echo $out if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi expected_max_pods="58" actual_max_pods=$(grep -o '[0-9]\+' <<< ${out}) if [[ ${actual_max_pods} -ne ${expected_max_pods} ]]; then - echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.7.5 to be '${expected_max_pods}', but got '${actual_max_pods}'" - exit 1 -fi \ No newline at end of file + echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.7.5 to be '${expected_max_pods}', but got '${actual_max_pods}'" + exit 1 +fi diff --git a/test/entrypoint.sh b/test/entrypoint.sh index fdd437768..9cf68701b 100755 --- a/test/entrypoint.sh +++ b/test/entrypoint.sh @@ -1,10 +1,13 @@ #!/usr/bin/env bash set -euo pipefail -SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +SCRIPTPATH="$( + cd "$(dirname "$0")" + pwd -P +)" ## Start IMDS mock /sbin/ec2-metadata-mock --imdsv2 &> /var/log/ec2-metadata-mock.log & sleep 1 ## execute any other params -/test.sh \ No newline at end of file +/test.sh diff --git a/test/mocks/aws b/test/mocks/aws index 5d9d57079..b752fb6e3 100755 --- a/test/mocks/aws +++ b/test/mocks/aws @@ -1,16 +1,19 @@ #!/usr/bin/env bash set -euo pipefail -SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +SCRIPTPATH="$( + cd "$(dirname "$0")" + pwd -P +)" if [[ $1 == "ec2" ]]; then - if [[ $2 == "describe-instance-types" ]]; then - instance_type=$(echo "${@}" | grep -o '[a-z]\+[0-9]\+[a-z]*\.[0-9a-z]\+' | tr '.' '-') - if [[ -f "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" ]]; then - cat "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" - exit 0 - fi - echo "instance type not found" - exit 1 + if [[ $2 == "describe-instance-types" ]]; then + instance_type=$(echo "${@}" | grep -o '[a-z]\+[0-9]\+[a-z]*\.[0-9a-z]\+' | tr '.' 
'-') + if [[ -f "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" ]]; then + cat "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" + exit 0 fi -fi \ No newline at end of file + echo "instance type not found" + exit 1 + fi +fi diff --git a/test/mocks/iptables-save b/test/mocks/iptables-save index b2bd12826..b4d037268 100755 --- a/test/mocks/iptables-save +++ b/test/mocks/iptables-save @@ -1,4 +1,4 @@ #!/usr/bin/env bash set -euo pipefail -echo "mocking iptables-save with params $@" \ No newline at end of file +echo "mocking iptables-save with params $@" diff --git a/test/mocks/kubelet b/test/mocks/kubelet index c689a18dc..b3fed23c1 100755 --- a/test/mocks/kubelet +++ b/test/mocks/kubelet @@ -5,7 +5,7 @@ set -euo pipefail # so we'll set a default here to avoid test failures, and you can # override by setting the KUBELET_VERSION environment variable. if [ $# == 1 ] && [ $1 == "--version" ]; then - echo "Kubernetes ${KUBELET_VERSION:-v1.23.9-eks-ba74326}" + echo "Kubernetes ${KUBELET_VERSION:-v1.23.9-eks-ba74326}" else - echo "mocking kubelet with params $@" + echo "mocking kubelet with params $@" fi diff --git a/test/mocks/sudo b/test/mocks/sudo index f91c5ff33..e485cef2e 100755 --- a/test/mocks/sudo +++ b/test/mocks/sudo @@ -1,4 +1,4 @@ #!/usr/bin/env bash set -euo pipefail -echo "mocking sudo with params $@" \ No newline at end of file +echo "mocking sudo with params $@" diff --git a/test/mocks/systemctl b/test/mocks/systemctl index 47846145d..5efdd955e 100755 --- a/test/mocks/systemctl +++ b/test/mocks/systemctl @@ -1,4 +1,4 @@ #!/usr/bin/env bash set -euo pipefail -echo "mocking systemctl with $@" \ No newline at end of file +echo "mocking systemctl with $@" diff --git a/test/test-harness.sh b/test/test-harness.sh index f632194a4..a04558086 100755 --- a/test/test-harness.sh +++ b/test/test-harness.sh @@ -1,10 +1,14 @@ #!/usr/bin/env bash -export SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +export SCRIPTPATH="$( + cd "$(dirname "$0")" + 
pwd -P +)" set -euo pipefail TEST_CASE_SCRIPT="" -USAGE=$(cat << 'EOM' +USAGE=$( + cat << 'EOM' Usage: test-harness.sh [-c ] Executes the test harness for the EKS Optimized AL2 AMI. By default the test harness executes all scripts in the cases directory. @@ -16,16 +20,16 @@ EOM while getopts "c:h" opt; do case ${opt} in - c ) # Case Script Path - TEST_CASE_SCRIPT="$OPTARG" + c) # Case Script Path + TEST_CASE_SCRIPT="$OPTARG" ;; - h ) # help - echo "$USAGE" 1>&2 - exit + h) # help + echo "$USAGE" 1>&2 + exit ;; - \? ) - echo "$USAGE" 1>&2 - exit + \?) + echo "$USAGE" 1>&2 + exit ;; esac done @@ -33,38 +37,38 @@ done docker build -t eks-optimized-ami -f "${SCRIPTPATH}/Dockerfile" "${SCRIPTPATH}/../" overall_status=0 -function run(){ - docker run -v ${SCRIPTPATH}/../files/:/etc/eks/ \ - -v "$(realpath $1):/test.sh" \ - --attach STDOUT \ - --attach STDERR \ - --rm \ - eks-optimized-ami +function run() { + docker run -v ${SCRIPTPATH}/../files/:/etc/eks/ \ + -v "$(realpath $1):/test.sh" \ + --attach STDOUT \ + --attach STDERR \ + --rm \ + eks-optimized-ami } if [[ ! -z ${TEST_CASE_SCRIPT} ]]; then - test_cases=${TEST_CASE_SCRIPT} + test_cases=${TEST_CASE_SCRIPT} else - test_cases=($(find ${SCRIPTPATH}/cases -name "*.sh" -type f)) + test_cases=($(find ${SCRIPTPATH}/cases -name "*.sh" -type f)) fi for case in "${test_cases[@]}"; do - status=0 - echo "=================================================================================================================" - echo "-> Executing Test Case: $(basename ${case})" - run ${case} || status=1 - if [[ ${status} -eq 0 ]]; then - echo "✅ ✅ $(basename ${case}) Tests Passed! ✅ ✅" - else - echo "❌ ❌ $(basename ${case}) Tests Failed! 
❌ ❌" - overall_status=1 - fi - echo "=================================================================================================================" + status=0 + echo "=================================================================================================================" + echo "-> Executing Test Case: $(basename ${case})" + run ${case} || status=1 + if [[ ${status} -eq 0 ]]; then + echo "✅ ✅ $(basename ${case}) Tests Passed! ✅ ✅" + else + echo "❌ ❌ $(basename ${case}) Tests Failed! ❌ ❌" + overall_status=1 + fi + echo "=================================================================================================================" done if [[ ${overall_status} -eq 0 ]]; then - echo "✅ ✅ All Tests Passed! ✅ ✅" + echo "✅ ✅ All Tests Passed! ✅ ✅" else - echo "❌ ❌ Some Tests Failed! ❌ ❌" + echo "❌ ❌ Some Tests Failed! ❌ ❌" fi exit $overall_status From 6cdf8390777f38c1fe531901cf8707394d888482 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 3 Nov 2022 15:41:55 -0700 Subject: [PATCH 350/621] Ignore shell formatting commit in git blame (#1083) --- .git-blame-ignore-revs | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 000000000..b78d5db21 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,3 @@ +# Applied code style rules to shell files +6014c4e6872a23f82ca295afa93b033207042876 + From ac6f2297a1e87c84c7e2f88b2cd428d04b621d65 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 3 Nov 2022 15:42:44 -0700 Subject: [PATCH 351/621] Add lint make target (#1068) --- .github/workflows/ci.yaml | 7 +++++++ Makefile | 11 +++++++++++ 2 files changed, 18 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 45b749ed9..dfc7f6804 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -10,6 +10,13 @@ on: - reopened - synchronize jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 
+ - run: echo "$(go env GOPATH)/bin" >> $GITHUB_PATH + - run: go install mvdan.cc/sh/v3/cmd/shfmt@latest + - run: make lint test: runs-on: ubuntu-latest steps: diff --git a/Makefile b/Makefile index 97d3374a6..30ea43e98 100644 --- a/Makefile +++ b/Makefile @@ -50,6 +50,17 @@ endif fmt: ## Format the source files $(SHFMT_COMMAND) $(SHFMT_FLAGS) --write $(MAKEFILE_DIR) +SHELLCHECK_COMMAND := $(shell which shellcheck) +ifeq (, $(SHELLCHECK_COMMAND)) +SHELLCHECK_COMMAND = docker run --rm -v $(MAKEFILE_DIR):$(MAKEFILE_DIR) koalaman/shellcheck:stable +endif +SHELL_FILES := $(shell find $(MAKEFILE_DIR) -type f -name '*.sh') + +.PHONY: lint +lint: ## Check the source files for syntax and format issues + $(SHFMT_COMMAND) $(SHFMT_FLAGS) --diff $(MAKEFILE_DIR) + $(SHELLCHECK_COMMAND) --format gcc --severity error $(SHELL_FILES) + .PHONY: test test: ## run the test-harness test/test-harness.sh From ee841f3eabd886bffbc1c98262b991ad4b9326c6 Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Fri, 4 Nov 2022 11:29:47 -0400 Subject: [PATCH 352/621] Updated CHANGELOG for v20221101 (#1085) --- CHANGELOG.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f268cc05..4eea99b25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,47 @@ # Changelog +### AMI Release v20221101 +* amazon-eks-gpu-node-1.23-v20221101 +* amazon-eks-gpu-node-1.22-v20221101 +* amazon-eks-gpu-node-1.21-v20221101 +* amazon-eks-gpu-node-1.20-v20221101 +* amazon-eks-arm64-node-1.23-v20221101 +* amazon-eks-arm64-node-1.22-v20221101 +* amazon-eks-arm64-node-1.21-v20221101 +* amazon-eks-arm64-node-1.20-v20221101 +* amazon-eks-node-1.23-v20221101 +* amazon-eks-node-1.22-v20221101 +* amazon-eks-node-1.21-v20221101 +* amazon-eks-node-1.20-v20221101 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.23.9-20221101` +* `1.22.12-20221101` +* `1.21.14-20221101` +* 
`1.20.15-20221101` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ + +AMI details: +* kernel: 5.4.209-116.367.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.3-1.amzn2.0.2 +* cuda: 470.141.03-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +* Pin Kernel 5.4 to 5.4.209-116.367 to prevent nodes from going into Unready [#1072](https://github.com/awslabs/amazon-eks-ami/pull/1072) +* Increase the kube-api-server QPS from 5/10 to 10/20 [#1030](https://github.com/awslabs/amazon-eks-ami/pull/1030) +* Update docker and containerd for [ALASDOCKER-2022-021](https://alas.aws.amazon.com/AL2/ALASDOCKER-2022-021.html) [#1056](https://github.com/awslabs/amazon-eks-ami/pull/1056) +* runc version is updated to 1.1.3-1.amzn2.0.2 to include ALAS2DOCKER-2022-020 [#1055](https://github.com/awslabs/amazon-eks-ami/pull/1055) +* Release AMI in me-central-1 with version 1.21, 1.22, 1.23. 1.20 is not supported in this region since it will be deprecated soon. + ### [Recalled] AMI Release v20221027 * amazon-eks-gpu-node-1.23-v20221027 * amazon-eks-gpu-node-1.22-v20221027 From 6f629e1ea90f9289a5ef503dfed54ee0105bceb9 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 4 Nov 2022 11:31:27 -0400 Subject: [PATCH 353/621] Revert "Pin Kernel 5.4 to 5.4.209-116.367 to prevent nodes from going into Unready (#1072)" (#1086) This reverts commit ff27e2440b6a02d51ebcc5fec2ae42d315b31310. 
--- scripts/upgrade_kernel.sh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 232d59c22..5736d0f5c 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -13,12 +13,7 @@ fi if [[ $KERNEL_VERSION == "4.14" ]]; then sudo yum update -y kernel elif [[ $KERNEL_VERSION == "5.4" ]]; then - # Pinning Kernel to 5.4.209-116.367 since we're investigating issues with later Kernel versions which cause nodes to become Unready. - # sudo amazon-linux-extras install -y kernel-5.4 - sudo amazon-linux-extras enable kernel-5.4=latest - sudo yum -y install kernel-5.4.209-116.367.amzn2 - sudo yum install -y yum-plugin-versionlock - sudo yum versionlock kernel-5.4* + sudo amazon-linux-extras install -y kernel-5.4 elif [[ $KERNEL_VERSION == "5.10" ]]; then sudo amazon-linux-extras install -y kernel-5.10 else From 112e919c44397da2c1f5cbadcd0d521f4aac0218 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 4 Nov 2022 15:28:28 -0700 Subject: [PATCH 354/621] Add stale issue workflow (#1088) --- .github/workflows/stale-issues.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/workflows/stale-issues.yaml diff --git a/.github/workflows/stale-issues.yaml b/.github/workflows/stale-issues.yaml new file mode 100644 index 000000000..a56181160 --- /dev/null +++ b/.github/workflows/stale-issues.yaml @@ -0,0 +1,21 @@ +name: 'Close stale issues' +on: + schedule: + # once a day at noon + - cron: '0 12 * * *' +permissions: + issues: write +jobs: + stale: + runs-on: ubuntu-latest + steps: + - uses: actions/stale@v6 + with: + days-before-stale: 90 + days-before-close: 14 + stale-issue-message: 'Please update this issue if it applies to the latest AMI release; otherwise it will be closed soon.' 
+ stale-issue-label: 'stale' + exempt-issue-labels: 'never-stale' + # empty message will prevent PR's from being staled + stale-pr-message: '' + debug-only: true \ No newline at end of file From 832c4bb597c977d21382d1f384e56a930dbc7eb0 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 9 Nov 2022 10:28:18 -0800 Subject: [PATCH 355/621] AMI Release v20221104 (#1093) --- CHANGELOG.md | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4eea99b25..b5d9cef46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,49 @@ # Changelog +### AMI Release v20221104 +* amazon-eks-gpu-node-1.24-v20221104 +* amazon-eks-gpu-node-1.23-v20221104 +* amazon-eks-gpu-node-1.22-v20221104 +* amazon-eks-gpu-node-1.21-v20221104 +* amazon-eks-gpu-node-1.20-v20221104 +* amazon-eks-arm64-node-1.24-v20221104 +* amazon-eks-arm64-node-1.23-v20221104 +* amazon-eks-arm64-node-1.22-v20221104 +* amazon-eks-arm64-node-1.21-v20221104 +* amazon-eks-arm64-node-1.20-v20221104 +* amazon-eks-node-1.24-v20221104 +* amazon-eks-node-1.23-v20221104 +* amazon-eks-node-1.22-v20221104 +* amazon-eks-node-1.21-v20221104 +* amazon-eks-node-1.20-v20221104 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.6-20221104` +* `1.23.9-20221104` +* `1.22.12-20221104` +* `1.21.14-20221104` +* `1.20.15-20221104` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.6/2022-10-05/ +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ + +AMI details: +* kernel: 5.4.219-126.411.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.3-1.amzn2.0.2 +* cuda: 470.141.03-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +* Adds support for 1.24 with version 1.24.6 +* Upgrades kernel at 
`5.4.219-126.411.amzn2` to address [known issues with the previous kernel version](https://github.com/awslabs/amazon-eks-ami/issues/1071) + ### AMI Release v20221101 * amazon-eks-gpu-node-1.23-v20221101 * amazon-eks-gpu-node-1.22-v20221101 From 36cbdeb3363e59009548f17e27df1bb9f2a46d57 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 9 Nov 2022 11:10:58 -0800 Subject: [PATCH 356/621] Add vercmp helper (#1094) --- files/bin/vercmp | 90 +++++++++++++++++ files/bootstrap.sh | 11 +-- scripts/install-worker.sh | 6 +- test/cases/vercmp.sh | 199 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 294 insertions(+), 12 deletions(-) create mode 100755 files/bin/vercmp create mode 100755 test/cases/vercmp.sh diff --git a/files/bin/vercmp b/files/bin/vercmp new file mode 100755 index 000000000..8edf7b920 --- /dev/null +++ b/files/bin/vercmp @@ -0,0 +1,90 @@ +#!/usr/bin/env bash + +# Comparison expressions for semantic versions. +# only supports semver standard MAJOR.MINOR.PATCH syntax; +# pre-release or build-metadata extensions have undefined behavior. + +set -o errexit +set -o pipefail + +function usage() { + echo "Comparison expressions for semantic versions." 
+ echo + echo "usage: vercmp VERSION_A OPERATOR VERSION_B" + echo + echo "OPERATORS" + echo + echo " lt - Less than" + echo " lteq - Less than or equal to" + echo " eq - Equal to" + echo " gteq - Grater than or equal to" + echo " gt - Greater than" + echo +} + +if [ "$#" -ne 3 ]; then + usage + exit 1 +fi + +LEFT="$1" +OPERATOR="$2" +RIGHT="$3" + +if [ "$LEFT" = "$RIGHT" ]; then + COMPARISON=0 +else + SORTED=($(for VER in "$LEFT" "$RIGHT"; do echo "$VER"; done | sort -V)) + if [ "${SORTED[0]}" = "$LEFT" ]; then + COMPARISON=-1 + else + COMPARISON=1 + fi +fi + +OUTCOME=false + +case $OPERATOR in + lt) + if [ "$COMPARISON" -eq -1 ]; then + OUTCOME=true + fi + ;; + + lteq) + if [ "$COMPARISON" -lt 1 ]; then + OUTCOME=true + fi + ;; + + eq) + if [ "$COMPARISON" -eq 0 ]; then + OUTCOME=true + fi + ;; + + gteq) + if [ "$COMPARISON" -gt -1 ]; then + OUTCOME=true + fi + ;; + + gt) + if [ "$COMPARISON" -eq 1 ]; then + OUTCOME=true + fi + ;; + + *) + usage + exit 1 + ;; +esac + +echo "$OUTCOME" + +if [ "$OUTCOME" = "true" ]; then + exit 0 +else + exit 1 +fi diff --git a/files/bootstrap.sh b/files/bootstrap.sh index c7aa93c22..18f69dd2d 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -138,18 +138,11 @@ set -u KUBELET_VERSION=$(kubelet --version | grep -Eo '[0-9]\.[0-9]+\.[0-9]+') echo "Using kubelet version $KUBELET_VERSION" -function is_greater_than_or_equal_to_version() { - local actual_version="$1" - local compared_version="$2" - - [ $actual_version = "$(echo -e "$actual_version\n$compared_version" | sort -V | tail -n1)" ] -} - # As of Kubernetes version 1.24, we will start defaulting the container runtime to containerd # and no longer support docker as a container runtime. 
IS_124_OR_GREATER=false DEFAULT_CONTAINER_RUNTIME=dockerd -if is_greater_than_or_equal_to_version $KUBELET_VERSION "1.24.0"; then +if vercmp "$KUBELET_VERSION" gteq "1.24.0"; then IS_124_OR_GREATER=true DEFAULT_CONTAINER_RUNTIME=containerd fi @@ -467,7 +460,7 @@ else fi INSTANCE_TYPE=$(imds 'latest/meta-data/instance-type') -if is_greater_than_or_equal_to_version $KUBELET_VERSION "1.22.0"; then +if vercmp "$KUBELET_VERSION" gteq "1.22.0"; then # for K8s versions that suport API Priority & Fairness, increase our API server QPS echo $(jq ".kubeAPIQPS=( .kubeAPIQPS // 10)|.kubeAPIBurst=( .kubeAPIBurst // 20)" $KUBELET_CONFIG) > $KUBELET_CONFIG fi diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index f329cd996..744fd659a 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -166,7 +166,7 @@ else sudo mv $TEMPLATE_DIR/containerd-config.toml /etc/eks/containerd/containerd-config.toml fi -if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! $KUBERNETES_VERSION =~ "1.21"* ]]; then +if vercmp "$KUBERNETES_VERSION" gteq "1.22.0"; then # enable CredentialProviders features in kubelet-containerd service file IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet-containerd.service @@ -278,7 +278,7 @@ if [[ $KUBERNETES_VERSION == "1.20"* ]]; then echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $TEMPLATE_DIR/kubelet-config.json fi -if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! 
$KUBERNETES_VERSION =~ "1.21"* ]]; then +if vercmp "$KUBERNETES_VERSION" gteq "1.22.0"; then # enable CredentialProviders feature flags in kubelet service file IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet.service @@ -316,7 +316,7 @@ fi ################################################################################ ### ECR CREDENTIAL PROVIDER #################################################### ################################################################################ -if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! $KUBERNETES_VERSION =~ "1.21"* ]]; then +if vercmp "$KUBERNETES_VERSION" gteq "1.22.0"; then ECR_BINARY="ecr-credential-provider" if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then echo "AWS cli present - using it to copy ecr-credential-provider binaries from s3." diff --git a/test/cases/vercmp.sh b/test/cases/vercmp.sh new file mode 100755 index 000000000..d020c7d81 --- /dev/null +++ b/test/cases/vercmp.sh @@ -0,0 +1,199 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +echo "--> Should compare strictly less-than" +# should succeed +EXIT_CODE=0 +vercmp "1.0.0" lt "2.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" lt "1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" lt "1.1.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +# should fail +EXIT_CODE=0 +vercmp "1.0.0" lt "1.0.0" || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.1" lt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.1.0" lt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "2.0.0" lt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi + +echo "--> Should compare less-than-or-equal-to" +# should succeed +EXIT_CODE=0 +vercmp "1.0.0" lteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" lteq "1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" lteq "2.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +# should fail +EXIT_CODE=0 +vercmp "1.0.1" lteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.1.0" lteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "2.0.0" lteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi + +echo "--> Should compare strictly equal-to" +# should succeed +EXIT_CODE=0 +vercmp "1.0.0" eq "1.0.0" || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +# should fail +EXIT_CODE=0 +vercmp "1.0.1" eq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" eq "1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi + +echo "--> Should compare greater-than-or-equal-to" +# should succeed +EXIT_CODE=0 +vercmp "1.0.0" gteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.1" gteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "2.0.0" gteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +# should fail +EXIT_CODE=0 +vercmp "1.0.0" gteq "1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" gteq "1.1.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" gteq "2.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi + +echo "--> Should compare strictly greater-than" +# should succeed +EXIT_CODE=0 +vercmp "2.0.0" gt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.1" gt "1.0.0" || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.1.0" gt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +# should fail +EXIT_CODE=0 +vercmp "1.0.0" gt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" gt "1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" gt "1.1.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" gt "2.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi From 794ed5f10842b436e10c9bc89ee41491a6494ade Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 9 Nov 2022 11:21:36 -0800 Subject: [PATCH 357/621] Install awscli v2 bundle when possible (#1066) * Install awscli v2 bundle when possible * Correct awscli version parsing * Apply formatting --- scripts/generate-version-info.sh | 1 + scripts/install-worker.sh | 22 +++++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/scripts/generate-version-info.sh b/scripts/generate-version-info.sh index 22ef83a45..f35243936 100644 --- a/scripts/generate-version-info.sh +++ b/scripts/generate-version-info.sh @@ -17,3 +17,4 @@ sudo rpm --query --all --queryformat '\{"%{NAME}": "%{VERSION}-%{RELEASE}"\}\n' # binaries echo $(jq ".binaries.kubelet = \"$(kubelet --version | awk '{print $2}')\"" $OUTPUT_FILE) > $OUTPUT_FILE +echo $(jq ".binaries.awscli = \"$(aws --version | awk '{print $1}' | cut -d '/' -f 2)\"" $OUTPUT_FILE) > $OUTPUT_FILE diff 
--git a/scripts/install-worker.sh b/scripts/install-worker.sh index 744fd659a..0b1bc8964 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -62,7 +62,6 @@ sudo yum update -y # Install necessary packages sudo yum install -y \ aws-cfn-bootstrap \ - awscli \ chrony \ conntrack \ curl \ @@ -118,6 +117,27 @@ sudo systemctl restart sshd.service sudo mkdir -p /etc/eks sudo mv $TEMPLATE_DIR/iptables-restore.service /etc/eks/iptables-restore.service +################################################################################ +### awscli ##################################################### +################################################################################ + +if [[ "$BINARY_BUCKET_REGION" != "us-iso-east-1" && "$BINARY_BUCKET_REGION" != "us-isob-east-1" ]]; then + # https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html + echo "Installing awscli v2 bundle" + AWSCLI_DIR=$(mktemp -d) + curl \ + --silent \ + --show-error \ + --retry 10 \ + --retry-delay 1 \ + -L "https://awscli.amazonaws.com/awscli-exe-linux-${MACHINE}.zip" -o "${AWSCLI_DIR}/awscliv2.zip" + unzip -q "${AWSCLI_DIR}/awscliv2.zip" -d ${AWSCLI_DIR} + sudo "${AWSCLI_DIR}/aws/install" +else + echo "Installing awscli package" + sudo yum install -y awscli +fi + ################################################################################ ### Docker ##################################################################### ################################################################################ From 614d623c807b33d482f3830d1035ea66754d056c Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Thu, 10 Nov 2022 10:33:53 -0600 Subject: [PATCH 358/621] Log collector enhancements (#1092) * save region information This can be looked up from the instance ID, but it's convenient to have it easily parsable from the log archive. * dump containerd goroutines Send containerd a USR1 signal which triggers it to dump goroutines. 
These are saved to the log as well as dumped to a separate log that we archive. * bump log collector version --- .../linux/eks-log-collector.sh | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index b538dd97a..e7d45f9d1 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -20,7 +20,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.7.2" +readonly PROGRAM_VERSION="0.7.3" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -195,7 +195,7 @@ get_instance_id() { cp ${INSTANCE_ID_FILE} "${COLLECT_DIR}"/system/instance-id.txt readonly INSTANCE_ID=$(cat "${COLLECT_DIR}"/system/instance-id.txt) else - readonly INSTANCE_ID=$(curl --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/instance-id) + readonly INSTANCE_ID=$(curl -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/instance-id) if [ 0 -eq $? ]; then # Check if previous command was successful. echo "${INSTANCE_ID}" > "${COLLECT_DIR}"/system/instance-id.txt else @@ -204,6 +204,20 @@ get_instance_id() { fi } +get_region() { + if REGION=$(curl -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/placement/region); then + echo "${REGION}" > "${COLLECT_DIR}"/system/region.txt + else + warning "Unable to find EC2 Region, skipping." + fi + + if AZ=$(curl -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/placement/availability-zone); then + echo "${AZ}" > "${COLLECT_DIR}"/system/availability-zone.txt + else + warning "Unable to find EC2 AZ, skipping." 
+ fi +} + is_diskfull() { local threshold local result @@ -242,6 +256,7 @@ collect() { init is_diskfull get_instance_id + get_region get_common_logs get_kernel_info get_mounts_info @@ -578,8 +593,11 @@ get_containerd_info() { try "Collect Containerd daemon information" if [[ "$(pgrep -o containerd)" -ne 0 ]]; then + # force containerd to dump goroutines + timeout 75 killall -sUSR1 containerd timeout 75 containerd config dump > "${COLLECT_DIR}"/containerd/containerd-config.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " timeout 75 journalctl -u containerd > "${COLLECT_DIR}"/containerd/containerd-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 cp -f /tmp/containerd.*.stacks.log "${COLLECT_DIR}"/containerd/ else warning "The Containerd daemon is not running." fi From a521047d1b097b9c3dbb562ca9bdab5a641f347f Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 10 Nov 2022 16:27:18 -0500 Subject: [PATCH 359/621] Update credential provider API to beta for 1.24+ (#1089) --- CONTRIBUTING.md | 15 +++++ files/bootstrap.sh | 8 +++ files/ecr-credential-provider-config | 6 +- test/Dockerfile | 3 + test/cases/ecr-credential-provider-config.sh | 70 ++++++++++++++++++++ test/cases/vercmp.sh | 60 +++++++++++++++++ test/test-harness.sh | 3 +- 7 files changed, 160 insertions(+), 5 deletions(-) create mode 100755 test/cases/ecr-credential-provider-config.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0576691bc..2d6946816 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -115,6 +115,21 @@ Delete the cluster: eksctl delete cluster -f cluster.yaml ``` +## Troubleshooting + +**Tests fail with `realpath: command not found`** + +When running `make test`, you may see a message like below: + +``` +test/test-harness.sh: line 41: realpath: command not found +/entrypoint.sh: line 13: /test.sh: No such file or directory +``` + +The issue is discussed in [this StackExchange 
post](https://unix.stackexchange.com/questions/101080/realpath-command-not-found). + +On OSX, running `brew install coreutils` resolves the issue. + ## Finding contributions to work on Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels ((enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/amazon-eks-ami/labels/help%20wanted) issues is a great place to start. diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 18f69dd2d..24af30233 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -145,6 +145,14 @@ DEFAULT_CONTAINER_RUNTIME=dockerd if vercmp "$KUBELET_VERSION" gteq "1.24.0"; then IS_124_OR_GREATER=true DEFAULT_CONTAINER_RUNTIME=containerd +elif vercmp "$KUBELET_VERSION" gteq "1.22.0"; then + # Ensure that these exist for testing purposes + mkdir -p /etc/eks/ecr-credential-provider + touch /etc/eks/ecr-credential-provider/ecr-credential-provider-config + # These APIs are only available in alpha pre-1.24. + # This can be removed when version 1.23 is no longer supported. 
+ sed -i s,kubelet.config.k8s.io/v1beta1,kubelet.config.k8s.io/v1alpha1,g /etc/eks/ecr-credential-provider/ecr-credential-provider-config + sed -i s,credentialprovider.kubelet.k8s.io/v1beta1,credentialprovider.kubelet.k8s.io/v1alpha1,g /etc/eks/ecr-credential-provider/ecr-credential-provider-config fi # Set container runtime related variables diff --git a/files/ecr-credential-provider-config b/files/ecr-credential-provider-config index d6117ceb4..ae1f9d7a5 100644 --- a/files/ecr-credential-provider-config +++ b/files/ecr-credential-provider-config @@ -1,4 +1,4 @@ -apiVersion: kubelet.config.k8s.io/v1alpha1 +apiVersion: kubelet.config.k8s.io/v1beta1 kind: CredentialProviderConfig providers: - name: ecr-credential-provider @@ -9,6 +9,6 @@ providers: - "*.dkr.ecr.us-iso-east-1.c2s.ic.gov" - "*.dkr.ecr.us-isob-east-1.sc2s.sgov.gov" defaultCacheDuration: "12h" - apiVersion: credentialprovider.kubelet.k8s.io/v1alpha1 + apiVersion: credentialprovider.kubelet.k8s.io/v1beta1 args: - - get-credentials \ No newline at end of file + - get-credentials diff --git a/test/Dockerfile b/test/Dockerfile index cf0d7c020..4114980b1 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -1,6 +1,9 @@ FROM public.ecr.aws/aws-ec2/amazon-ec2-metadata-mock:v1.11.2 as aemm FROM public.ecr.aws/amazonlinux/amazonlinux:2 RUN yum install -y jq +RUN yum install -y wget +RUN wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 +RUN chmod a+x /usr/local/bin/yq ENV IMDS_ENDPOINT=127.0.0.1:1338 COPY --from=aemm /ec2-metadata-mock /sbin/ec2-metadata-mock diff --git a/test/cases/ecr-credential-provider-config.sh b/test/cases/ecr-credential-provider-config.sh new file mode 100755 index 000000000..e8339540c --- /dev/null +++ b/test/cases/ecr-credential-provider-config.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +set -euo pipefail + +exit_code=0 +TEMP_DIR=$(mktemp -d) + +# Setup test cases +mkdir -p /etc/eks/ecr-credential-provider +export 
CRED_PROVIDER_FILE="/etc/eks/ecr-credential-provider/ecr-credential-provider-config" + +echo "--> Should default to credentialprovider.kubelet.k8s.io/v1alpha1 and kubelet.config.k8s.io/v1alpha1 when below k8s version 1.24" + +# Ensure the credential provider config is present and fresh +cp /etc/eks/ecr-credential-provider-config $CRED_PROVIDER_FILE +# This variable is used to override the default value in the kubelet mock +export KUBELET_VERSION=v1.22.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_cred_provider_api="credentialprovider.kubelet.k8s.io/v1alpha1" +actual=$(yq e '.providers[0].apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_cred_provider_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.22 credential provider file to contain $expected_cred_provider_api" + exit 1 +fi + +expected_kubelet_config_api="kubelet.config.k8s.io/v1alpha1" +actual=$(yq e '.apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_kubelet_config_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.22 credential provider file to contain $expected_kubelet_config_api" + exit 1 +fi + +echo "--> Should default to credentialprovider.kubelet.k8s.io/v1beta1 and kubelet.config.k8s.io/v1beta1 when at or above k8s version 1.24" + +# Ensure the credential provider config is present and fresh +cp /etc/eks/ecr-credential-provider-config $CRED_PROVIDER_FILE +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_cred_provider_api="credentialprovider.kubelet.k8s.io/v1beta1" +actual=$(yq e '.providers[0].apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_cred_provider_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.24 credential provider file to contain $expected_cred_provider_api" + exit 1 +fi + +expected_kubelet_config_api="kubelet.config.k8s.io/v1beta1" +actual=$(yq e '.apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_kubelet_config_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.24 credential provider file to contain $expected_kubelet_config_api" + exit 1 +fi + +exit_code=0 diff --git a/test/cases/vercmp.sh b/test/cases/vercmp.sh index d020c7d81..93c8b24df 100755 --- a/test/cases/vercmp.sh +++ b/test/cases/vercmp.sh @@ -24,6 +24,12 @@ if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v1.0.0" lt "v1.1.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi # should fail EXIT_CODE=0 vercmp "1.0.0" lt "1.0.0" || EXIT_CODE=$? @@ -49,6 +55,12 @@ if [[ ${EXIT_CODE} -eq 0 ]]; then echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v2.0.0" lt "v1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi echo "--> Should compare less-than-or-equal-to" # should succeed @@ -70,6 +82,12 @@ if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v1.0.0" lteq "v2.0.0" || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi # should fail EXIT_CODE=0 vercmp "1.0.1" lteq "1.0.0" || EXIT_CODE=$? @@ -89,6 +107,12 @@ if [[ ${EXIT_CODE} -eq 0 ]]; then echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v2.0.0" lteq "v1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi echo "--> Should compare strictly equal-to" # should succeed @@ -98,6 +122,12 @@ if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v1.0.0" eq "v1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi # should fail EXIT_CODE=0 vercmp "1.0.1" eq "1.0.0" || EXIT_CODE=$? @@ -111,6 +141,12 @@ if [[ ${EXIT_CODE} -eq 0 ]]; then echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v1.0.0" eq "v1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi echo "--> Should compare greater-than-or-equal-to" # should succeed @@ -132,6 +168,12 @@ if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v2.0.0" gteq "v1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi # should fail EXIT_CODE=0 vercmp "1.0.0" gteq "1.0.1" || EXIT_CODE=$? @@ -151,6 +193,12 @@ if [[ ${EXIT_CODE} -eq 0 ]]; then echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v1.0.0" gteq "v2.0.0" || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi echo "--> Should compare strictly greater-than" # should succeed @@ -172,6 +220,12 @@ if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v1.1.0" gt "v1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi # should fail EXIT_CODE=0 vercmp "1.0.0" gt "1.0.0" || EXIT_CODE=$? @@ -197,3 +251,9 @@ if [[ ${EXIT_CODE} -eq 0 ]]; then echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v1.0.0" gt "v2.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi diff --git a/test/test-harness.sh b/test/test-harness.sh index a04558086..b0cc2180f 100755 --- a/test/test-harness.sh +++ b/test/test-harness.sh @@ -38,8 +38,7 @@ docker build -t eks-optimized-ami -f "${SCRIPTPATH}/Dockerfile" "${SCRIPTPATH}/. 
overall_status=0 function run() { - docker run -v ${SCRIPTPATH}/../files/:/etc/eks/ \ - -v "$(realpath $1):/test.sh" \ + docker run -v "$(realpath $1):/test.sh" \ --attach STDOUT \ --attach STDERR \ --rm \ From ce1c11f9db5bf5a730e978e74e13174d4b9f73a3 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 10 Nov 2022 18:00:52 -0500 Subject: [PATCH 360/621] Update client.authentication.k8s.io to v1beta1 (#1095) --- Makefile | 12 ++++++++---- files/bootstrap.sh | 3 --- files/kubelet-kubeconfig | 4 ++-- scripts/install-worker.sh | 10 ++++++++++ test/Dockerfile | 9 +++++---- test/cases/ecr-credential-provider-config.sh | 17 +++++++++++------ 6 files changed, 36 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index 30ea43e98..43dd76ca0 100644 --- a/Makefile +++ b/Makefile @@ -78,19 +78,23 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.20 1.20: ## Build EKS Optimized AL2 AMI - K8s 1.20 - $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-10-31 pull_cni_from_github=true .PHONY: 1.21 1.21: ## Build EKS Optimized AL2 AMI - K8s 1.21 - $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2022-10-31 pull_cni_from_github=true .PHONY: 1.22 1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 - $(MAKE) k8s kubernetes_version=1.22.12 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.15 kubernetes_build_date=2022-10-31 pull_cni_from_github=true .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.9 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.23.13 kubernetes_build_date=2022-10-31 pull_cni_from_github=true + +.PHONY: 1.24 +1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 + $(MAKE) k8s 
kubernetes_version=1.24.7 kubernetes_build_date=2022-10-31 pull_cni_from_github=true .PHONY: help help: ## Display help diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 24af30233..dddb136b7 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -146,9 +146,6 @@ if vercmp "$KUBELET_VERSION" gteq "1.24.0"; then IS_124_OR_GREATER=true DEFAULT_CONTAINER_RUNTIME=containerd elif vercmp "$KUBELET_VERSION" gteq "1.22.0"; then - # Ensure that these exist for testing purposes - mkdir -p /etc/eks/ecr-credential-provider - touch /etc/eks/ecr-credential-provider/ecr-credential-provider-config # These APIs are only available in alpha pre-1.24. # This can be removed when version 1.23 is no longer supported. sed -i s,kubelet.config.k8s.io/v1beta1,kubelet.config.k8s.io/v1alpha1,g /etc/eks/ecr-credential-provider/ecr-credential-provider-config diff --git a/files/kubelet-kubeconfig b/files/kubelet-kubeconfig index 91c25cf29..e8ad7efd4 100644 --- a/files/kubelet-kubeconfig +++ b/files/kubelet-kubeconfig @@ -15,11 +15,11 @@ users: - name: kubelet user: exec: - apiVersion: client.authentication.k8s.io/v1alpha1 + apiVersion: client.authentication.k8s.io/v1beta1 command: /usr/bin/aws-iam-authenticator args: - "token" - "-i" - "CLUSTER_NAME" - --region - - "AWS_REGION" \ No newline at end of file + - "AWS_REGION" diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 0b1bc8964..023c6cd84 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -260,6 +260,16 @@ for binary in ${BINARIES[*]}; do sudo mv $binary /usr/bin/ done +# Verify that the aws-iam-authenticator is at last v0.5.9 or greater. Otherwise, nodes will be +# unable to join clusters due to upgrading to client.authentication.k8s.io/v1beta1 +iam_auth_version=$(sudo /usr/bin/aws-iam-authenticator version | jq -r .Version) +if vercmp "$iam_auth_version" lt "v0.5.9"; then + # To resolve this issue, you need to update the aws-iam-authenticator binary. 
Using binaries distributed by EKS + # with kubernetes_build_date 2022-10-31 or later include v0.5.10 or greater. + echo "❌ The aws-iam-authenticator should be on version v0.5.9 or later. Found $iam_auth_version" + exit 1 +fi + # Since CNI 0.7.0, all releases are done in the plugins repo. CNI_PLUGIN_FILENAME="cni-plugins-linux-${ARCH}-${CNI_PLUGIN_VERSION}" diff --git a/test/Dockerfile b/test/Dockerfile index 4114980b1..f30bf14ef 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -1,14 +1,15 @@ FROM public.ecr.aws/aws-ec2/amazon-ec2-metadata-mock:v1.11.2 as aemm FROM public.ecr.aws/amazonlinux/amazonlinux:2 -RUN yum install -y jq -RUN yum install -y wget -RUN wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -RUN chmod a+x /usr/local/bin/yq +RUN yum install -y jq && \ + yum install -y wget && \ + wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 && \ + chmod a+x /usr/local/bin/yq ENV IMDS_ENDPOINT=127.0.0.1:1338 COPY --from=aemm /ec2-metadata-mock /sbin/ec2-metadata-mock COPY files/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json COPY files/kubelet-kubeconfig /var/lib/kubelet/kubeconfig +COPY files/ecr-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config COPY test/entrypoint.sh /entrypoint.sh COPY files /etc/eks COPY files/bin/* /usr/bin/ diff --git a/test/cases/ecr-credential-provider-config.sh b/test/cases/ecr-credential-provider-config.sh index e8339540c..5d4856ed1 100755 --- a/test/cases/ecr-credential-provider-config.sh +++ b/test/cases/ecr-credential-provider-config.sh @@ -4,14 +4,20 @@ set -euo pipefail exit_code=0 TEMP_DIR=$(mktemp -d) -# Setup test cases -mkdir -p /etc/eks/ecr-credential-provider export CRED_PROVIDER_FILE="/etc/eks/ecr-credential-provider/ecr-credential-provider-config" +export CRED_PROVIDER_RESET_FILE="./cred-provider-config" + +# Store the original version of the config +cp 
$CRED_PROVIDER_FILE $CRED_PROVIDER_RESET_FILE +# Reset the file that may have changed +function reset_scenario { + echo "Resetting test scenario" + cp $CRED_PROVIDER_RESET_FILE $CRED_PROVIDER_FILE +} echo "--> Should default to credentialprovider.kubelet.k8s.io/v1alpha1 and kubelet.config.k8s.io/v1alpha1 when below k8s version 1.24" +reset_scenario -# Ensure the credential provider config is present and fresh -cp /etc/eks/ecr-credential-provider-config $CRED_PROVIDER_FILE # This variable is used to override the default value in the kubelet mock export KUBELET_VERSION=v1.22.15-eks-ba74326 /etc/eks/bootstrap.sh \ @@ -39,9 +45,8 @@ if [[ "$expected_kubelet_config_api" != "$actual" ]]; then fi echo "--> Should default to credentialprovider.kubelet.k8s.io/v1beta1 and kubelet.config.k8s.io/v1beta1 when at or above k8s version 1.24" +reset_scenario -# Ensure the credential provider config is present and fresh -cp /etc/eks/ecr-credential-provider-config $CRED_PROVIDER_FILE export KUBELET_VERSION=v1.24.15-eks-ba74326 /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ From 4dc2e4377ea02bf67d337ddd6e763ffd2841e40e Mon Sep 17 00:00:00 2001 From: Steven Davidovitz Date: Fri, 11 Nov 2022 18:03:04 -0800 Subject: [PATCH 361/621] Configure containerd registry certificates by default (#1049) --- files/bootstrap.sh | 2 ++ files/containerd-config.toml | 3 ++ test/Dockerfile | 8 ++++-- test/cases/container-runtime-defaults.sh | 1 - test/cases/containerd-config.sh | 35 ++++++++++++++++++++++++ test/mocks/sudo | 2 +- 6 files changed, 46 insertions(+), 5 deletions(-) create mode 100755 test/cases/containerd-config.sh diff --git a/files/bootstrap.sh b/files/bootstrap.sh index dddb136b7..76228fa63 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -540,6 +540,8 @@ EOF sudo cp -v /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/sandbox-image.service + 
# Validate containerd config + sudo containerd config dump > /dev/null systemctl daemon-reload systemctl enable containerd systemctl restart containerd diff --git a/files/containerd-config.toml b/files/containerd-config.toml index 8a668ce84..1cddeb2f6 100644 --- a/files/containerd-config.toml +++ b/files/containerd-config.toml @@ -11,6 +11,9 @@ default_runtime_name = "runc" [plugins."io.containerd.grpc.v1.cri"] sandbox_image = "SANDBOX_IMAGE" +[plugins."io.containerd.grpc.v1.cri".registry] +config_path = "/etc/containerd/certs.d:/etc/docker/certs.d" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] runtime_type = "io.containerd.runc.v2" diff --git a/test/Dockerfile b/test/Dockerfile index f30bf14ef..9aaa44905 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -1,17 +1,19 @@ FROM public.ecr.aws/aws-ec2/amazon-ec2-metadata-mock:v1.11.2 as aemm FROM public.ecr.aws/amazonlinux/amazonlinux:2 -RUN yum install -y jq && \ - yum install -y wget && \ +RUN amazon-linux-extras enable docker && \ + yum install -y jq containerd wget && \ wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 && \ chmod a+x /usr/local/bin/yq ENV IMDS_ENDPOINT=127.0.0.1:1338 COPY --from=aemm /ec2-metadata-mock /sbin/ec2-metadata-mock +RUN mkdir -p /etc/eks/containerd +COPY files/ /etc/eks/ +COPY files/containerd-config.toml files/kubelet-containerd.service files/pull-sandbox-image.sh files/sandbox-image.service /etc/eks/containerd/ COPY files/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json COPY files/kubelet-kubeconfig /var/lib/kubelet/kubeconfig COPY files/ecr-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config COPY test/entrypoint.sh /entrypoint.sh -COPY files /etc/eks COPY files/bin/* /usr/bin/ COPY test/mocks/ /sbin/ ENTRYPOINT ["/entrypoint.sh"] diff --git a/test/cases/container-runtime-defaults.sh b/test/cases/container-runtime-defaults.sh index d4aaf407e..8218867c5 100755 --- 
a/test/cases/container-runtime-defaults.sh +++ b/test/cases/container-runtime-defaults.sh @@ -2,7 +2,6 @@ set -euo pipefail exit_code=0 -TEMP_DIR=$(mktemp -d) echo "--> Should allow dockerd as container runtime when below k8s version 1.24" # This variable is used to override the default value in the kubelet mock diff --git a/test/cases/containerd-config.sh b/test/cases/containerd-config.sh new file mode 100755 index 000000000..048ea9dfe --- /dev/null +++ b/test/cases/containerd-config.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +set -euo pipefail + +exit_code=0 + +echo "--> Default containerd config file should be valid" +STDERR_FILE=$(mktemp) +containerd -c /etc/eks/containerd/containerd-config.toml config dump > /dev/null 2> "$STDERR_FILE" || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: default containerd config file is invalid! $(cat "$STDERR_FILE")" + exit 1 +fi + +echo "--> Should fail when given an invalid containerd config" +CONTAINERD_TOML=$(mktemp containerd-XXXXX.toml) +cat > "$CONTAINERD_TOML" << EOF +[cgroup] +path = "foo" +[cgroup] +path = "bar" +EOF + +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime containerd \ + --containerd-config-file "$CONTAINERD_TOML" \ + test || exit_code=$? 
+ +if [[ ${exit_code} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi diff --git a/test/mocks/sudo b/test/mocks/sudo index e485cef2e..7bc3d2ffb 100755 --- a/test/mocks/sudo +++ b/test/mocks/sudo @@ -1,4 +1,4 @@ #!/usr/bin/env bash set -euo pipefail -echo "mocking sudo with params $@" +exec "$@" From 90c5e73fbda727d200dfd0d787337a3adfc55d13 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 14 Nov 2022 21:05:27 -0800 Subject: [PATCH 362/621] Update CHANGELOG.md for release v20221112 (#1101) --- CHANGELOG.md | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b5d9cef46..a305f1061 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,52 @@ # Changelog +### AMI Release v20221112 +* amazon-eks-gpu-node-1.24-v20221112 +* amazon-eks-gpu-node-1.23-v20221112 +* amazon-eks-gpu-node-1.22-v20221112 +* amazon-eks-gpu-node-1.21-v20221112 +* amazon-eks-gpu-node-1.20-v20221112 +* amazon-eks-arm64-node-1.24-v20221112 +* amazon-eks-arm64-node-1.23-v20221112 +* amazon-eks-arm64-node-1.22-v20221112 +* amazon-eks-arm64-node-1.21-v20221112 +* amazon-eks-arm64-node-1.20-v20221112 +* amazon-eks-node-1.24-v20221112 +* amazon-eks-node-1.23-v20221112 +* amazon-eks-node-1.22-v20221112 +* amazon-eks-node-1.21-v20221112 +* amazon-eks-node-1.20-v20221112 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.7-20221112` +* `1.23.13-20221112` +* `1.22.15-20221112` +* `1.21.14-20221112` +* `1.20.15-20221112` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.7/2022-10-31/ +* s3://amazon-eks/1.23.13/2022-10-31/ +* s3://amazon-eks/1.22.15/2022-10-31/ +* s3://amazon-eks/1.21.14/2022-10-31/ +* s3://amazon-eks/1.20.15/2022-10-31/ + +AMI details: +* kernel: 5.4.219-126.411.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: runc-1.1.4-1.amzn2 +* 
cuda: 470.141.03-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +* Upgrades `runc` to version `1.1.4` +* Updates [aws-iam-authenticator](https://github.com/kubernetes-sigs/aws-iam-authenticator) to version `0.5.10` and updates `kubelet` versions to `1.22.15`, `1.23.13` and `1.24.7` +* [Updates `client.authentication.k8s.io` to `v1beta1`](https://github.com/awslabs/amazon-eks-ami/commit/ce1c11f9db5bf5a730e978e74e13174d4b9f73a3) +* [Updates credential provider API to beta for Kubernetes versions `1.24+`](https://github.com/awslabs/amazon-eks-ami/commit/a521047d1b097b9c3dbb562ca9bdab5a641f347f) +* [Installs awscli v2 bundle when possible](https://github.com/awslabs/amazon-eks-ami/commit/794ed5f10842b436e10c9bc89ee41491a6494ade) + ### AMI Release v20221104 * amazon-eks-gpu-node-1.24-v20221104 * amazon-eks-gpu-node-1.23-v20221104 From 524312f6d5f9763a641cbc4a2414ac0fcca38fb6 Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Thu, 17 Nov 2022 10:22:11 -0600 Subject: [PATCH 363/621] sym-link awscli to /bin (#1102) --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 023c6cd84..6c3fa305a 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -132,7 +132,7 @@ if [[ "$BINARY_BUCKET_REGION" != "us-iso-east-1" && "$BINARY_BUCKET_REGION" != " --retry-delay 1 \ -L "https://awscli.amazonaws.com/awscli-exe-linux-${MACHINE}.zip" -o "${AWSCLI_DIR}/awscliv2.zip" unzip -q "${AWSCLI_DIR}/awscliv2.zip" -d ${AWSCLI_DIR} - sudo "${AWSCLI_DIR}/aws/install" + sudo "${AWSCLI_DIR}/aws/install" --bin-dir /bin/ else echo "Installing awscli package" sudo yum install -y awscli From 670b3f2a1a24fee9b44a962347b28ad7a8dd971d Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 18 Nov 2022 09:58:12 -0800 Subject: [PATCH 364/621] =?UTF-8?q?Add=201.24=20to=20=E2=80=98all=E2=80=99?= =?UTF-8?q?=20target=20(#1104)?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 43dd76ca0..e66a9988b 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI +all: 1.20 1.21 1.22 1.23 1.24 ## Build all versions of EKS Optimized AL2 AMI # ensure that these flags are equivalent to the rules in the .editorconfig SHFMT_FLAGS := --list \ From 057f3e47b6c26f26f46a40d380112ea8598ca537 Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Fri, 18 Nov 2022 13:07:53 -0600 Subject: [PATCH 365/621] Cache pause, vpc-cni, and kube-proxy images during build (#938) --- Makefile | 2 +- README.md | 98 +++++++++++++++++++++ eks-worker-al2.json | 9 +- files/bootstrap.sh | 74 +++------------- files/get-ecr-uri.sh | 54 ++++++++++++ files/pull-image.sh | 27 ++++++ files/pull-sandbox-image.sh | 28 +----- scripts/install-worker.sh | 171 ++++++++++++++++++++++++++++-------- 8 files changed, 338 insertions(+), 125 deletions(-) create mode 100755 files/get-ecr-uri.sh create mode 100755 files/pull-image.sh diff --git a/Makefile b/Makefile index e66a9988b..f3986ee9a 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version runc_version cni_plugin_version source_ami_id source_ami_owners source_ami_filter_name arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry ami_regions volume_type +PACKER_VARIABLES := $(shell $(PACKER_BINARY) inspect -machine-readable eks-worker-al2.json | grep 'template-variable' | awk -F ',' '{print $$4}') K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 
2,${K8S_VERSION_PARTS}) diff --git a/README.md b/README.md index 21c4b6641..1158ef702 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,104 @@ Provisioner](https://www.packer.io/docs/provisioners/shell.html) runs the necessary configuration tasks. Then, Packer creates an AMI from the instance and terminates the instance after the AMI is created. +### Container Image Caching + +Optionally, some container images can be cached during the AMI build process in order to reduce the latency of the node getting to a `Ready` state when launched. + +To turn on container image caching: + +``` +cache_container_images=true make 1.23 +``` + +When container image caching is enabled, the following images are cached: + - 602401143452.dkr.ecr..amazonaws.com/eks/kube-proxy:-eksbuild. + - 602401143452.dkr.ecr..amazonaws.com/eks/kube-proxy:-minimal-eksbuild. + - 602401143452.dkr.ecr..amazonaws.com/eks/pause:3.5 + - 602401143452.dkr.ecr..amazonaws.com/amazon-k8s-cni-init: + - 602401143452.dkr.ecr..amazonaws.com/amazon-k8s-cni: + +The account ID can be different depending on the region and partition you are building the AMI in. See [here](https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html) for more details. + +Since the VPC CNI is not versioned with K8s itself, the latest version of the VPC CNI and the default version, based on the response from the EKS DescribeAddonVersions at the time of the AMI build, will be cached. + +The images listed above are also tagged with each region in the partition the AMI is built in, since images are often built in one region and copied to others within the same partition. Images that are available to pull from an ECR FIPS endpoint are also tagged as such (i.e. `602401143452.dkr.ecr-fips.us-east-1.amazonaws.com/eks/pause:3.5`). + +When listing images on a node, you'll notice a long list of images. However, most of these images are simply tagged in different ways with no storage overhead. Images cached in the AMI total around 1.0 GiB. 
In general, a node with no images cached using the VPC CNI will use around 500 MiB of images when in a `Ready` state with no other pods running on the node. + +### IAM Permissions + +To build the EKS Optimized AMI, you will need the following permissions: + +``` +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ec2:AttachVolume", + "ec2:AuthorizeSecurityGroupIngress", + "ec2:CopyImage", + "ec2:CreateImage", + "ec2:CreateKeypair", + "ec2:CreateSecurityGroup", + "ec2:CreateSnapshot", + "ec2:CreateTags", + "ec2:CreateVolume", + "ec2:DeleteKeyPair", + "ec2:DeleteSecurityGroup", + "ec2:DeleteSnapshot", + "ec2:DeleteVolume", + "ec2:DeregisterImage", + "ec2:DescribeImageAttribute", + "ec2:DescribeImages", + "ec2:DescribeInstances", + "ec2:DescribeInstanceStatus", + "ec2:DescribeRegions", + "ec2:DescribeSecurityGroups", + "ec2:DescribeSnapshots", + "ec2:DescribeSubnets", + "ec2:DescribeTags", + "ec2:DescribeVolumes", + "ec2:DetachVolume", + "ec2:GetPasswordData", + "ec2:ModifyImageAttribute", + "ec2:ModifyInstanceAttribute", + "ec2:ModifySnapshotAttribute", + "ec2:RegisterImage", + "ec2:RunInstances", + "ec2:StopInstances", + "ec2:TerminateInstances", + "eks:DescribeAddonVersions", + "ecr:GetAuthorizationToken" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer" + ], + "Resource": "arn:aws:ecr:us-west-2:602401143452:repository/*" + }, + { + "Effect": "Allow", + "Action": [ + "s3:GetObject" + ], + "Resource": "arn:aws:s3:::amazon-eks/*" + } + ] +} +``` + +You will need to use the region you are building the AMI in to specify the ECR repository resource in the second IAM statement. You may also need to change the account if you are building the AMI in a different partition or special region. You can see a mapping of regions to account ID [here](https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html). 
+If you're using a custom s3 bucket to vend different K8s binaries, you will need to change the resource in the third IAM statement above to reference your custom bucket. +For more information about the permissions required by Packer with different configurations, see the [docs](https://www.packer.io/plugins/builders/amazon#iam-task-or-instance-role). + ## Using the AMI If you are just getting started with Amazon EKS, we recommend that you follow diff --git a/eks-worker-al2.json b/eks-worker-al2.json index dda34dd39..6d3278cb0 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -13,6 +13,7 @@ "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}", "binary_bucket_name": "amazon-eks", "binary_bucket_region": "us-west-2", + "cache_container_images": "false", "cni_plugin_version": "v0.8.6", "containerd_version": "1.6.6-1.amzn2.0.2", "creator": "{{env `USER`}}", @@ -23,7 +24,8 @@ "kms_key_id": "", "kubernetes_build_date": null, "kubernetes_version": null, - "launch_block_device_mappings_volume_size": "4", + "launch_block_device_mappings_volume_size": "8", + "pause_container_version": "3.5", "pull_cni_from_github": "true", "remote_folder": "", "runc_version": "1.1.3-1.amzn2.0.2", @@ -161,7 +163,10 @@ "AWS_ACCESS_KEY_ID={{user `aws_access_key_id`}}", "AWS_SECRET_ACCESS_KEY={{user `aws_secret_access_key`}}", "AWS_SESSION_TOKEN={{user `aws_session_token`}}", - "SONOBUOY_E2E_REGISTRY={{user `sonobuoy_e2e_registry`}}" + "SONOBUOY_E2E_REGISTRY={{user `sonobuoy_e2e_registry`}}", + "PAUSE_CONTAINER_VERSION={{user `pause_container_version`}}", + "KUBE_PROXY_VERSION_SUFFIX={{user `kube_proxy_version_suffix`}}", + "CACHE_CONTAINER_IMAGES={{user `cache_container_images`}}" ] }, { diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 76228fa63..78e9d4fab 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -178,51 +178,6 @@ SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" CLUSTER_ID="${CLUSTER_ID:-}" -function 
get_pause_container_account_for_region() { - local region="$1" - case "${region}" in - ap-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-800184023465}" - ;; - me-south-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-558608220178}" - ;; - cn-north-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-918309763551}" - ;; - cn-northwest-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-961992271922}" - ;; - us-gov-west-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-013241004608}" - ;; - us-gov-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-151742754352}" - ;; - us-iso-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-725322719131}" - ;; - us-isob-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-187977181151}" - ;; - af-south-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-877085696533}" - ;; - eu-south-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-590381155156}" - ;; - ap-southeast-3) - echo "${PAUSE_CONTAINER_ACCOUNT:-296578399912}" - ;; - me-central-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-759879836304}" - ;; - *) - echo "${PAUSE_CONTAINER_ACCOUNT:-602401143452}" - ;; - esac -} - # Helper function which calculates the amount of the given resource (either CPU or memory) # to reserve in a given resource range, specified by a start and end of the range and a percentage # of the resource to reserve. 
Note that we return zero if the start of the resource range is @@ -314,8 +269,8 @@ if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then exit 1 fi -PAUSE_CONTAINER_ACCOUNT=$(get_pause_container_account_for_region "${AWS_DEFAULT_REGION}") -PAUSE_CONTAINER_IMAGE=${PAUSE_CONTAINER_IMAGE:-$PAUSE_CONTAINER_ACCOUNT.dkr.ecr.$AWS_DEFAULT_REGION.$AWS_SERVICES_DOMAIN/eks/pause} +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${AWS_DEFAULT_REGION}" "${AWS_SERVICES_DOMAIN}" "${PAUSE_CONTAINER_ACCOUNT:-}") +PAUSE_CONTAINER_IMAGE=${PAUSE_CONTAINER_IMAGE:-$ECR_URI/eks/pause} PAUSE_CONTAINER="$PAUSE_CONTAINER_IMAGE:$PAUSE_CONTAINER_VERSION" ### kubelet kubeconfig @@ -525,29 +480,26 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then sudo mkdir -p /etc/containerd sudo mkdir -p /etc/cni/net.d - mkdir -p /etc/systemd/system/containerd.service.d - cat << EOF > /etc/systemd/system/containerd.service.d/10-compat-symlink.conf -[Service] -ExecStartPre=/bin/ln -sf /run/containerd/containerd.sock /run/dockershim.sock -EOF if [[ -n "$CONTAINERD_CONFIG_FILE" ]]; then sudo cp -v $CONTAINERD_CONFIG_FILE /etc/eks/containerd/containerd-config.toml fi echo "$(jq '.cgroupDriver="systemd"' $KUBELET_CONFIG)" > $KUBELET_CONFIG sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml - sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml - sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service + + # Check if the containerd config file is the same as the one used in the image build. + # If different, then restart containerd w/ proper config + if ! 
cmp -s /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml; then + sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml + sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service + sudo chown root:root /etc/systemd/system/sandbox-image.service + systemctl daemon-reload + systemctl enable containerd sandbox-image + systemctl restart sandbox-image containerd + fi sudo cp -v /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service - sudo chown root:root /etc/systemd/system/sandbox-image.service # Validate containerd config sudo containerd config dump > /dev/null - systemctl daemon-reload - systemctl enable containerd - systemctl restart containerd - systemctl enable sandbox-image - systemctl start sandbox-image - elif [[ "$CONTAINER_RUNTIME" = "dockerd" ]]; then mkdir -p /etc/docker bash -c "/sbin/iptables-save > /etc/sysconfig/iptables" diff --git a/files/get-ecr-uri.sh b/files/get-ecr-uri.sh new file mode 100755 index 000000000..f5e87b932 --- /dev/null +++ b/files/get-ecr-uri.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +set -euo pipefail + +# More details about the mappings in this file can be found here https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html + +region=$1 +aws_domain=$2 +if [[ $# -eq 3 ]] && [[ ! 
-z $3 ]]; then + acct=$3 +else + case "${region}" in + ap-east-1) + acct="800184023465" + ;; + me-south-1) + acct="558608220178" + ;; + cn-north-1) + acct="918309763551" + ;; + cn-northwest-1) + acct="961992271922" + ;; + us-gov-west-1) + acct="013241004608" + ;; + us-gov-east-1) + acct="151742754352" + ;; + us-iso-east-1) + acct="725322719131" + ;; + us-isob-east-1) + acct="187977181151" + ;; + af-south-1) + acct="877085696533" + ;; + eu-south-1) + acct="590381155156" + ;; + ap-southeast-3) + acct="296578399912" + ;; + me-central-1) + acct="759879836304" + ;; + *) + acct="602401143452" + ;; + esac +fi + +echo "${acct}.dkr.ecr.${region}.${aws_domain}" diff --git a/files/pull-image.sh b/files/pull-image.sh new file mode 100755 index 000000000..2d37d88b9 --- /dev/null +++ b/files/pull-image.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +img=$1 +region=$(echo "${img}" | cut -f4 -d ".") +MAX_RETRIES=3 + +function retry() { + local rc=0 + for attempt in $(seq 0 $MAX_RETRIES); do + rc=0 + [[ $attempt -gt 0 ]] && echo "Attempt $attempt of $MAX_RETRIES" 1>&2 + "$@" + rc=$? + [[ $rc -eq 0 ]] && break + [[ $attempt -eq $MAX_RETRIES ]] && exit $rc + local jitter=$((1 + RANDOM % 10)) + local sleep_sec="$(($((5 << $((1 + $attempt)))) + $jitter))" + sleep $sleep_sec + done +} + +ecr_password=$(retry aws ecr get-login-password --region $region) +if [[ -z ${ecr_password} ]]; then + echo >&2 "Unable to retrieve the ECR password." 
+ exit 1 +fi +retry sudo ctr --namespace k8s.io image pull "${img}" --user AWS:${ecr_password} diff --git a/files/pull-sandbox-image.sh b/files/pull-sandbox-image.sh index 270be7d32..523e7ed4c 100644 --- a/files/pull-sandbox-image.sh +++ b/files/pull-sandbox-image.sh @@ -1,27 +1,5 @@ #!/usr/bin/env bash +set -euo pipefail -### fetching sandbox image from /etc/containerd/config.toml -sandbox_image=$(awk -F'[ ="]+' '$1 == "sandbox_image" { print $2 }' /etc/containerd/config.toml) -region=$(echo "$sandbox_image" | cut -f4 -d ".") -ecr_password=$(aws ecr get-login-password --region $region) -API_RETRY_ATTEMPTS=5 - -for attempt in $(seq 0 $API_RETRY_ATTEMPTS); do - rc=0 - if [[ $attempt -gt 0 ]]; then - echo "Attempt $attempt of $API_RETRY_ATTEMPTS" - fi - ### pull sandbox image from ecr - ### username will always be constant i.e; AWS - sudo ctr --namespace k8s.io image pull $sandbox_image --user AWS:$ecr_password - rc=$? - if [[ $rc -eq 0 ]]; then - break - fi - if [[ $attempt -eq $API_RETRY_ATTEMPTS ]]; then - exit $rc - fi - jitter=$((1 + RANDOM % 10)) - sleep_sec="$(($((5 << $((1 + $attempt)))) + $jitter))" - sleep $sleep_sec -done +sandbox_image="$(awk -F'[ ="]+' '$1 == "sandbox_image" { print $2 }' /etc/containerd/config.toml)" +/etc/eks/containerd/pull-image.sh "${sandbox_image}" diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 6c3fa305a..154e6dfa3 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -4,6 +4,7 @@ set -o pipefail set -o nounset set -o errexit IFS=$'\n\t' +export AWS_DEFAULT_OUTPUT="json" TEMPLATE_DIR=${TEMPLATE_DIR:-/tmp/worker} @@ -30,6 +31,8 @@ validate_env_set CNI_PLUGIN_VERSION validate_env_set KUBERNETES_VERSION validate_env_set KUBERNETES_BUILD_DATE validate_env_set PULL_CNI_FROM_GITHUB +validate_env_set PAUSE_CONTAINER_VERSION +validate_env_set CACHE_CONTAINER_IMAGES ################################################################################ ### Machine Architecture 
####################################################### @@ -138,46 +141,18 @@ else sudo yum install -y awscli fi -################################################################################ -### Docker ##################################################################### -################################################################################ - -sudo yum install -y device-mapper-persistent-data lvm2 - -INSTALL_DOCKER="${INSTALL_DOCKER:-true}" -if [[ "$INSTALL_DOCKER" == "true" ]]; then - sudo amazon-linux-extras enable docker - sudo groupadd -og 1950 docker - sudo useradd --gid $(getent group docker | cut -d: -f3) docker - - # install runc and lock version - sudo yum install -y runc-${RUNC_VERSION} - sudo yum versionlock runc-* - - # install containerd and lock version - sudo yum install -y containerd-${CONTAINERD_VERSION} - sudo yum versionlock containerd-* - - # install docker and lock version - sudo yum install -y docker-${DOCKER_VERSION}* - sudo yum versionlock docker-* - sudo usermod -aG docker $USER - - # Remove all options from sysconfig docker. - sudo sed -i '/OPTIONS/d' /etc/sysconfig/docker - - sudo mkdir -p /etc/docker - sudo mv $TEMPLATE_DIR/docker-daemon.json /etc/docker/daemon.json - sudo chown root:root /etc/docker/daemon.json - - # Enable docker daemon to start on boot. 
- sudo systemctl daemon-reload -fi - ############################################################################### ### Containerd setup ########################################################## ############################################################################### +# install runc and lock version +sudo yum install -y runc-${RUNC_VERSION} +sudo yum versionlock runc-* + +# install containerd and lock version +sudo yum install -y containerd-${CONTAINERD_VERSION} +sudo yum versionlock containerd-* + sudo mkdir -p /etc/eks/containerd if [ -f "/etc/eks/containerd/containerd-config.toml" ]; then ## this means we are building a gpu ami and have already placed a containerd configuration file in /etc/eks @@ -195,7 +170,15 @@ fi sudo mv $TEMPLATE_DIR/kubelet-containerd.service /etc/eks/containerd/kubelet-containerd.service sudo mv $TEMPLATE_DIR/sandbox-image.service /etc/eks/containerd/sandbox-image.service sudo mv $TEMPLATE_DIR/pull-sandbox-image.sh /etc/eks/containerd/pull-sandbox-image.sh +sudo mv $TEMPLATE_DIR/pull-image.sh /etc/eks/containerd/pull-image.sh sudo chmod +x /etc/eks/containerd/pull-sandbox-image.sh +sudo chmod +x /etc/eks/containerd/pull-image.sh + +sudo mkdir -p /etc/systemd/system/containerd.service.d +cat << EOF | sudo tee /etc/systemd/system/containerd.service.d/10-compat-symlink.conf +[Service] +ExecStartPre=/bin/ln -sf /run/containerd/containerd.sock /run/dockershim.sock +EOF cat << EOF | sudo tee -a /etc/modules-load.d/containerd.conf overlay @@ -208,6 +191,34 @@ net.bridge.bridge-nf-call-iptables = 1 net.ipv4.ip_forward = 1 EOF +################################################################################ +### Docker ##################################################################### +################################################################################ + +sudo yum install -y device-mapper-persistent-data lvm2 + +INSTALL_DOCKER="${INSTALL_DOCKER:-true}" +if [[ "$INSTALL_DOCKER" == "true" ]]; then + sudo amazon-linux-extras 
enable docker + sudo groupadd -og 1950 docker + sudo useradd --gid $(getent group docker | cut -d: -f3) docker + + # install docker and lock version + sudo yum install -y docker-${DOCKER_VERSION}* + sudo yum versionlock docker-* + sudo usermod -aG docker $USER + + # Remove all options from sysconfig docker. + sudo sed -i '/OPTIONS/d' /etc/sysconfig/docker + + sudo mkdir -p /etc/docker + sudo mv $TEMPLATE_DIR/docker-daemon.json /etc/docker/daemon.json + sudo chown root:root /etc/docker/daemon.json + + # Enable docker daemon to start on boot. + sudo systemctl daemon-reload +fi + ################################################################################ ### Logrotate ################################################################## ################################################################################ @@ -331,6 +342,8 @@ sudo systemctl disable kubelet ################################################################################ sudo mkdir -p /etc/eks +sudo mv $TEMPLATE_DIR/get-ecr-uri.sh /etc/eks/get-ecr-uri.sh +sudo chmod +x /etc/eks/get-ecr-uri.sh sudo mv $TEMPLATE_DIR/eni-max-pods.txt /etc/eks/eni-max-pods.txt sudo mv $TEMPLATE_DIR/bootstrap.sh /etc/eks/bootstrap.sh sudo chmod +x /etc/eks/bootstrap.sh @@ -363,6 +376,92 @@ if vercmp "$KUBERNETES_VERSION" gteq "1.22.0"; then sudo mv $TEMPLATE_DIR/ecr-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config fi +################################################################################ +### Cache Images ############################################################### +################################################################################ +if [[ "$CACHE_CONTAINER_IMAGES" == "true" && "$BINARY_BUCKET_REGION" != "us-iso-east-1" && "$BINARY_BUCKET_REGION" != "us-isob-east-1" ]]; then + AWS_DOMAIN=$(imds 'latest/meta-data/services/domain') + ECR_URI=$(/etc/eks/get-ecr-uri.sh "${BINARY_BUCKET_REGION}" "${AWS_DOMAIN}") + + 
PAUSE_CONTAINER="${ECR_URI}/eks/pause:${PAUSE_CONTAINER_VERSION}" + cat /etc/eks/containerd/containerd-config.toml | sed s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g | sudo tee /etc/eks/containerd/containerd-cached-pause-config.toml + sudo cp -v /etc/eks/containerd/containerd-cached-pause-config.toml /etc/containerd/config.toml + sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service + sudo chown root:root /etc/systemd/system/sandbox-image.service + sudo systemctl daemon-reload + sudo systemctl start containerd + sudo systemctl enable containerd sandbox-image + + K8S_MINOR_VERSION=$(echo "${KUBERNETES_VERSION}" | cut -d'.' -f1-2) + KUBE_PROXY_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name kube-proxy --kubernetes-version=${K8S_MINOR_VERSION}) + + DEFAULT_KUBE_PROXY_FULL_VERSION=$(echo "${KUBE_PROXY_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') + DEFAULT_KUBE_PROXY_VERSION=$(echo "${DEFAULT_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) + DEFAULT_KUBE_PROXY_PLATFORM_VERSION=$(echo "${DEFAULT_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) + + LATEST_KUBE_PROXY_FULL_VERSION=$(echo "${KUBE_PROXY_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) + LATEST_KUBE_PROXY_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) + LATEST_KUBE_PROXY_PLATFORM_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) + + KUBE_PROXY_IMGS=( + ## Default kube-proxy images + "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" + "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-minimal-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" + + ## Latest kube-proxy images + "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" + "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-minimal-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" + ) + + #### 
Cache VPC CNI images starting with the addon default version and the latest version + VPC_CNI_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name vpc-cni --kubernetes-version=${K8S_MINOR_VERSION}) + DEFAULT_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') + LATEST_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) + CNI_IMG="${ECR_URI}/amazon-k8s-cni" + CNI_INIT_IMG="${CNI_IMG}-init" + CNI_IMGS=( + ## Default VPC CNI Images + "${CNI_IMG}:${DEFAULT_VPC_CNI_VERSION}" + "${CNI_INIT_IMG}:${DEFAULT_VPC_CNI_VERSION}" + + ## Latest VPC CNI Images + "${CNI_IMG}:${LATEST_VPC_CNI_VERSION}" + "${CNI_INIT_IMG}:${LATEST_VPC_CNI_VERSION}" + ) + + CACHED_IMGS=( + "${PAUSE_CONTAINER}" + ${KUBE_PROXY_IMGS[@]} + ${CNI_IMGS[@]} + ) + + for img in "${CACHED_IMGS[@]}"; do + ## only kube-proxy-minimal is vended for K8s 1.24+ + if [[ "${img}" == *"kube-proxy:"* ]] && [[ "${img}" != *"-minimal-"* ]] && vercmp "${K8S_MINOR_VERSION}" gteq "1.24"; then + continue + fi + /etc/eks/containerd/pull-image.sh "${img}" + done + + #### Tag the pulled down image for all other regions in the partition + for region in $(aws ec2 describe-regions --all-regions | jq -r '.Regions[] .RegionName'); do + for img in "${CACHED_IMGS[@]}"; do + regional_img="${img/$BINARY_BUCKET_REGION/$region}" + sudo ctr -n k8s.io image tag "${img}" "${regional_img}" || : + ## Tag ECR fips endpoint for supported regions + if [[ "${region}" =~ (us-east-1|us-east-2|us-west-1|us-west-2|us-gov-east-1|us-gov-east-2) ]]; then + regional_fips_img="${regional_img/.ecr./.ecr-fips.}" + sudo ctr -n k8s.io image tag "${img}" "${regional_fips_img}" || : + sudo ctr -n k8s.io image tag "${img}" "${regional_fips_img/-eksbuild.1/}" || : + fi + ## Cache the non-addon VPC CNI images since "v*.*.*-eksbuild.1" is equivalent to leaving off the eksbuild suffix + if 
[[ "${img}" == *"-cni"*"-eksbuild.1" ]]; then + sudo ctr -n k8s.io image tag "${img}" "${regional_img/-eksbuild.1/}" || : + fi + done + done +fi + ################################################################################ ### SSM Agent ################################################################## ################################################################################ From 21870b9f3be2c25e3a95b3adc6fef831e65bda40 Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Fri, 18 Nov 2022 13:09:41 -0600 Subject: [PATCH 366/621] Disable yum updates in cloud-init (#1074) --- scripts/install-worker.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 154e6dfa3..669eb4330 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -501,7 +501,14 @@ echo fs.inotify.max_user_instances=8192 | sudo tee -a /etc/sysctl.conf echo vm.max_map_count=524288 | sudo tee -a /etc/sysctl.conf ################################################################################ -### adding log-collector-script ############################################### +### adding log-collector-script ################################################ ################################################################################ sudo mkdir -p /etc/eks/log-collector-script/ sudo cp $TEMPLATE_DIR/log-collector-script/eks-log-collector.sh /etc/eks/log-collector-script/ + +################################################################################ +### Remove Yum Update from cloud-init config ################################### +################################################################################ +sudo sed -i \ + 's/ - package-update-upgrade-install/# Removed so that nodes do not have version skew based on when the node was started.\n# - package-update-upgrade-install/' \ + /etc/cloud/cloud.cfg From 5fcd34e7d5bf5666e5b86120421ee3111ac5439b Mon Sep 17 00:00:00 2001 From: Carter Date: 
Mon, 21 Nov 2022 10:30:04 -0800 Subject: [PATCH 367/621] Move variable defaults to -var-file (#1079) --- Makefile | 12 ++++-- README.md | 12 ++++++ eks-worker-al2-variables.json | 36 ++++++++++++++++++ eks-worker-al2.json | 71 ++++++++++++++++++----------------- 4 files changed, 93 insertions(+), 38 deletions(-) create mode 100644 eks-worker-al2-variables.json diff --git a/Makefile b/Makefile index f3986ee9a..6eb4e4b67 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := $(shell $(PACKER_BINARY) inspect -machine-readable eks-worker-al2.json | grep 'template-variable' | awk -F ',' '{print $$4}') +AVAILABLE_PACKER_VARIABLES := $(shell $(PACKER_BINARY) inspect -machine-readable eks-worker-al2.json | grep 'template-variable' | awk -F ',' '{print $$4}') K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) @@ -65,14 +65,20 @@ lint: ## Check the source files for syntax and format issues test: ## run the test-harness test/test-harness.sh +# include only variables which have a defined value +PACKER_VARIABLES := $(foreach packerVar,$(AVAILABLE_PACKER_VARIABLES),$(if $($(packerVar)),$(packerVar))) +PACKER_VAR_FLAGS := -var-file eks-worker-al2-variables.json \ +$(if $(PACKER_VARIABLE_FILE),--var-file=$(PACKER_VARIABLE_FILE),) \ +$(foreach packerVar,$(PACKER_VARIABLES),-var $(packerVar)='$($(packerVar))') + .PHONY: validate validate: ## Validate packer config - $(PACKER_BINARY) validate $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json + $(PACKER_BINARY) validate $(PACKER_VAR_FLAGS) eks-worker-al2.json .PHONY: k8s k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" - $(PACKER_BINARY) build -timestamp-ui -color=false $(foreach 
packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json + $(PACKER_BINARY) build -timestamp-ui -color=false $(PACKER_VAR_FLAGS) eks-worker-al2.json # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html diff --git a/README.md b/README.md index 1158ef702..04f1bca1d 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,18 @@ To build an Amazon EKS Worker AMI for a particular Kubernetes version run the fo ```bash make 1.23 ## Build a Amazon EKS Worker AMI for k8s 1.23 ``` + +### AMI template variables + +Default values for most variables are defined in [a default variable file](eks-worker-al2-variables.json). + +Users have the following options for specifying their own values: + +1. Provide a variable file with the `PACKER_VARIABLE_FILE` argument to `make`. Values in this file will override values in the default variable file. Your variable file does not need to include all possible variables, as it will be merged with the default variable file. +2. Pass a key-value pair for any template variable to `make`. These values will override any values specified using the first method. + +**Note** that some variables (such as `arch` and `kubernetes_version`) do not have a sensible, static default, and are satisfied by the Makefile. Such variables do not appear in the default variable file, and must be overridden (if necessary) by the second method described above. 
+ ### Building against other versions of Kubernetes binaries To build an Amazon EKS Worker AMI with other versions of Kubernetes that are not listed above run the following AWS Command Line Interface (AWS CLI) commands to obtain values for KUBERNETES_VERSION, KUBERNETES_BUILD_DATE, PLATFORM, ARCH from S3 diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json new file mode 100644 index 000000000..5f1caaa0b --- /dev/null +++ b/eks-worker-al2-variables.json @@ -0,0 +1,36 @@ +{ + "additional_yum_repos": "", + "ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", + "ami_regions": "", + "ami_users": "", + "associate_public_ip_address": "", + "aws_access_key_id": "{{env `AWS_ACCESS_KEY_ID`}}", + "aws_region": "us-west-2", + "aws_secret_access_key": "{{env `AWS_SECRET_ACCESS_KEY`}}", + "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}", + "binary_bucket_name": "amazon-eks", + "binary_bucket_region": "us-west-2", + "cache_container_images": "false", + "cni_plugin_version": "v0.8.6", + "containerd_version": "1.6.6-1.amzn2.0.2", + "creator": "{{env `USER`}}", + "docker_version": "20.10.17-1.amzn2.0.1", + "encrypted": "false", + "kernel_version": "", + "kms_key_id": "", + "launch_block_device_mappings_volume_size": "8", + "pause_container_version": "3.5", + "pull_cni_from_github": "true", + "remote_folder": "", + "runc_version": "1.1.3-1.amzn2.0.2", + "security_group_id": "", + "sonobuoy_e2e_registry": "", + "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", + "source_ami_id": "", + "source_ami_owners": "137112412989", + "ssh_interface": "", + "ssh_username": "ec2-user", + "subnet_id": "", + "temporary_security_group_source_cidrs": "", + "volume_type": "gp2" +} \ No newline at end of file diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 6d3278cb0..96394457c 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -1,44 +1,45 @@ { + "_comment": "All template variables are enumerated here; and most variables have a default 
value defined in eks-worker-al2-variables.json", "variables": { - "additional_yum_repos": "", - "ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", + "additional_yum_repos": null, + "ami_description": null, "ami_name": null, - "ami_regions": "", - "ami_users": "", + "ami_regions": null, + "ami_users": null, "arch": null, - "associate_public_ip_address": "", - "aws_access_key_id": "{{env `AWS_ACCESS_KEY_ID`}}", - "aws_region": "us-west-2", - "aws_secret_access_key": "{{env `AWS_SECRET_ACCESS_KEY`}}", - "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}", - "binary_bucket_name": "amazon-eks", - "binary_bucket_region": "us-west-2", - "cache_container_images": "false", - "cni_plugin_version": "v0.8.6", - "containerd_version": "1.6.6-1.amzn2.0.2", - "creator": "{{env `USER`}}", - "docker_version": "20.10.17-1.amzn2.0.1", - "encrypted": "false", + "associate_public_ip_address": null, + "aws_access_key_id": null, + "aws_region": null, + "aws_secret_access_key": null, + "aws_session_token": null, + "binary_bucket_name": null, + "binary_bucket_region": null, + "cache_container_images": null, + "cni_plugin_version": null, + "containerd_version": null, + "creator": null, + "docker_version": null, + "encrypted": null, "instance_type": null, - "kernel_version": "", - "kms_key_id": "", + "kernel_version": null, + "kms_key_id": null, "kubernetes_build_date": null, "kubernetes_version": null, - "launch_block_device_mappings_volume_size": "8", - "pause_container_version": "3.5", - "pull_cni_from_github": "true", - "remote_folder": "", - "runc_version": "1.1.3-1.amzn2.0.2", - "security_group_id": "", - "sonobuoy_e2e_registry": "", - "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", - "source_ami_id": "", - "source_ami_owners": "137112412989", - "ssh_interface": "", - "ssh_username": "ec2-user", - "subnet_id": "", - "temporary_security_group_source_cidrs": "", - "volume_type": "gp2" + "launch_block_device_mappings_volume_size": null, + "pause_container_version": 
null, + "pull_cni_from_github": null, + "remote_folder": null, + "runc_version": null, + "security_group_id": null, + "sonobuoy_e2e_registry": null, + "source_ami_filter_name": null, + "source_ami_id": null, + "source_ami_owners": null, + "ssh_interface": null, + "ssh_username": null, + "subnet_id": null, + "temporary_security_group_source_cidrs": null, + "volume_type": null }, "builders": [ { @@ -223,4 +224,4 @@ } } ] -} +} \ No newline at end of file From 5e18bbbdb987ba384a21ad1a8c08e0abf7544332 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 21 Nov 2022 16:53:39 -0800 Subject: [PATCH 368/621] Reorganize documentation (#1105) --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- PLAN.md | 30 -- README.md | 252 +---------------- USER_GUIDE.md | 25 -- CHANGELOG.md => doc/CHANGELOG.md | 0 CODE_OF_CONDUCT.md => doc/CODE_OF_CONDUCT.md | 0 CONTRIBUTING.md => doc/CONTRIBUTING.md | 0 doc/USER_GUIDE.md | 276 +++++++++++++++++++ 8 files changed, 290 insertions(+), 295 deletions(-) delete mode 100644 PLAN.md delete mode 100644 USER_GUIDE.md rename CHANGELOG.md => doc/CHANGELOG.md (100%) rename CODE_OF_CONDUCT.md => doc/CODE_OF_CONDUCT.md (100%) rename CONTRIBUTING.md => doc/CONTRIBUTING.md (100%) create mode 100644 doc/USER_GUIDE.md diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 09b7a68ad..360447696 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -11,4 +11,4 @@ By submitting this pull request, I confirm that you can use, modify, copy, and r -*[See this guide for recommended testing for PRs.](https://github.com/awslabs/amazon-eks-ami/blob/master/CONTRIBUTING.md#testing-changes) Some tests may not apply. Completing tests and providing additional validation steps are not required, but it is recommended and may reduce review time and time to merge.* +*[See this guide for recommended testing for PRs.](../doc/CONTRIBUTING.md#testing-changes) Some tests may not apply. 
Completing tests and providing additional validation steps are not required, but it is recommended and may reduce review time and time to merge.* diff --git a/PLAN.md b/PLAN.md deleted file mode 100644 index c60bb07d7..000000000 --- a/PLAN.md +++ /dev/null @@ -1,30 +0,0 @@ -### Goal - -The goal of this plan is to move the `amazon-eks-ami` package from Amazon Web Services - Labs to Amazon Web Services. EKS and EKS customers depend on this package to build and vend AMIs used in production, and while EKS does test the AMIs before releasing, we'd like to enable more rigorous testing and provide customers more visibility into the process AMIs go through before releasing. - -To achieve the higher level goal of moving the project to an AWS project, here are the following goals: - -1. As much as possible, move all scripts, processes, etc. to the open by including maintaining all related scripts in a GitHub repo and using common tools for testing and releasing OSS software. -1. Implement processes that enable timely support for issues and PRs -1. Improve the safety and reliablity of releases by improving testing - -### Stage 1: Improve GitHub Repo Hygiene - -1. Create GitHub project board for tracking progress on current stage -1. Create GitHub project roadmap, similar to [this one](https://github.com/aws/aws-controllers-k8s/projects/1) -1. Triage 100% of current GitHub issues and set SLA to 3 days going forward -1. Review 100% of current PRs and set SLA to 3 days going forward for initial review -1. Update README.md so that customers are comfortable building AMIs, understand how it works and know how to test custom AMIs manually - -### Stage 2: Improve Safety and Reliability - -1. Build AMIs as part of PR process -1. Enable running Kubernetes conformance tests (or similar) with built AMIs -1. Enable adding additional tests to validate built AMIs -1. Run end-to-end tests are part of the PR process -1. 
All EKS Linux AMIs can be built from GitHub repo, including ARM, GPU, Bottlerocket, etc. - -### Stage 3: Productionalize Release Process - -1. Customers have some visibility into releases and the release process -1. New AMIs are built and released from the GitHub repo automatically, either on a schedule or after PRs are merged diff --git a/README.md b/README.md index 04f1bca1d..97baeed2f 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,16 @@ custom Amazon EKS AMI with [HashiCorp Packer](https://www.packer.io/). This is the same configuration that Amazon EKS uses to create the official Amazon EKS-optimized AMI. -## Setup +## Getting started + +If you are just getting started with Amazon EKS, we recommend that you follow +our [Getting Started](https://docs.aws.amazon.com/eks/latest/userguide/getting-started.html) +chapter in the Amazon EKS User Guide. If you already have a cluster, and you +want to launch a node group with your new AMI, see [Launching Amazon EKS Worker +Nodes](https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html) +in the Amazon EKS User Guide. + +## Pre-requisites You must have [Packer](https://www.packer.io/) version 1.8.0 or later installed on your local system. For more information, see [Installing Packer](https://www.packer.io/docs/install/index.html) @@ -14,10 +23,6 @@ configured so that Packer can make calls to AWS API operations on your behalf. For more information, see [Authentication](https://www.packer.io/docs/builders/amazon.html#specifying-amazon-credentials) in the Packer documentation. -**Note** -The default instance type to build this AMI does not qualify for the AWS free tier. You are charged for any instances created -when building this AMI. 
- ## Building the AMI A Makefile is provided to build the Amazon EKS Worker AMI, but it is just a small wrapper around @@ -33,240 +38,9 @@ To build an Amazon EKS Worker AMI for a particular Kubernetes version run the fo make 1.23 ## Build a Amazon EKS Worker AMI for k8s 1.23 ``` -### AMI template variables - -Default values for most variables are defined in [a default variable file](eks-worker-al2-variables.json). - -Users have the following options for specifying their own values: - -1. Provide a variable file with the `PACKER_VARIABLE_FILE` argument to `make`. Values in this file will override values in the default variable file. Your variable file does not need to include all possible variables, as it will be merged with the default variable file. -2. Pass a key-value pair for any template variable to `make`. These values will override any values specified using the first method. - -**Note** that some variables (such as `arch` and `kubernetes_version`) do not have a sensible, static default, and are satisfied by the Makefile. Such variables do not appear in the default variable file, and must be overridden (if necessary) by the second method described above. 
- -### Building against other versions of Kubernetes binaries -To build an Amazon EKS Worker AMI with other versions of Kubernetes that are not listed above run the following AWS Command -Line Interface (AWS CLI) commands to obtain values for KUBERNETES_VERSION, KUBERNETES_BUILD_DATE, PLATFORM, ARCH from S3 -```bash -#List of all avalable Kuberenets Versions: -aws s3 ls s3://amazon-eks -KUBERNETES_VERSION=1.23.9 # Chose a version and set the variable - -#List of all builds for the specified Kubernetes Version: -aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/ -KUBERNETES_BUILD_DATE=2022-07-27 # Chose a date and set the variable - -#List of all platforms available for the selected Kubernetes Version and build date -aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/ -PLATFORM=linux # Chose a platform and set the variable - -#List of all architectures for the selected Kubernetes Version, build date and platform -aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/ -ARCH=x86_64 #Chose an architecture and set the variable -``` -Run the following command to build an Amazon EKS Worker AMI based on the chosen parameters in the previous step -```bash -make k8s \ - kubernetes_version=$KUBERNETES_VERSION \ - kubernetes_build_date=$KUBERNETES_BUILD_DATE \ - arch=$ARCH -``` - -### Providing your own Kubernetes Binaries - -By default, binaries are downloaded from the Amazon EKS public Amazon Simple Storage Service (Amazon S3) -bucket amazon-eks in us-west-2. You can instead choose to provide your own version of Kubernetes binaries to be used. To use your own binaries - -1. Copy the binaries to your own S3 bucket using the AWS CLI. Here is an example that uses Kubelet binary -```bash - aws s3 cp kubelet s3://my-custom-bucket/kubernetes_version/kubernetes_build_date/bin/linux/arch/kubelet -``` -**Note**: Replace my-custom-bucket, amazon-eks, kubernetes_version, kubernetes_build_date, and arch with your values. 
- -**Important**: You must provide all the binaries listed in the default amazon-eks bucket for a specific kubernetes_version, kubernetes_build_date, and arch combination. These binaries must be accessible through AWS Identity and Access Management (IAM) credentials configured in the Install and configure HashiCorp Packer section. - -2. Run the following command to start the build process to use your own Kubernetes binaries -```bash -make k8s \ - binary_bucket_name=my-custom-bucket \ - binary_bucket_region=eu-west-1 \ - kubernetes_version=1.14.9 \ - kubernetes_build_date=2020-01-22 -``` -**Note**: Confirm that the binary_bucket_name, binary_bucket_region, kubernetes_version, and kubernetes_build_date parameters match the path to your binaries in Amazon S3. - -The Makefile runs Packer with the `eks-worker-al2.json` build specification -template and the [amazon-ebs](https://www.packer.io/docs/builders/amazon-ebs.html) -builder. An instance is launched and the Packer [Shell -Provisioner](https://www.packer.io/docs/provisioners/shell.html) runs the -`install-worker.sh` script on the instance to install software and perform other -necessary configuration tasks. Then, Packer creates an AMI from the instance -and terminates the instance after the AMI is created. - -### Container Image Caching - -Optionally, some container images can be cached during the AMI build process in order to reduce the latency of the node getting to a `Ready` state when launched. - -To turn on container image caching: - -``` -cache_container_images=true make 1.23 -``` - -When container image caching is enabled, the following images are cached: - - 602401143452.dkr.ecr..amazonaws.com/eks/kube-proxy:-eksbuild. - - 602401143452.dkr.ecr..amazonaws.com/eks/kube-proxy:-minimal-eksbuild. 
- - 602401143452.dkr.ecr..amazonaws.com/eks/pause:3.5 - - 602401143452.dkr.ecr..amazonaws.com/amazon-k8s-cni-init: - - 602401143452.dkr.ecr..amazonaws.com/amazon-k8s-cni: - -The account ID can be different depending on the region and partition you are building the AMI in. See [here](https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html) for more details. - -Since the VPC CNI is not versioned with K8s itself, the latest version of the VPC CNI and the default version, based on the response from the EKS DescribeAddonVersions at the time of the AMI build, will be cached. - -The images listed above are also tagged with each region in the partition the AMI is built in, since images are often built in one region and copied to others within the same partition. Images that are available to pull from an ECR FIPS endpoint are also tagged as such (i.e. `602401143452.dkr.ecr-fips.us-east-1.amazonaws.com/eks/pause:3.5`). - -When listing images on a node, you'll notice a long list of images. However, most of these images are simply tagged in different ways with no storage overhead. Images cached in the AMI total around 1.0 GiB. In general, a node with no images cached using the VPC CNI will use around 500 MiB of images when in a `Ready` state with no other pods running on the node. 
- -### IAM Permissions - -To build the EKS Optimized AMI, you will need the following permissions: - -``` -{ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": [ - "ec2:AttachVolume", - "ec2:AuthorizeSecurityGroupIngress", - "ec2:CopyImage", - "ec2:CreateImage", - "ec2:CreateKeypair", - "ec2:CreateSecurityGroup", - "ec2:CreateSnapshot", - "ec2:CreateTags", - "ec2:CreateVolume", - "ec2:DeleteKeyPair", - "ec2:DeleteSecurityGroup", - "ec2:DeleteSnapshot", - "ec2:DeleteVolume", - "ec2:DeregisterImage", - "ec2:DescribeImageAttribute", - "ec2:DescribeImages", - "ec2:DescribeInstances", - "ec2:DescribeInstanceStatus", - "ec2:DescribeRegions", - "ec2:DescribeSecurityGroups", - "ec2:DescribeSnapshots", - "ec2:DescribeSubnets", - "ec2:DescribeTags", - "ec2:DescribeVolumes", - "ec2:DetachVolume", - "ec2:GetPasswordData", - "ec2:ModifyImageAttribute", - "ec2:ModifyInstanceAttribute", - "ec2:ModifySnapshotAttribute", - "ec2:RegisterImage", - "ec2:RunInstances", - "ec2:StopInstances", - "ec2:TerminateInstances", - "eks:DescribeAddonVersions", - "ecr:GetAuthorizationToken" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - "ecr:BatchGetImage", - "ecr:BatchCheckLayerAvailability", - "ecr:GetDownloadUrlForLayer" - ], - "Resource": "arn:aws:ecr:us-west-2:602401143452:repository/*" - }, - { - "Effect": "Allow", - "Action": [ - "s3:GetObject" - ], - "Resource": "arn:aws:s3:::amazon-eks/*" - } - ] -} -``` - -You will need to use the region you are building the AMI in to specify the ECR repository resource in the second IAM statement. You may also need to change the account if you are building the AMI in a different partition or special region. You can see a mapping of regions to account ID [here](https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html). -If you're using a custom s3 bucket to vend different K8s binaries, you will need to change the resource in the third IAM statement above to reference your custom bucket. 
-For more information about the permissions required by Packer with different configurations, see the [docs](https://www.packer.io/plugins/builders/amazon#iam-task-or-instance-role). - -## Using the AMI - -If you are just getting started with Amazon EKS, we recommend that you follow -our [Getting Started](https://docs.aws.amazon.com/eks/latest/userguide/getting-started.html) -chapter in the Amazon EKS User Guide. If you already have a cluster, and you -want to launch a node group with your new AMI, see [Launching Amazon EKS Worker -Nodes](https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html) -in the Amazon EKS User Guide. - -## AL2 / Linux Kernel Information - -By default, the `amazon-eks-ami` uses a [source_ami_filter](https://github.com/awslabs/amazon-eks-ami/blob/e3f1b910f83ad1f27e68312e50474ea6059f052d/eks-worker-al2.json#L46) that selects the latest [hvm](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/virtualization_types.html) AL2 AMI for the given architecture as the base AMI. For more information on what kernel versions are running on published Amazon EKS optimized Linux AMIs, see [the public documentation](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html). - -When building an AMI, you can set the `kernel_version` to `4.14` or `5.4` to customize the kernel version. The [upgrade_kernel.sh script](https://github.com/awslabs/amazon-eks-ami/blob/master/scripts/upgrade_kernel.sh#L26) contains the logic for updating and upgrading the kernel. For Kubernetes versions 1.18 and below, it uses the `4.14` kernel if not set, and it will install the latest patches. For Kubernetes version 1.19 and above, it uses the `5.4` kernel if not set. 
- -## Customizing Kubelet Config - -In some cases, customers may want to customize the [kubelet configuration](https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/#kubelet-config-k8s-io-v1beta1-KubeletConfiguration) on their nodes, and there are two mechanisms to do that with the EKS Optimized AMI. - -**Set the "--kubelet-extra-args" flag when invoking bootstrap.sh** - -`bootstrap.sh`, the script that bootstraps nodes when using the EKS Optimized AMI, supports a flag called `--kubelet-extra-args` that allows you to pass in additional `kubelet` configuration. If you invoke the bootstrap script yourself (self-managed nodegroups or EKS managed nodegroups with custom AMIs), you can use that to customize your configuration. For example, you can use something like the following in your userdata: - -``` -/etc/eks/bootstrap.sh my-cluster --kubelet-extra-args '--registry-qps=20 --registry-burst=40' -``` - -In this case, it will set `registryPullQPS` to 20 and `registryBurst` to 40 in `kubelet`. Some of the flags, like the ones above, are marked as deprecated and you're encouraged to set them in the `kubelet` config file (described below), but they continue to work as of 1.23. - -**Update the kubelet config file** - -You can update the `kubelet` config file directly with new configuration. On EKS Optimized AMIs, the file is stored at `/etc/kubernetes/kubelet/kubelet-config.json`. It must be valid JSON. You can use a utility like `jq` (or your tool of choice) to edit the config in your user data: - -``` -echo "$(jq ".registryPullQPS=20 | .registryBurst=40" /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json -``` - -There are a couple of important caveats here: - -1. If you update the `kubelet` config file after `kubelet` has already started (i.e. `bootstrap.sh` already ran), you'll need to restart `kubelet` to pick up the latest configuration. -2. 
[bootstrap.sh](https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh) does modify a few fields, like `kubeReserved` and `evictionHard`, so you'd need to modify the config after the bootstrap script is run and restart `kubelet` to overwrite those properties. - -**View active kubelet config** - -When `kubelet` starts up, it logs all possible flags, including unset flags. The unset flags get logged with default values. *These logs do not necessarily reflect the actual active configuration.* This has caused confusion in the past when customers have configured the `kubelet` config file with one value and notice the default value is logged. Here is an example of the referenced log: - -``` -Aug 16 21:53:49 ip-192-168-92-220.us-east-2.compute.internal kubelet[3935]: I0816 21:53:49.202824 3935 flags.go:59] FLAG: --registry-burst="10" -Aug 16 21:53:49 ip-192-168-92-220.us-east-2.compute.internal kubelet[3935]: I0816 21:53:49.202829 3935 flags.go:59] FLAG: --registry-qps="5" -``` - -To view the actual `kubelet` config on your node, you can use the Kubernetes API to confirm that your configuration has applied. - -``` -$ kubectl proxy -$ curl -sSL "http://localhost:8001/api/v1/nodes/ip-192-168-92-220.us-east-2.compute.internal/proxy/configz" | jq - -{ - "kubeletconfig": { - ... - "registryPullQPS": 20, - "registryBurst": 40, - ... - } -} -``` +**Note** +The default instance type to build this AMI does not qualify for the AWS free tier. You are charged for any instances created +when building this AMI. ## Security diff --git a/USER_GUIDE.md b/USER_GUIDE.md deleted file mode 100644 index 03f8c445d..000000000 --- a/USER_GUIDE.md +++ /dev/null @@ -1,25 +0,0 @@ -## User Guide - -This guide will provide more detailed usage information on this repo. - -## Updating known instance types - -`files/bootstrap.sh` configures the maximum number of pods on a node based off of the number of ENIs available, which is determined by the instance type. 
Larger instances generally have more ENIs. The number of ENIs limits how many IPV4 addresses are available on an instance, and we need one IP address per pod. You can [see this file](https://github.com/aws/amazon-vpc-cni-k8s/blob/master/scripts/gen_vpc_ip_limits.go) for the code that calculates the max pods for more information. - -To add support for new instance types, at a minimum, we need to update `files/eni-max-pods.txt` using the [amazon-vpc-cni-k8s package.](https://github.com/aws/amazon-vpc-cni-k8s) to set the number of max pods available for those instance types. If the instance type is not on the list, `bootstrap.sh` will fail when the node is started. - -``` -$ git clone git@github.com:aws/amazon-vpc-cni-k8s.git - -# AWS credentials required at this point -$ make generate-limits -# misc/eni-max-pods.txt should be generated - -# Copy the generated file to this repo, something like this: -$ cp misc/eni-max-pods.txt ../amazon-eks-ami/files/ - -# Verify that expected types were added -$ git diff -``` - -At this point, you can build an AMI and it will include the updated list of instance types. diff --git a/CHANGELOG.md b/doc/CHANGELOG.md similarity index 100% rename from CHANGELOG.md rename to doc/CHANGELOG.md diff --git a/CODE_OF_CONDUCT.md b/doc/CODE_OF_CONDUCT.md similarity index 100% rename from CODE_OF_CONDUCT.md rename to doc/CODE_OF_CONDUCT.md diff --git a/CONTRIBUTING.md b/doc/CONTRIBUTING.md similarity index 100% rename from CONTRIBUTING.md rename to doc/CONTRIBUTING.md diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md new file mode 100644 index 000000000..0ed73532b --- /dev/null +++ b/doc/USER_GUIDE.md @@ -0,0 +1,276 @@ +# User Guide + +This guide will provide more detailed usage information on this repo. + +1. [AMI template variables](#ami-template-variables) +1. [Building against other versions of Kubernetes binaries](#building-against-other-versions-of-kubernetes-binaries) +1. 
[Providing your own Kubernetes binaries](#providing-your-own-kubernetes-binaries) +1. [Container image caching](#container-image-caching) +1. [IAM permissions](#iam-permissions) +1. [Customizing kubelet config](#customizing-kubelet-config) +1. [AL2 and Linux kernel information](#al2-and-linux-kernel-information) +1. [Updating known instance types](#updating-known-instance-types) + +--- + +## AMI template variables + +Default values for most variables are defined in [a default variable file](../eks-worker-al2-variables.json). + +Users have the following options for specifying their own values: + +1. Provide a variable file with the `PACKER_VARIABLE_FILE` argument to `make`. Values in this file will override values in the default variable file. Your variable file does not need to include all possible variables, as it will be merged with the default variable file. +2. Pass a key-value pair for any template variable to `make`. These values will override any values that were specified with the first method. + +**Note** that some variables (such as `arch` and `kubernetes_version`) do not have a sensible, static default, and are satisfied by the Makefile. Such variables do not appear in the default variable file, and must be overridden (if necessary) by the second method described above. 
+ +--- + +## Building against other versions of Kubernetes binaries +To build an Amazon EKS Worker AMI with other versions of Kubernetes that are not listed above run the following AWS Command +Line Interface (AWS CLI) commands to obtain values for KUBERNETES_VERSION, KUBERNETES_BUILD_DATE, PLATFORM, ARCH from S3 +```bash +#List of all available Kubernetes Versions: +aws s3 ls s3://amazon-eks +KUBERNETES_VERSION=1.23.9 # Choose a version and set the variable + +#List of all builds for the specified Kubernetes Version: +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/ +KUBERNETES_BUILD_DATE=2022-07-27 # Choose a date and set the variable + +#List of all platforms available for the selected Kubernetes Version and build date +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/ +PLATFORM=linux # Choose a platform and set the variable + +#List of all architectures for the selected Kubernetes Version, build date and platform +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/ +ARCH=x86_64 #Choose an architecture and set the variable +``` +Run the following command to build an Amazon EKS Worker AMI based on the chosen parameters in the previous step +```bash +make k8s \ + kubernetes_version=$KUBERNETES_VERSION \ + kubernetes_build_date=$KUBERNETES_BUILD_DATE \ + arch=$ARCH +``` + +--- + +## Providing your own Kubernetes Binaries + +By default, binaries are downloaded from the Amazon EKS public Amazon Simple Storage Service (Amazon S3) +bucket amazon-eks in us-west-2. You can instead choose to provide your own version of Kubernetes binaries to be used. To use your own binaries + +1. Copy the binaries to your own S3 bucket using the AWS CLI. Here is an example that uses the kubelet binary +```bash + aws s3 cp kubelet s3://my-custom-bucket/kubernetes_version/kubernetes_build_date/bin/linux/arch/kubelet +``` +**Note**: Replace my-custom-bucket, amazon-eks, kubernetes_version, kubernetes_build_date, and arch with your values. 
+ +**Important**: You must provide all the binaries listed in the default amazon-eks bucket for a specific kubernetes_version, kubernetes_build_date, and arch combination. These binaries must be accessible through AWS Identity and Access Management (IAM) credentials configured in the Install and configure HashiCorp Packer section. + +2. Run the following command to start the build process to use your own Kubernetes binaries +```bash +make k8s \ + binary_bucket_name=my-custom-bucket \ + binary_bucket_region=eu-west-1 \ + kubernetes_version=1.14.9 \ + kubernetes_build_date=2020-01-22 +``` +**Note**: Confirm that the binary_bucket_name, binary_bucket_region, kubernetes_version, and kubernetes_build_date parameters match the path to your binaries in Amazon S3. + +The Makefile runs Packer with the `eks-worker-al2.json` build specification +template and the [amazon-ebs](https://www.packer.io/docs/builders/amazon-ebs.html) +builder. An instance is launched and the Packer [Shell +Provisioner](https://www.packer.io/docs/provisioners/shell.html) runs the +`install-worker.sh` script on the instance to install software and perform other +necessary configuration tasks. Then, Packer creates an AMI from the instance +and terminates the instance after the AMI is created. + +--- + +## Container Image Caching + +Optionally, some container images can be cached during the AMI build process in order to reduce the latency of the node getting to a `Ready` state when launched. + +To turn on container image caching: + +``` +cache_container_images=true make 1.23 +``` + +When container image caching is enabled, the following images are cached: + - 602401143452.dkr.ecr.<region>.amazonaws.com/eks/kube-proxy:<default and latest>-eksbuild.<version> + - 602401143452.dkr.ecr.<region>.amazonaws.com/eks/kube-proxy:<default and latest>-minimal-eksbuild.<version> 
+ - 602401143452.dkr.ecr.<region>.amazonaws.com/eks/pause:3.5 + - 602401143452.dkr.ecr.<region>.amazonaws.com/amazon-k8s-cni-init:<default and latest> + - 602401143452.dkr.ecr.<region>.amazonaws.com/amazon-k8s-cni:<default and latest> + +The account ID can be different depending on the region and partition you are building the AMI in. See [here](https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html) for more details. + +Since the VPC CNI is not versioned with K8s itself, the latest version of the VPC CNI and the default version, based on the response from the EKS DescribeAddonVersions at the time of the AMI build, will be cached. + +The images listed above are also tagged with each region in the partition the AMI is built in, since images are often built in one region and copied to others within the same partition. Images that are available to pull from an ECR FIPS endpoint are also tagged as such (e.g. `602401143452.dkr.ecr-fips.us-east-1.amazonaws.com/eks/pause:3.5`). + +When listing images on a node, you'll notice a long list of images. However, most of these images are simply tagged in different ways with no storage overhead. Images cached in the AMI total around 1.0 GiB. In general, a node with no images cached using the VPC CNI will use around 500 MiB of images when in a `Ready` state with no other pods running on the node. 
+ +--- + +## IAM Permissions + +To build the EKS Optimized AMI, you will need the following permissions: + +``` +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ec2:AttachVolume", + "ec2:AuthorizeSecurityGroupIngress", + "ec2:CopyImage", + "ec2:CreateImage", + "ec2:CreateKeypair", + "ec2:CreateSecurityGroup", + "ec2:CreateSnapshot", + "ec2:CreateTags", + "ec2:CreateVolume", + "ec2:DeleteKeyPair", + "ec2:DeleteSecurityGroup", + "ec2:DeleteSnapshot", + "ec2:DeleteVolume", + "ec2:DeregisterImage", + "ec2:DescribeImageAttribute", + "ec2:DescribeImages", + "ec2:DescribeInstances", + "ec2:DescribeInstanceStatus", + "ec2:DescribeRegions", + "ec2:DescribeSecurityGroups", + "ec2:DescribeSnapshots", + "ec2:DescribeSubnets", + "ec2:DescribeTags", + "ec2:DescribeVolumes", + "ec2:DetachVolume", + "ec2:GetPasswordData", + "ec2:ModifyImageAttribute", + "ec2:ModifyInstanceAttribute", + "ec2:ModifySnapshotAttribute", + "ec2:RegisterImage", + "ec2:RunInstances", + "ec2:StopInstances", + "ec2:TerminateInstances", + "eks:DescribeAddonVersions", + "ecr:GetAuthorizationToken" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer" + ], + "Resource": "arn:aws:ecr:us-west-2:602401143452:repository/*" + }, + { + "Effect": "Allow", + "Action": [ + "s3:GetObject" + ], + "Resource": "arn:aws:s3:::amazon-eks/*" + } + ] +} +``` + +You will need to use the region you are building the AMI in to specify the ECR repository resource in the second IAM statement. You may also need to change the account if you are building the AMI in a different partition or special region. You can see a mapping of regions to account ID [here](https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html). 
+If you're using a custom s3 bucket to vend different K8s binaries, you will need to change the resource in the third IAM statement above to reference your custom bucket. +For more information about the permissions required by Packer with different configurations, see the [docs](https://www.packer.io/plugins/builders/amazon#iam-task-or-instance-role). + +--- + +## Customizing Kubelet Config + +In some cases, customers may want to customize the [kubelet configuration](https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/#kubelet-config-k8s-io-v1beta1-KubeletConfiguration) on their nodes, and there are two mechanisms to do that with the EKS Optimized AMI. + +**Set the "--kubelet-extra-args" flag when invoking bootstrap.sh** + +`bootstrap.sh`, the script that bootstraps nodes when using the EKS Optimized AMI, supports a flag called `--kubelet-extra-args` that allows you to pass in additional `kubelet` configuration. If you invoke the bootstrap script yourself (self-managed nodegroups or EKS managed nodegroups with custom AMIs), you can use that to customize your configuration. For example, you can use something like the following in your userdata: + +``` +/etc/eks/bootstrap.sh my-cluster --kubelet-extra-args '--registry-qps=20 --registry-burst=40' +``` + +In this case, it will set `registryPullQPS` to 20 and `registryBurst` to 40 in `kubelet`. Some of the flags, like the ones above, are marked as deprecated and you're encouraged to set them in the `kubelet` config file (described below), but they continue to work as of 1.23. + +**Update the kubelet config file** + +You can update the `kubelet` config file directly with new configuration. On EKS Optimized AMIs, the file is stored at `/etc/kubernetes/kubelet/kubelet-config.json`. It must be valid JSON. 
You can use a utility like `jq` (or your tool of choice) to edit the config in your user data: + +``` +echo "$(jq ".registryPullQPS=20 | .registryBurst=40" /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json +``` + +There are a couple of important caveats here: + +1. If you update the `kubelet` config file after `kubelet` has already started (i.e. `bootstrap.sh` already ran), you'll need to restart `kubelet` to pick up the latest configuration. +2. [bootstrap.sh](https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh) does modify a few fields, like `kubeReserved` and `evictionHard`, so you'd need to modify the config after the bootstrap script is run and restart `kubelet` to overwrite those properties. + +**View active kubelet config** + +When `kubelet` starts up, it logs all possible flags, including unset flags. The unset flags get logged with default values. *These logs do not necessarily reflect the actual active configuration.* This has caused confusion in the past when customers have configured the `kubelet` config file with one value and notice the default value is logged. Here is an example of the referenced log: + +``` +Aug 16 21:53:49 ip-192-168-92-220.us-east-2.compute.internal kubelet[3935]: I0816 21:53:49.202824 3935 flags.go:59] FLAG: --registry-burst="10" +Aug 16 21:53:49 ip-192-168-92-220.us-east-2.compute.internal kubelet[3935]: I0816 21:53:49.202829 3935 flags.go:59] FLAG: --registry-qps="5" +``` + +To view the actual `kubelet` config on your node, you can use the Kubernetes API to confirm that your configuration has applied. + +``` +$ kubectl proxy +$ curl -sSL "http://localhost:8001/api/v1/nodes/ip-192-168-92-220.us-east-2.compute.internal/proxy/configz" | jq + +{ + "kubeletconfig": { + ... + "registryPullQPS": 20, + "registryBurst": 40, + ... 
+ } +} +``` + +--- + +## AL2 and Linux Kernel Information + +By default, the `amazon-eks-ami` uses a [source_ami_filter](https://github.com/awslabs/amazon-eks-ami/blob/e3f1b910f83ad1f27e68312e50474ea6059f052d/eks-worker-al2.json#L46) that selects the latest [hvm](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/virtualization_types.html) AL2 AMI for the given architecture as the base AMI. For more information on what kernel versions are running on published Amazon EKS optimized Linux AMIs, see [the public documentation](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html). + +When building an AMI, you can set the `kernel_version` to `4.14` or `5.4` to customize the kernel version. The [upgrade_kernel.sh script](https://github.com/awslabs/amazon-eks-ami/blob/master/scripts/upgrade_kernel.sh#L26) contains the logic for updating and upgrading the kernel. For Kubernetes versions 1.18 and below, it uses the `4.14` kernel if not set, and it will install the latest patches. For Kubernetes version 1.19 and above, it uses the `5.4` kernel if not set. + +--- + +## Updating known instance types + +`files/bootstrap.sh` configures the maximum number of pods on a node based off of the number of ENIs available, which is determined by the instance type. Larger instances generally have more ENIs. The number of ENIs limits how many IPV4 addresses are available on an instance, and we need one IP address per pod. You can [see this file](https://github.com/aws/amazon-vpc-cni-k8s/blob/master/scripts/gen_vpc_ip_limits.go) for the code that calculates the max pods for more information. + +To add support for new instance types, at a minimum, we need to update `files/eni-max-pods.txt` using the [amazon-vpc-cni-k8s package.](https://github.com/aws/amazon-vpc-cni-k8s) to set the number of max pods available for those instance types. If the instance type is not on the list, `bootstrap.sh` will fail when the node is started. 
+ +``` +$ git clone git@github.com:aws/amazon-vpc-cni-k8s.git + +# AWS credentials required at this point +$ make generate-limits +# misc/eni-max-pods.txt should be generated + +# Copy the generated file to this repo, something like this: +$ cp misc/eni-max-pods.txt ../amazon-eks-ami/files/ + +# Verify that expected types were added +$ git diff +``` + +At this point, you can build an AMI and it will include the updated list of instance types. From 82ba970d30f0376ad9c2e176c4b27fdcdd0ef8aa Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 21 Nov 2022 18:29:52 -0800 Subject: [PATCH 369/621] Remove unused variable (#1107) --- eks-worker-al2.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 96394457c..020758d36 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -166,7 +166,6 @@ "AWS_SESSION_TOKEN={{user `aws_session_token`}}", "SONOBUOY_E2E_REGISTRY={{user `sonobuoy_e2e_registry`}}", "PAUSE_CONTAINER_VERSION={{user `pause_container_version`}}", - "KUBE_PROXY_VERSION_SUFFIX={{user `kube_proxy_version_suffix`}}", "CACHE_CONTAINER_IMAGES={{user `cache_container_images`}}" ] }, @@ -224,4 +223,4 @@ } } ] -} \ No newline at end of file +} From aa02e745a9f767b8f4b5fb0bc79ffa8ddca57f36 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 22 Nov 2022 08:53:34 -0800 Subject: [PATCH 370/621] Move CHANGELOG back to root dir (#1108) --- doc/CHANGELOG.md => CHANGELOG.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename doc/CHANGELOG.md => CHANGELOG.md (100%) diff --git a/doc/CHANGELOG.md b/CHANGELOG.md similarity index 100% rename from doc/CHANGELOG.md rename to CHANGELOG.md From 06fb2fcc0d5697b874ab165cd1495f9e67184880 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 22 Nov 2022 11:27:59 -0800 Subject: [PATCH 371/621] Add eks-worker-al2-variables.json to archive (#1109) --- ArchiveBuildConfig.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ArchiveBuildConfig.yaml b/ArchiveBuildConfig.yaml 
index 2c5bf850c..ba146715d 100644 --- a/ArchiveBuildConfig.yaml +++ b/ArchiveBuildConfig.yaml @@ -13,6 +13,7 @@ dependencies: files: - src: Makefile - src: eks-worker-al2.json + - src: eks-worker-al2-variables.json archive: name: amazon-eks-ami.tar.gz type: tgz From 667600536b93b089c805195bf00aa275b6bb76c4 Mon Sep 17 00:00:00 2001 From: Victor Pineda Gonzalez Date: Tue, 22 Nov 2022 14:48:10 -0800 Subject: [PATCH 372/621] Skip sandbox image pull if already present (#1090) --- files/pull-sandbox-image.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/files/pull-sandbox-image.sh b/files/pull-sandbox-image.sh index 523e7ed4c..e1c1a6eb8 100644 --- a/files/pull-sandbox-image.sh +++ b/files/pull-sandbox-image.sh @@ -2,4 +2,10 @@ set -euo pipefail sandbox_image="$(awk -F'[ ="]+' '$1 == "sandbox_image" { print $2 }' /etc/containerd/config.toml)" + +### Short-circuit fetching sandbox image if its already present +if [[ "$(sudo ctr --namespace k8s.io image ls | grep $sandbox_image)" != "" ]]; then + exit 0 +fi + /etc/eks/containerd/pull-image.sh "${sandbox_image}" From f6c96de6f588e44b2d4d254831d902dcf233a14c Mon Sep 17 00:00:00 2001 From: Victor Pineda Gonzalez Date: Wed, 23 Nov 2022 15:26:49 -0800 Subject: [PATCH 373/621] Fix typo in comment (#1110) --- files/bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 78e9d4fab..51a12405b 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -355,7 +355,7 @@ fi ### - append entries to /etc/hosts with the mappings of control plane host IP address and API server ### domain name. So that the domain name can be resolved to IP addresses locally. ### - use aws-iam-authenticator as bootstrap auth for kubelet TLS bootstrapping which downloads client -### X.509 certificate and generate kubelet kubeconfig file which uses the cleint cert. So that the +### X.509 certificate and generate kubelet kubeconfig file which uses the client cert. 
So that the ### worker node can be authentiacated through X.509 certificate which works for both connected and #### disconnected state. if [[ "${ENABLE_LOCAL_OUTPOST}" == "true" ]]; then From 4305812a0ccf0f5f4fd778614b0880ae9bdaed0b Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 1 Dec 2022 15:08:08 -0800 Subject: [PATCH 374/621] Add link to doc/USER_GUIDE.md (#1114) --- README.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 97baeed2f..cbb911f78 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,17 @@ custom Amazon EKS AMI with [HashiCorp Packer](https://www.packer.io/). This is the same configuration that Amazon EKS uses to create the official Amazon EKS-optimized AMI. -## Getting started +**Check out the AMI's [user guide](doc/USER_GUIDE.md) for more information.** -If you are just getting started with Amazon EKS, we recommend that you follow +## 🚀 Getting started + +If you are new to Amazon EKS, we recommend that you follow our [Getting Started](https://docs.aws.amazon.com/eks/latest/userguide/getting-started.html) chapter in the Amazon EKS User Guide. If you already have a cluster, and you want to launch a node group with your new AMI, see [Launching Amazon EKS Worker -Nodes](https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html) -in the Amazon EKS User Guide. +Nodes](https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html). -## Pre-requisites +## 🔢 Pre-requisites You must have [Packer](https://www.packer.io/) version 1.8.0 or later installed on your local system. For more information, see [Installing Packer](https://www.packer.io/docs/install/index.html) @@ -23,7 +24,7 @@ configured so that Packer can make calls to AWS API operations on your behalf. For more information, see [Authentication](https://www.packer.io/docs/builders/amazon.html#specifying-amazon-credentials) in the Packer documentation. 
-## Building the AMI +## 👷 Building the AMI A Makefile is provided to build the Amazon EKS Worker AMI, but it is just a small wrapper around invoking Packer directly. You can initiate the build process by running the @@ -42,10 +43,10 @@ make 1.23 ## Build a Amazon EKS Worker AMI for k8s 1.23 The default instance type to build this AMI does not qualify for the AWS free tier. You are charged for any instances created when building this AMI. -## Security +## 🔒 Security For security issues or concerns, please do not open an issue or pull request on GitHub. Please report any suspected or confirmed security issues to AWS Security https://aws.amazon.com/security/vulnerability-reporting/ -## License Summary +## ⚖️ License Summary This sample code is made available under a modified MIT license. See the LICENSE file. From acf0d78478346de4dfd3b18aef396a483a3443a3 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 5 Dec 2022 10:36:41 -0800 Subject: [PATCH 375/621] Remove aws_region and binary_bucket_region overrides (#1115) --- Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Makefile b/Makefile index 6eb4e4b67..71c410343 100644 --- a/Makefile +++ b/Makefile @@ -6,8 +6,6 @@ K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS MAKEFILE_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) -aws_region ?= $(AWS_DEFAULT_REGION) -binary_bucket_region ?= $(AWS_DEFAULT_REGION) arch ?= x86_64 ifeq ($(arch), arm64) instance_type ?= m6g.large From cb9c8a8640cd1baba29cd6dd89d0ab6f105c3fe2 Mon Sep 17 00:00:00 2001 From: Gerald Barker Date: Fri, 9 Dec 2022 17:28:52 +0000 Subject: [PATCH 376/621] Lookup instanceId using IMDSv2 (#1116) --- log-collector-script/windows/eks-log-collector.ps1 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/log-collector-script/windows/eks-log-collector.ps1 b/log-collector-script/windows/eks-log-collector.ps1 index d3e03660b..f96916e7e 100644 --- 
a/log-collector-script/windows/eks-log-collector.ps1 +++ b/log-collector-script/windows/eks-log-collector.ps1 @@ -26,7 +26,8 @@ param( # Common options $basedir="C:\log-collector" -$instanceid = Invoke-RestMethod -uri http://169.254.169.254/latest/meta-data/instance-id +$token = Invoke-RestMethod -Headers @{"X-aws-ec2-metadata-token-ttl-seconds" = "5"} -Method PUT -Uri http://169.254.169.254/latest/api/token +$instanceId = Invoke-RestMethod -Headers @{"X-aws-ec2-metadata-token" = $token} -Method GET -Uri http://169.254.169.254/latest/meta-data/instance-id $curtime = Get-Date -Format FileDateTimeUniversal $outfilename = "eks_" + $instanceid + "_" + $curtime + ".zip" $infodir="$basedir\collect" From f1bb10b4b161fc43eddad04359b1ab03530dcb2e Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 9 Dec 2022 16:13:55 -0800 Subject: [PATCH 377/621] Handle indentation when parsing `sandbox_image` (#1119) --- files/pull-sandbox-image.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/pull-sandbox-image.sh b/files/pull-sandbox-image.sh index e1c1a6eb8..e6484a962 100644 --- a/files/pull-sandbox-image.sh +++ b/files/pull-sandbox-image.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -sandbox_image="$(awk -F'[ ="]+' '$1 == "sandbox_image" { print $2 }' /etc/containerd/config.toml)" +source <(grep "sandbox_image" /etc/containerd/config.toml | tr -d ' ') ### Short-circuit fetching sandbox image if its already present if [[ "$(sudo ctr --namespace k8s.io image ls | grep $sandbox_image)" != "" ]]; then From c5a09beba2c4bdb8ac18a3eaa319685716368637 Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Fri, 9 Dec 2022 18:40:48 -0600 Subject: [PATCH 378/621] Discover latest eksbuild version when caching container images (#1120) --- scripts/install-worker.sh | 46 ++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 669eb4330..ca40e2a7b 100644 --- 
a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -403,7 +403,16 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" && "$BINARY_BUCKET_REGION" != "us-iso- LATEST_KUBE_PROXY_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) LATEST_KUBE_PROXY_PLATFORM_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) - KUBE_PROXY_IMGS=( + #### Cache VPC CNI images starting with the addon default version and the latest version + VPC_CNI_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name vpc-cni --kubernetes-version=${K8S_MINOR_VERSION}) + DEFAULT_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') + LATEST_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) + CNI_IMG="${ECR_URI}/amazon-k8s-cni" + CNI_INIT_IMG="${CNI_IMG}-init" + + CACHE_IMGS=( + "${PAUSE_CONTAINER}" + ## Default kube-proxy images "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-minimal-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" @@ -411,15 +420,7 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" && "$BINARY_BUCKET_REGION" != "us-iso- ## Latest kube-proxy images "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-minimal-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" - ) - #### Cache VPC CNI images starting with the addon default version and the latest version - VPC_CNI_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name vpc-cni --kubernetes-version=${K8S_MINOR_VERSION}) - DEFAULT_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') - LATEST_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r 
'.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) - CNI_IMG="${ECR_URI}/amazon-k8s-cni" - CNI_INIT_IMG="${CNI_IMG}-init" - CNI_IMGS=( ## Default VPC CNI Images "${CNI_IMG}:${DEFAULT_VPC_CNI_VERSION}" "${CNI_INIT_IMG}:${DEFAULT_VPC_CNI_VERSION}" @@ -428,24 +429,33 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" && "$BINARY_BUCKET_REGION" != "us-iso- "${CNI_IMG}:${LATEST_VPC_CNI_VERSION}" "${CNI_INIT_IMG}:${LATEST_VPC_CNI_VERSION}" ) + PULLED_IMGS=() - CACHED_IMGS=( - "${PAUSE_CONTAINER}" - ${KUBE_PROXY_IMGS[@]} - ${CNI_IMGS[@]} - ) - - for img in "${CACHED_IMGS[@]}"; do + for img in "${CACHE_IMGS[@]}"; do ## only kube-proxy-minimal is vended for K8s 1.24+ if [[ "${img}" == *"kube-proxy:"* ]] && [[ "${img}" != *"-minimal-"* ]] && vercmp "${K8S_MINOR_VERSION}" gteq "1.24"; then continue fi - /etc/eks/containerd/pull-image.sh "${img}" + ## Since eksbuild.x version may not match the image tag, we need to decrement the eksbuild version until we find the latest image tag within the app semver + eksbuild_version="1" + if [[ ${img} == *'eksbuild.'* ]]; then + eksbuild_version=$(echo "${img}" | grep -o 'eksbuild\.[0-9]\+' | cut -d'.' 
-f2) + fi + ## iterate through decrementing the build version each time + for build_version in $(seq "${eksbuild_version}" -1 1); do + img=$(echo "${img}" | sed -E "s/eksbuild.[0-9]+/eksbuild.${build_version}/") + if /etc/eks/containerd/pull-image.sh "${img}"; then + PULLED_IMGS+=("${img}") + break + elif [[ "${build_version}" -eq 1 ]]; then + exit 1 + fi + done done #### Tag the pulled down image for all other regions in the partition for region in $(aws ec2 describe-regions --all-regions | jq -r '.Regions[] .RegionName'); do - for img in "${CACHED_IMGS[@]}"; do + for img in "${PULLED_IMGS[@]}"; do regional_img="${img/$BINARY_BUCKET_REGION/$region}" sudo ctr -n k8s.io image tag "${img}" "${regional_img}" || : ## Tag ECR fips endpoint for supported regions From d022ac29f5c21f9b2d5f9bc35dad85d92efae829 Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Mon, 12 Dec 2022 12:48:07 -0600 Subject: [PATCH 379/621] Add cached images to version-info (#1122) --- scripts/generate-version-info.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/generate-version-info.sh b/scripts/generate-version-info.sh index f35243936..9a52f42ce 100644 --- a/scripts/generate-version-info.sh +++ b/scripts/generate-version-info.sh @@ -18,3 +18,6 @@ sudo rpm --query --all --queryformat '\{"%{NAME}": "%{VERSION}-%{RELEASE}"\}\n' # binaries echo $(jq ".binaries.kubelet = \"$(kubelet --version | awk '{print $2}')\"" $OUTPUT_FILE) > $OUTPUT_FILE echo $(jq ".binaries.awscli = \"$(aws --version | awk '{print $1}' | cut -d '/' -f 2)\"" $OUTPUT_FILE) > $OUTPUT_FILE + +# cached images +echo $(jq ".images = [ $(sudo ctr -n k8s.io image ls -q | cut -d'/' -f2- | sort | uniq | grep -v 'sha256' | xargs -r printf "\"%s\"," | sed 's/,$//') ]" $OUTPUT_FILE) > $OUTPUT_FILE From 9c0da3482c8f9693cea225f149b07fb61e33133c Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 12 Dec 2022 15:36:25 -0800 Subject: [PATCH 380/621] Document daemon.json change in GPU AMI (#1123) --- CHANGELOG.md | 2 ++ 1 file 
changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a305f1061..3e6d5c0d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -132,6 +132,8 @@ Notable changes: * Update docker and containerd for [ALASDOCKER-2022-021](https://alas.aws.amazon.com/AL2/ALASDOCKER-2022-021.html) [#1056](https://github.com/awslabs/amazon-eks-ami/pull/1056) * runc version is updated to 1.1.3-1.amzn2.0.2 to include ALAS2DOCKER-2022-020 [#1055](https://github.com/awslabs/amazon-eks-ami/pull/1055) * Release AMI in me-central-1 with version 1.21, 1.22, 1.23. 1.20 is not supported in this region since it will be deprecated soon. +* Fixes an issue with Docker daemon configuration on the GPU AMI (#351). + * **Note** that if you have a workaround in place for this issue, you'll likely need to revert it. ### [Recalled] AMI Release v20221027 * amazon-eks-gpu-node-1.23-v20221027 From d4e0921b4bd28188ccbbaf6701bb9aab28947c28 Mon Sep 17 00:00:00 2001 From: Zaid Ahmed Farooq <38226823+zaf6862@users.noreply.github.com> Date: Tue, 13 Dec 2022 09:17:56 -0800 Subject: [PATCH 381/621] Add ECR accounts for eu-south-2, eu-central-2, ap-south-2 (#1125) Co-authored-by: Zaid Farooq --- files/get-ecr-uri.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/files/get-ecr-uri.sh b/files/get-ecr-uri.sh index f5e87b932..e865ab24d 100755 --- a/files/get-ecr-uri.sh +++ b/files/get-ecr-uri.sh @@ -45,6 +45,15 @@ else me-central-1) acct="759879836304" ;; + eu-south-2) + acct="455263428931" + ;; + eu-central-2) + acct="900612956339" + ;; + ap-south-2) + acct="900889452093" + ;; *) acct="602401143452" ;; From 5da3eb794a1439e7515dc569cf6de5803a680a31 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 13 Dec 2022 11:18:27 -0800 Subject: [PATCH 382/621] Update to runc-1.1.4-1.amzn2 (#1124) --- eks-worker-al2-variables.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 5f1caaa0b..7346acccb 100644 --- 
a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -22,7 +22,7 @@ "pause_container_version": "3.5", "pull_cni_from_github": "true", "remote_folder": "", - "runc_version": "1.1.3-1.amzn2.0.2", + "runc_version": "runc-1.1.4-1.amzn2", "security_group_id": "", "sonobuoy_e2e_registry": "", "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", @@ -33,4 +33,4 @@ "subnet_id": "", "temporary_security_group_source_cidrs": "", "volume_type": "gp2" -} \ No newline at end of file +} From 671ed37c4328e8b8909cbdae325d520cd5f632f2 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 13 Dec 2022 11:42:15 -0800 Subject: [PATCH 383/621] Fix runc_version typo (#1127) --- eks-worker-al2-variables.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 7346acccb..59d69ba28 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -22,7 +22,7 @@ "pause_container_version": "3.5", "pull_cni_from_github": "true", "remote_folder": "", - "runc_version": "runc-1.1.4-1.amzn2", + "runc_version": "1.1.4-1.amzn2", "security_group_id": "", "sonobuoy_e2e_registry": "", "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", From 3e1c6c2c421be1e3b0b46f5bce7ac28c9b6123fb Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Mon, 19 Dec 2022 08:26:34 -0600 Subject: [PATCH 384/621] Do not cache addons images if none are available (#1133) --- scripts/install-worker.sh | 69 +++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index ca40e2a7b..cf1008410 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -393,41 +393,54 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" && "$BINARY_BUCKET_REGION" != "us-iso- sudo systemctl enable containerd sandbox-image K8S_MINOR_VERSION=$(echo "${KUBERNETES_VERSION}" | cut -d'.' 
-f1-2) - KUBE_PROXY_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name kube-proxy --kubernetes-version=${K8S_MINOR_VERSION}) - - DEFAULT_KUBE_PROXY_FULL_VERSION=$(echo "${KUBE_PROXY_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') - DEFAULT_KUBE_PROXY_VERSION=$(echo "${DEFAULT_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) - DEFAULT_KUBE_PROXY_PLATFORM_VERSION=$(echo "${DEFAULT_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) - LATEST_KUBE_PROXY_FULL_VERSION=$(echo "${KUBE_PROXY_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) - LATEST_KUBE_PROXY_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) - LATEST_KUBE_PROXY_PLATFORM_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) + #### Cache kube-proxy images starting with the addon default version and the latest version + KUBE_PROXY_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name kube-proxy --kubernetes-version=${K8S_MINOR_VERSION}) + KUBE_PROXY_IMGS=() + if [[ $(jq '.addons | length' <<< $KUBE_PROXY_ADDON_VERSIONS) -gt 0 ]]; then + DEFAULT_KUBE_PROXY_FULL_VERSION=$(echo "${KUBE_PROXY_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') + DEFAULT_KUBE_PROXY_VERSION=$(echo "${DEFAULT_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) + DEFAULT_KUBE_PROXY_PLATFORM_VERSION=$(echo "${DEFAULT_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) + + LATEST_KUBE_PROXY_FULL_VERSION=$(echo "${KUBE_PROXY_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) + LATEST_KUBE_PROXY_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) + LATEST_KUBE_PROXY_PLATFORM_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) + + KUBE_PROXY_IMGS=( + ## Default kube-proxy images + "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" 
+ "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-minimal-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" + + ## Latest kube-proxy images + "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" + "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-minimal-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" + ) + fi #### Cache VPC CNI images starting with the addon default version and the latest version VPC_CNI_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name vpc-cni --kubernetes-version=${K8S_MINOR_VERSION}) - DEFAULT_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') - LATEST_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) - CNI_IMG="${ECR_URI}/amazon-k8s-cni" - CNI_INIT_IMG="${CNI_IMG}-init" + VPC_CNI_IMGS=() + if [[ $(jq '.addons | length' <<< $VPC_CNI_ADDON_VERSIONS) -gt 0 ]]; then + DEFAULT_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') + LATEST_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) + CNI_IMG="${ECR_URI}/amazon-k8s-cni" + CNI_INIT_IMG="${CNI_IMG}-init" + + VPC_CNI_IMGS=( + ## Default VPC CNI Images + "${CNI_IMG}:${DEFAULT_VPC_CNI_VERSION}" + "${CNI_INIT_IMG}:${DEFAULT_VPC_CNI_VERSION}" + + ## Latest VPC CNI Images + "${CNI_IMG}:${LATEST_VPC_CNI_VERSION}" + "${CNI_INIT_IMG}:${LATEST_VPC_CNI_VERSION}" + ) + fi CACHE_IMGS=( "${PAUSE_CONTAINER}" - - ## Default kube-proxy images - "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" - "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-minimal-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" - - ## Latest kube-proxy images - 
"${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" - "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-minimal-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" - - ## Default VPC CNI Images - "${CNI_IMG}:${DEFAULT_VPC_CNI_VERSION}" - "${CNI_INIT_IMG}:${DEFAULT_VPC_CNI_VERSION}" - - ## Latest VPC CNI Images - "${CNI_IMG}:${LATEST_VPC_CNI_VERSION}" - "${CNI_INIT_IMG}:${LATEST_VPC_CNI_VERSION}" + ${KUBE_PROXY_IMGS[@]+"${KUBE_PROXY_IMGS[@]}"} + ${VPC_CNI_IMGS[@]+"${VPC_CNI_IMGS[@]}"} ) PULLED_IMGS=() From 6b1df1107d6e7ec2f46612ebb509c16101ded22d Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 29 Dec 2022 18:54:56 -0600 Subject: [PATCH 385/621] Update CHANGELOG for v20221222 (#1140) --- CHANGELOG.md | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e6d5c0d3..0a8d94770 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,64 @@ # Changelog +### AMI Release v20221222 +* amazon-eks-gpu-node-1.24-v20221222 +* amazon-eks-gpu-node-1.23-v20221222 +* amazon-eks-gpu-node-1.22-v20221222 +* amazon-eks-gpu-node-1.21-v20221222 +* amazon-eks-gpu-node-1.20-v20221222 +* amazon-eks-arm64-node-1.24-v20221222 +* amazon-eks-arm64-node-1.23-v20221222 +* amazon-eks-arm64-node-1.22-v20221222 +* amazon-eks-arm64-node-1.21-v20221222 +* amazon-eks-arm64-node-1.20-v20221222 +* amazon-eks-node-1.24-v20221222 +* amazon-eks-node-1.23-v20221222 +* amazon-eks-node-1.22-v20221222 +* amazon-eks-node-1.21-v20221222 +* amazon-eks-node-1.20-v20221222 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.7-20221222` +* `1.23.13-20221222` +* `1.22.15-20221222` +* `1.21.14-20221222` +* `1.20.15-20221222` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.7/2022-10-31/ +* s3://amazon-eks/1.23.13/2022-10-31/ +* s3://amazon-eks/1.22.15/2022-10-31/ +* s3://amazon-eks/1.21.14/2022-10-31/ +* 
s3://amazon-eks/1.20.15/2022-10-31/ + +AMI details: +* kernel: 5.4.226-129.415.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.4-1.amzn2 +* cuda: 11.4.0-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +- Kernel updated to `5.4.226-129.415.amzn2` for: + - [ALASKERNEL-5.4-2022-040](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2022-040.html) + - [ALASKERNEL-5.4-2022-039](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2022-039.html) +- NVIDIA driver updated to `470.161.03-1` to address security issues. More information is available in [NVIDIA security bulletin #5415](https://nvidia.custhelp.com/app/answers/detail/a_id/5415). +- Cache pause, vpc-cni, and kube-proxy images during build ([#938](https://github.com/awslabs/amazon-eks-ami/pull/938)) + - *Note* that this has only been enabled for 1.24 AMIs at this time. +- Disable yum updates in cloud-init ([#1074](https://github.com/awslabs/amazon-eks-ami/pull/1074)) +- Skip sandbox image pull if already present ([#1090](https://github.com/awslabs/amazon-eks-ami/pull/1090)) +- Move variable defaults to `--var-file` ([#1079](https://github.com/awslabs/amazon-eks-ami/pull/1079)) + +Minor changes: +- Add ECR accounts for `eu-south-2`, `eu-central-2`, `ap-south-2` ([#1125](https://github.com/awslabs/amazon-eks-ami/pull/1125)) +- Handle indentation when parsing `sandbox_image` from `containerd` config ([#1119](https://github.com/awslabs/amazon-eks-ami/pull/1119)) +- Lookup instanceId using IMDSv2 in Windows log collector script ([#1116](https://github.com/awslabs/amazon-eks-ami/pull/1116)) +- Remove `aws_region` and `binary_bucket_region` overrides from Makefile ([#1115](https://github.com/awslabs/amazon-eks-ami/pull/1115)) +- Sym-link awscli to /bin ([#1102](https://github.com/awslabs/amazon-eks-ami/pull/1102)) +- Configure containerd registry certificates by default ([#1049](https://github.com/awslabs/amazon-eks-ami/pull/1049)) + ### 
AMI Release v20221112 * amazon-eks-gpu-node-1.24-v20221112 * amazon-eks-gpu-node-1.23-v20221112 From eab112a19877122e46a706d3a91d42b85218f268 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 4 Jan 2023 10:31:59 -0800 Subject: [PATCH 386/621] Decrease launch_block_device_mappings_volume_size to 4 (#1143) --- eks-worker-al2-variables.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 59d69ba28..2685e5aa1 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -18,7 +18,7 @@ "encrypted": "false", "kernel_version": "", "kms_key_id": "", - "launch_block_device_mappings_volume_size": "8", + "launch_block_device_mappings_volume_size": "4", "pause_container_version": "3.5", "pull_cni_from_github": "true", "remote_folder": "", From 6fefb5000896b42e6fb1d916bb88b9efec1bf37e Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 9 Jan 2023 18:35:50 -0800 Subject: [PATCH 387/621] AMI Release v20230105 (#1146) --- CHANGELOG.md | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a8d94770..0c2d77b6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,54 @@ # Changelog +### AMI Release v20230105 +* amazon-eks-gpu-node-1.24-v20230105 +* amazon-eks-gpu-node-1.23-v20230105 +* amazon-eks-gpu-node-1.22-v20230105 +* amazon-eks-gpu-node-1.21-v20230105 +* amazon-eks-gpu-node-1.20-v20230105 +* amazon-eks-arm64-node-1.24-v20230105 +* amazon-eks-arm64-node-1.23-v20230105 +* amazon-eks-arm64-node-1.22-v20230105 +* amazon-eks-arm64-node-1.21-v20230105 +* amazon-eks-arm64-node-1.20-v20230105 +* amazon-eks-node-1.24-v20230105 +* amazon-eks-node-1.23-v20230105 +* amazon-eks-node-1.22-v20230105 +* amazon-eks-node-1.21-v20230105 +* amazon-eks-node-1.20-v20230105 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.7-20230105` +* 
`1.23.13-20230105` +* `1.22.15-20230105` +* `1.21.14-20230105` +* `1.20.15-20230105` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.7/2022-10-31/ +* s3://amazon-eks/1.23.13/2022-10-31/ +* s3://amazon-eks/1.22.15/2022-10-31/ +* s3://amazon-eks/1.21.14/2022-10-31/ +* s3://amazon-eks/1.20.15/2022-10-31/ + +AMI details: +* kernel: 5.4.226-129.415.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.4-1.amzn2 +* cuda: 11.4.0-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +- This will be the last release for 1.20 AMI's. +- Decrease `launch_block_device_mappings_volume_size` to 4 ([#1143](https://github.com/awslabs/amazon-eks-ami/pull/1143)). + - This fixes an issue with 4GiB launch block devices. More information is available in [#1142](https://github.com/awslabs/amazon-eks-ami/issues/1142). +- Container image caching has been disabled while we work to optimize the disk usage of this feature. This feature was only enabled for 1.24 AMI's in the previous release, [v20221222](https://github.com/awslabs/amazon-eks-ami/releases/tag/v20221222). + +Minor changes: +- Update AWS CLI to `2.9.12` + ### AMI Release v20221222 * amazon-eks-gpu-node-1.24-v20221222 * amazon-eks-gpu-node-1.23-v20221222 From e2ef710139a671b2a49b459c0b4654cdc7bab9ba Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 9 Jan 2023 18:44:02 -0800 Subject: [PATCH 388/621] Cache image content without unpacking/snapshotting (#1144) --- files/pull-image.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/pull-image.sh b/files/pull-image.sh index 2d37d88b9..ca89e2550 100755 --- a/files/pull-image.sh +++ b/files/pull-image.sh @@ -24,4 +24,4 @@ if [[ -z ${ecr_password} ]]; then echo >&2 "Unable to retrieve the ECR password." 
exit 1 fi -retry sudo ctr --namespace k8s.io image pull "${img}" --user AWS:${ecr_password} +retry sudo ctr --namespace k8s.io content fetch "${img}" --user AWS:${ecr_password} From b95c3e671fdbc82a4651968335e69205af70b549 Mon Sep 17 00:00:00 2001 From: Victor Pineda Gonzalez Date: Wed, 11 Jan 2023 11:09:06 -0800 Subject: [PATCH 389/621] Use external cloud provider for EKS Local deployments (#1111) --- files/bootstrap.sh | 10 ++++++++++ files/kubelet-containerd.service | 2 +- files/kubelet.service | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 51a12405b..52bf5847d 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -376,8 +376,13 @@ if [[ "${ENABLE_LOCAL_OUTPOST}" == "true" ]]; then mv /var/lib/kubelet/kubeconfig /var/lib/kubelet/bootstrap-kubeconfig KUBELET_EXTRA_ARGS="--bootstrap-kubeconfig /var/lib/kubelet/bootstrap-kubeconfig $KUBELET_EXTRA_ARGS" fi + ### For Local Outpost deployments, we will use the the external cloud provider + KUBELET_CLOUD_PROVIDER="external" else sed -i s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig + + ### For any other type of deployment we will use the aws cloud provider for backwards compatibility + KUBELET_CLOUD_PROVIDER="aws" fi ### kubelet.service configuration @@ -462,6 +467,11 @@ cat << EOF > /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf Environment='KUBELET_ARGS=--node-ip=$INTERNAL_IP --pod-infra-container-image=$PAUSE_CONTAINER --v=2' EOF +cat << EOF > /etc/systemd/system/kubelet.service.d/20-kubelet-cloud-provider.conf +[Service] +Environment='KUBELET_CLOUD_PROVIDER=$KUBELET_CLOUD_PROVIDER' +EOF + if [[ -n "$KUBELET_EXTRA_ARGS" ]]; then cat << EOF > /etc/systemd/system/kubelet.service.d/30-kubelet-extra-args.conf [Service] diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index b01a5bf15..74b30f70a 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -6,7 +6,7 
@@ Requires=containerd.service sandbox-image.service [Service] ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 -ExecStart=/usr/bin/kubelet --cloud-provider aws \ +ExecStart=/usr/bin/kubelet --cloud-provider $KUBELET_CLOUD_PROVIDER \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime remote \ diff --git a/files/kubelet.service b/files/kubelet.service index 387470da1..c650d491a 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -6,7 +6,7 @@ Requires=docker.service [Service] ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 -ExecStart=/usr/bin/kubelet --cloud-provider aws \ +ExecStart=/usr/bin/kubelet --cloud-provider $KUBELET_CLOUD_PROVIDER \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ From d0baa7af2561f18777cb5ecc484cb11220f59e05 Mon Sep 17 00:00:00 2001 From: Sichaow Date: Wed, 11 Jan 2023 11:10:55 -0800 Subject: [PATCH 390/621] Remove 1.20 (#1147) --- Makefile | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 71c410343..1f74fd28d 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.20 1.21 1.22 1.23 1.24 ## Build all versions of EKS Optimized AL2 AMI +all: 1.21 1.22 1.23 1.24 ## Build all versions of EKS Optimized AL2 AMI # ensure that these flags are equivalent to the rules in the .editorconfig SHFMT_FLAGS := --list \ @@ -80,10 +80,6 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html -.PHONY: 1.20 -1.20: ## Build EKS Optimized AL2 AMI - K8s 1.20 - $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-10-31 pull_cni_from_github=true - .PHONY: 1.21 1.21: ## Build EKS Optimized AL2 AMI - K8s 1.21 $(MAKE) k8s kubernetes_version=1.21.14 
kubernetes_build_date=2022-10-31 pull_cni_from_github=true From b798b4fe508dc359bca9e8ffb375b00d6723cc03 Mon Sep 17 00:00:00 2001 From: guessi Date: Thu, 12 Jan 2023 10:32:42 +0800 Subject: [PATCH 391/621] Clean up log collector SSM documents, README's (#1135) --- log-collector-script/README.md | 5 +- log-collector-script/linux/README.md | 137 ++++++++++-------- .../linux/eks-ssm-content.json | 118 +++++---------- log-collector-script/windows/README.md | 60 +++++--- .../windows/eks-ssm-content.json | 122 ++++++---------- 5 files changed, 205 insertions(+), 237 deletions(-) diff --git a/log-collector-script/README.md b/log-collector-script/README.md index 79951fa11..eda832408 100644 --- a/log-collector-script/README.md +++ b/log-collector-script/README.md @@ -1,2 +1,3 @@ -### EKS Logs Collector -This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. \ No newline at end of file +### EKS Logs Collector + +This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. diff --git a/log-collector-script/linux/README.md b/log-collector-script/linux/README.md index f634e271c..efa100fa5 100644 --- a/log-collector-script/linux/README.md +++ b/log-collector-script/linux/README.md @@ -1,6 +1,6 @@ -### EKS Logs Collector +### EKS Logs Collector (Linux) -This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. +This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. 
#### Usage @@ -8,7 +8,8 @@ At a high level, you run this script on your Kubernetes node, and it will collec * Collect EKS logs using SSM agent, jump to below [section](#collect-eks-logs-using-ssm-agent) _(or)_ -* Run this project as the root user: +* Run this project as the root user + ``` curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-log-collector.sh sudo bash eks-log-collector.sh @@ -17,93 +18,113 @@ sudo bash eks-log-collector.sh Confirm if the tarball file was successfully created (it can be .tgz or .tar.gz) #### Retrieving the logs -Download the tarball using your favourite Secure Copy tool. + +Download the tarball using your favorite Secure Copy tool. #### Example output -The project can be used in normal or enable_debug(**Caution: enable_debug will prompt to confirm if we can restart Docker daemon which would kill running containers**). + +The project can be used in normal or enable_debug (**Caution: enable_debug will prompt to confirm if we can restart Docker daemon which would kill running containers**). ``` -# sudo bash eks-log-collector.sh --help +$ sudo bash eks-log-collector.sh --help + USAGE: eks-log-collector --help [ --ignore_introspection=true|false --ignore_metrics=true|false ] OPTIONS: - --ignore_introspection To ignore introspection of IPAMD; Pass this flag if DISABLE_INTROSPECTION is enabled on CNI - - --ignore_metrics To ignore prometheus metrics collection; Pass this flag if DISABLE_METRICS enabled on CNI - --help Show this help message. 
+ --ignore_introspection To ignore introspection of IPAMD; Pass this flag if DISABLE_INTROSPECTION is enabled on CNI -Example to Ignore IPAMD introspection: -sudo bash eks-log-collector.sh --ignore_introspection=true + --ignore_metrics Variable To ignore prometheus metrics collection; Pass this flag if DISABLE_METRICS enabled on CNI -Example to Ignore IPAMD Prometheus metrics collection: -sudo bash eks-log-collector.sh --ignore_metrics=true - -Example to Ignore IPAMD introspection and Prometheus metrics collection: -sudo bash eks-log-collector.sh --ignore_introspection=true --ignore_metrics=true + --help Show this help message. ``` + #### Example output in normal mode + The following output shows this project running in normal mode. ``` -sudo bash eks-log-collector.sh - - This is version 0.6.1. New versions can be found at https://github.com/awslabs/amazon-eks-ami - -Trying to collect common operating system logs... -Trying to collect kernel logs... -Trying to collect mount points and volume information... -Trying to collect SELinux status... -Trying to collect iptables information... -Trying to collect installed packages... -Trying to collect active system services... -Trying to collect Docker daemon information... -Trying to collect kubelet information... -Trying to collect L-IPAMD information... -Trying to collect sysctls information... -Trying to collect networking infomation... -Trying to collect CNI configuration information... -Trying to collect running Docker containers and gather container data... -Trying to collect Docker daemon logs... -Trying to archive gathered information... - - Done... your bundled logs are located in /var/log/eks_i-0717c9d54b6cfaa19_2020-03-24_0103-UTC_0.6.1.tar.gz +$ sudo bash eks-log-collector.sh + + This is version 0.7.3. New versions can be found at https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/ + +Trying to collect common operating system logs... +Trying to collect kernel logs... 
+Trying to collect mount points and volume information... +Trying to collect SELinux status... +Trying to collect iptables information... +Trying to collect installed packages... +Trying to collect active system services... +Trying to Collect Containerd daemon information... +Trying to Collect Containerd running information... +Trying to Collect Docker daemon information... + + Warning: The Docker daemon is not running. + +Trying to collect kubelet information... +Trying to collect L-IPAMD introspection information... Trying to collect L-IPAMD prometheus metrics... Trying to collect L-IPAMD checkpoint... +Trying to collect Multus logs if they exist... +Trying to collect sysctls information... +Trying to collect networking infomation... conntrack v1.4.4 (conntrack-tools): 165 flow entries have been shown. + +Trying to collect CNI configuration information... +Trying to collect Docker daemon logs... +Trying to Collect sandbox-image daemon information... +Trying to Collect CPU Throttled Process Information... +Trying to Collect IO Throttled Process Information... +Trying to archive gathered information... + + Done... your bundled logs are located in /var/log/eks_i-XXXXXXXXXXXXXXXXX_2022-12-19_1639-UTC_0.7.3.tar.gz ``` +### Collect EKS logs using SSM agent + +#### To run EKS log collector script on Worker Node(s) and upload the bundle(tar) to a S3 Bucket using SSM agent, please follow below steps -### Collect EKS logs using SSM agent -#### To run EKS log collector script on Worker Node(s) and upload the bundle(tar) to a S3 Bucket using SSM agent, please follow below steps +##### Prerequisites -##### *Prerequisites*: +* Configure AWS CLI on the system where you will run the below commands. The IAM entity (User/Role) should have permissions to run/invoke `aws ssm create-document`, `aws ssm send-command` and `aws ssm get-command-invocation` commands. -* Configure AWS CLI on the system where you will run the below commands. 
The IAM entity (User/Role) should have permissions to run/invoke `aws ssm send-command` and `get-command-invocation` commands. + * `ssm:CreateDocument` + * `ssm:GetCommandInvocation` + * `ssm:SendCommand` * SSM agent should be installed and running on Worker Node(s). [How to Install SSM Agent link](https://docs.aws.amazon.com/systems-manager/latest/userguide/sysman-manual-agent-install.html) -* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. +* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. -        *Note:* For more granular control of the IAM permission check [AWS Systems Manager Permissions link ](https://docs.aws.amazon.com/systems-manager/latest/userguide/auth-and-access-control-permissions-reference.html) +*Note:* For more granular control of the IAM permission check [Actions defined by AWS Systems Manager](https://docs.aws.amazon.com/IAM/latest/UserGuide/list_awssystemsmanager.html%23awssystemsmanager-actions-as-permissions) * A S3 bucket location is required which is taken as an input parameter to `aws ssm send-command` command, to which the logs should be pushed. +#### To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s) -#### *To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s):* +1. Create the SSM document named "EKSLogCollector" using the following commands: -1. Create the SSM document named "EKSLogCollector" using the following commands:
``` -curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-ssm-content.json -aws ssm create-document --name "EKSLogCollector" --document-type "Command" --content file://eks-ssm-content.json +aws ssm create-document \ + --name "EKSLogCollectorLinux" \ + --document-type "Command" \ + --content https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-ssm-content.json ``` -2. To execute the bash script in the SSM document and to collect the logs from worker, run the following command:
-``` -aws ssm send-command --instance-ids <EC2 Instance ID> --document-name "EKSLogCollector" --parameters "bucketName=<S3 bucket name to push the logs>" --output json + +2. To execute the bash script in the SSM document and to collect the logs from worker, run the following command: + ``` -3. To check the status of SSM command submitted in previous step use the command
-``` -aws ssm get-command-invocation --command-id "<SSM command ID>" --instance-id "<EC2 Instance ID>" --output text +aws ssm send-command \ + --instance-ids <EC2 Instance ID> \ + --document-name "EKSLogCollectorLinux" \ + --parameters "bucketName=<S3 bucket name to push the logs>" \ + --output json ``` -    `SSM command ID`One of the response parameters after running `aws ssm send-command` in step2
-    `EC2 Instance ID`The EC2 Instance ID provided in the `aws ssm send-command` in step2 -4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. +3. To check the status of SSM command submitted in previous step use the command + +``` +aws ssm get-command-invocation \ + --command-id "<SSM command ID>" \ + --instance-id "<EC2 Instance ID>" \ + --output text +``` +4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. diff --git a/log-collector-script/linux/eks-ssm-content.json b/log-collector-script/linux/eks-ssm-content.json index a830f5068..42caf4503 100644 --- a/log-collector-script/linux/eks-ssm-content.json +++ b/log-collector-script/linux/eks-ssm-content.json @@ -1,83 +1,43 @@ { - "schemaVersion": "2.2", - "description": "EKS Log Collector", - "parameters": { - "bucketName": { - "type": "String", - "default": "Enabled" - } - }, - "mainSteps": [ - { - "action": "aws:runShellScript", - "name": "PatchLinux", - "precondition": { - "StringEquals": [ - "platformType", - "Linux" - ] - }, - "inputs": { - "runCommand": [ - "curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-log-collector.sh", - "echo \"Cleaning old eks-log-collector files\"", - "rm /var/log/eks_i*", - "bash ./eks-log-collector.sh >/dev/null 2>&1", - "echo \"EKS logs collected\"", - "if [ -f /usr/local/bin/aws ]; then", - "echo \"AWS_already_installed\"", - "else", - "echo \"Installing AWSCLI\"", - "curl \"https://s3.amazonaws.com/aws-cli/awscli-bundle.zip\" -o \"awscli-bundle.zip\" >/dev/null 2>&1", - "yum install unzip -y >/dev/null 2>&1", - "unzip awscli-bundle.zip >/dev/null 2>&1", - "./awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws", - "echo \"AWSCLI version is\"", - "/usr/local/bin/aws --version", - "fi", - "echo \"Pushing to S3\"", - "/usr/local/bin/aws s3 cp /var/log/eks_i* s3://{{bucketName}}", - "echo \"Logs uploaded
to S3\"" - ] - } + "schemaVersion": "2.2", + "description": "EKS Log Collector", + "parameters": { + "bucketName": { + "type": "String", + "default": "Enabled" + } + }, + "mainSteps": [ + { + "action": "aws:runShellScript", + "name": "PatchLinux", + "precondition": { + "StringEquals": ["platformType", "Linux"] }, - { - "precondition": { - "StringEquals": [ - "platformType", - "Windows" - ] - }, - "action": "aws:runPowerShellScript", - "name": "PatchWindows", - "inputs": { - "runCommand": [ - "if (!(Get-Module 'AWSPowerShell')) { ", - " Write-Host 'AWSPowerShell does not exist' ", - " Install-Module -Name AWSPowerShell -Force ", - "} ", - "try { ", - " Write-Host 'Downloading EKS Log collector script' ", - " Invoke-WebRequest -UseBasicParsing 'https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-log-collector.ps1' -OutFile eks-log-collector.ps1 ", - "} ", - "catch { ", - " Write-Host 'Uploaded to S3 failed' ", - " break ", - "} ", - "powershell .\\eks-log-collector.ps1", - "try { ", - " Write-Host 'Pushing to S3' ", - " Write-S3Object -BucketName {{bucketName}} -Folder C:\\log-collector -KeyPrefix eks-log-collector\\ -SearchPattern *.zip ", - " Write-Host 'Logs uploaded to S3' ", - "} ", - "catch { ", - " Write-Host 'Uploaded to S3 failed' ", - " break ", - "} ", - "", - "Remove-Item -Force .\\eks-log-collector.ps1 -ErrorAction Ignore " - ] - } + "inputs": { + "runCommand": [ + "curl -s -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-log-collector.sh", + "echo \"* Cleaning old eks-log-collector files\"", + "rm -vf /var/log/eks_i*", + "bash ./eks-log-collector.sh >/dev/null 2>&1", + "echo \"* EKS logs collected\"", + "if [ -f /bin/aws ]; then", + "echo \"* AWS CLI v2 already installed\"", + "else", + "echo \"* Installing AWS CLI v2\"", + "AWSCLI_DIR=$(mktemp -d)", + "curl \"https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip\" -o \"${AWSCLI_DIR}/awscliv2.zip\" 
>/dev/null 2>&1", + "yum install unzip -y >/dev/null 2>&1", + "unzip -q ${AWSCLI_DIR}/awscliv2.zip -d ${AWSCLI_DIR} >/dev/null 2>&1", + "${AWSCLI_DIR}/aws/install --bin-dir /bin --update", + "/bin/aws --version", + "fi", + "echo \"* Bucket name: s3://{{bucketName}}\"", + "echo \"* Pushing to S3\"", + "/bin/aws s3 cp /var/log/eks_i* s3://{{bucketName}}", + "echo \"* Logs uploaded to S3\"" + ] } - ] + } + ] } diff --git a/log-collector-script/windows/README.md b/log-collector-script/windows/README.md index b9a1612b8..374a4053b 100644 --- a/log-collector-script/windows/README.md +++ b/log-collector-script/windows/README.md @@ -1,23 +1,28 @@ -### EKS Logs Collector +### EKS Logs Collector (Windows) -This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. +This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. #### Usage + * Collect EKS logs using SSM agent, jump to below [section](#collect-eks-logs-using-ssm-agent) _(or)_ * Run this project as the Administrator user: + ``` Invoke-WebRequest -OutFile eks-log-collector.ps1 https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-log-collector.ps1 .\eks-log-collector.ps1 ``` #### Example output + The project can be used in normal or Enable/Disable Debug(**Caution: Enable/Disable Debug will restart Docker daemon which would kill running containers**). ``` USAGE: .\eks-log-collector.ps1 ``` + #### Example output in normal mode + The following output shows this project running in normal mode. ``` @@ -65,39 +70,54 @@ Archiving gathered data Done... 
your bundled logs are located in C:\log-collector\eks_i-0b318f704c74b6ab2_20200101T0620179658Z.zip ``` +### Collect EKS logs using SSM agent + +#### To run EKS log collector script on Worker Node(s) and upload the bundle(tar) to a S3 Bucket using SSM agent, please follow below steps -### Collect EKS logs using SSM agent -#### To run EKS log collector script on Worker Node(s) and upload the bundle(tar) to a S3 Bucket using SSM agent, please follow below steps +##### Prerequisites -##### *Prerequisites*: +* Configure AWS CLI on the system where you will run the below commands. The IAM entity (User/Role) should have permissions to run/invoke `aws ssm create-document`, `aws ssm send-command` and `aws ssm get-command-invocation` commands. -* Configure AWS CLI on the system where you will run the below commands. The IAM entity (User/Role) should have permissions to run/invoke `aws ssm send-command` and `get-command-invocation` commands. + * `ssm:CreateDocument` + * `ssm:GetCommandInvocation` + * `ssm:SendCommand` * SSM agent should be installed and running on Worker Node(s). [How to Install SSM Agent link](https://docs.aws.amazon.com/systems-manager/latest/userguide/sysman-manual-agent-install.html) -* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. +* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. 
-        *Note:* For more granular control of the IAM permission check [AWS Systems Manager Permissions link ](https://docs.aws.amazon.com/systems-manager/latest/userguide/auth-and-access-control-permissions-reference.html) +*Note:* For more granular control of the IAM permission check [Actions defined by AWS Systems Manager](https://docs.aws.amazon.com/IAM/latest/UserGuide/list_awssystemsmanager.html%23awssystemsmanager-actions-as-permissions) * A S3 bucket location is required which is taken as an input parameter to `aws ssm send-command` command, to which the logs should be pushed. +#### To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s) -#### *To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s):* +1. Create the SSM document named "EKSLogCollector" using the following commands: -1. Create the SSM document named "EKSLogCollector" using the following command:
``` -aws ssm create-document --name "EKSLogCollector" --document-type "Command" --content https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-ssm-content.json +aws ssm create-document \ + --name "EKSLogCollectorWindows" \ + --document-type "Command" \ + --content https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-ssm-content.json ``` -2. To execute the bash script in the SSM document and to collect the logs from worker, run the following command:
-``` -aws ssm send-command --instance-ids <EC2 Instance ID> --document-name "EKSLogCollector" --parameters "bucketName=<S3 bucket name to push the logs>" --output json + +2. To execute the PowerShell script in the SSM document and to collect the logs from worker, run the following command: + ``` -3. To check the status of SSM command submitted in previous step use the command
-``` -aws ssm get-command-invocation --command-id "<SSM command ID>" --instance-id "<EC2 Instance ID>" --output text +aws ssm send-command \ + --instance-ids <EC2 Instance ID> \ + --document-name "EKSLogCollectorWindows" \ + --parameters "bucketName=<S3 bucket name to push the logs>" \ + --output json ``` -    `SSM command ID`One of the response parameters after running `aws ssm send-command` in step2
-    `EC2 Instance ID`The EC2 Instance ID provided in the `aws ssm send-command` in step2 -4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. +3. To check the status of SSM command submitted in previous step use the command + +``` +aws ssm get-command-invocation \ + --command-id "<SSM command ID>" \ + --instance-id "<EC2 Instance ID>" \ + --output text +``` +4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. diff --git a/log-collector-script/windows/eks-ssm-content.json b/log-collector-script/windows/eks-ssm-content.json index c2f4f4ed5..a3d6360a0 100644 --- a/log-collector-script/windows/eks-ssm-content.json +++ b/log-collector-script/windows/eks-ssm-content.json @@ -1,81 +1,47 @@ { - "schemaVersion":"2.2", - "description":"EKS Log Collector", - "parameters":{ - "bucketName":{ - "type": "String", - "default": "Enabled" - } - }, - "mainSteps":[ - { - "action": "aws:runShellScript", - "name": "PatchLinux", - "precondition": { - "StringEquals": [ - "platformType", - "Linux" - ] - }, - "inputs": { - "runCommand": [ - "curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-log-collector.sh", - "bash ./eks-log-collector.sh >/dev/null 2>&1", - "echo \"EKS logs collected\"", - "if [ -f /usr/local/bin/aws ]; then", - "echo \"AWS_already_installed\"", - "else", - "echo \"Installing AWSCLI\"", - "curl \"https://s3.amazonaws.com/aws-cli/awscli-bundle.zip\" -o \"awscli-bundle.zip\" >/dev/null 2>&1", - "yum install unzip -y >/dev/null 2>&1", - "unzip awscli-bundle.zip >/dev/null 2>&1", - "./awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws", - "echo \"AWSCLI version is\"", - "/usr/local/bin/aws --version", - "fi", - "echo \"Pushing to S3\"", - "/usr/local/bin/aws s3 cp --recursive /opt/log-collector/ s3://{{bucketName}}", - "echo \"Logs uploaded to S3\"" - ] - } + "schemaVersion": "2.2", +
"description": "EKS Log Collector", + "parameters": { + "bucketName": { + "type": "String", + "default": "Enabled" + } + }, + "mainSteps": [ + { + "precondition": { + "StringEquals": ["platformType", "Windows"] }, - { - "precondition": { - "StringEquals": [ - "platformType", - "Windows" - ] - }, - "action": "aws:runPowerShellScript", - "name": "PatchWindows", - "inputs": { - "runCommand": [ - "if (!(Get-Module 'AWSPowerShell')) { ", - " Write-Host 'AWSPowerShell does not exist' ", - " Install-Module -Name AWSPowerShell -Force ", - "} ", - "try { ", - " Write-Host 'Downloading EKS Log collector script' ", - " Invoke-WebRequest -UseBasicParsing 'https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-log-collector.ps1' -OutFile eks-log-collector.ps1 ", - "} ", - "catch { ", - " Write-Host 'Uploaded to S3 failed' ", - " break ", - "} ", - "powershell .\\eks-log-collector.ps1", - "try { ", - " Write-Host 'Pushing to S3' ", - " Write-S3Object -BucketName {{bucketName}} -Folder C:\\log-collector -KeyPrefix eks-log-collector\\ -SearchPattern *.zip ", - " Write-Host 'Logs uploaded to S3' ", - "} ", - "catch { ", - " Write-Host 'Uploaded to S3 failed' ", - " break ", - "} ", - "", - "Remove-Item -Force .\\eks-log-collector.ps1 -ErrorAction Ignore " - ] - } + "action": "aws:runPowerShellScript", + "name": "PatchWindows", + "inputs": { + "runCommand": [ + "if (!(Get-Module 'AWSPowerShell')) { ", + " Write-Host 'AWSPowerShell does not exist' ", + " Install-Module -Name AWSPowerShell -Force ", + "} ", + "try { ", + " Write-Host 'Downloading EKS Log collector script' ", + " Invoke-WebRequest -UseBasicParsing 'https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-log-collector.ps1' -OutFile eks-log-collector.ps1 ", + "} ", + "catch { ", + " Write-Host 'Uploaded to S3 failed' ", + " break ", + "} ", + "powershell .\\eks-log-collector.ps1", + "try { ", + " Write-Host 'Pushing to S3' ", + " 
Write-S3Object -BucketName {{bucketName}} -Folder C:\\log-collector -KeyPrefix eks-log-collector\\ -SearchPattern *.zip ", + " Write-Host 'Logs uploaded to S3' ", + "} ", + "catch { ", + " Write-Host 'Uploaded to S3 failed' ", + " break ", + "} ", + "", + "Remove-Item -Force .\\eks-log-collector.ps1 -ErrorAction Ignore " + ] } - ] -} \ No newline at end of file + } + ] +} From c0318dae76b5cef29e506b2a6b1cd9a1ee2afb13 Mon Sep 17 00:00:00 2001 From: Steve Hipwell Date: Thu, 26 Jan 2023 19:09:15 +0000 Subject: [PATCH 392/621] Use runtime.slice for containerd runtime (#1051) --- files/bootstrap.sh | 14 +++++++++++--- files/kubelet-containerd.service | 3 +++ files/runtime.slice | 4 ++++ scripts/install-worker.sh | 6 ++++++ 4 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 files/runtime.slice diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 52bf5847d..545ddd1e3 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -490,12 +490,20 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then sudo mkdir -p /etc/containerd sudo mkdir -p /etc/cni/net.d - if [[ -n "$CONTAINERD_CONFIG_FILE" ]]; then - sudo cp -v $CONTAINERD_CONFIG_FILE /etc/eks/containerd/containerd-config.toml + + sudo mkdir -p /etc/systemd/system/containerd.service.d + printf '[Service]\nSlice=runtime.slice\n' | sudo tee /etc/systemd/system/containerd.service.d/00-runtime-slice.conf + + if [[ -n "${CONTAINERD_CONFIG_FILE}" ]]; then + sudo cp -v "${CONTAINERD_CONFIG_FILE}" /etc/eks/containerd/containerd-config.toml fi - echo "$(jq '.cgroupDriver="systemd"' $KUBELET_CONFIG)" > $KUBELET_CONFIG + sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml + echo "$(jq '.cgroupDriver="systemd"' "${KUBELET_CONFIG}")" > "${KUBELET_CONFIG}" + echo "$(jq '.systemReservedCgroup="/system"' "${KUBELET_CONFIG}")" > "${KUBELET_CONFIG}" + echo "$(jq '.kubeReservedCgroup="/runtime"' "${KUBELET_CONFIG}")" > "${KUBELET_CONFIG}" + # Check if the containerd config file is 
the same as the one used in the image build. # If different, then restart containerd w/ proper config if ! cmp -s /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml; then diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index 74b30f70a..72b58f8b8 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -5,6 +5,7 @@ After=containerd.service sandbox-image.service Requires=containerd.service sandbox-image.service [Service] +Slice=runtime.slice ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 ExecStart=/usr/bin/kubelet --cloud-provider $KUBELET_CLOUD_PROVIDER \ --config /etc/kubernetes/kubelet/kubelet-config.json \ @@ -17,6 +18,8 @@ Restart=on-failure RestartForceExitStatus=SIGPIPE RestartSec=5 KillMode=process +CPUAccounting=true +MemoryAccounting=true [Install] WantedBy=multi-user.target diff --git a/files/runtime.slice b/files/runtime.slice new file mode 100644 index 000000000..5e189639a --- /dev/null +++ b/files/runtime.slice @@ -0,0 +1,4 @@ +[Unit] +Description=Kubernetes and container runtime slice +Documentation=man:systemd.special(7) +Before=slices.target diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index cf1008410..ee59a90e4 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -141,6 +141,12 @@ else sudo yum install -y awscli fi +################################################################################ +### systemd #################################################################### +################################################################################ + +sudo mv "${TEMPLATE_DIR}/runtime.slice" /etc/systemd/system/runtime.slice + ############################################################################### ### Containerd setup ########################################################## ############################################################################### From d498b68df8ae61361782595275bb9f121b2d67b3 Mon 
Sep 17 00:00:00 2001 From: Jeffrey Nelson Date: Thu, 26 Jan 2023 17:02:28 -0600 Subject: [PATCH 393/621] Update max pods values (#1153) --- files/eni-max-pods.txt | 52 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index a8de14260..889e6b339 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. # -# This file was generated at 2022-09-21T13:34:09-07:00 +# This file was generated at 2023-01-25T20:06:57Z # # The regions queried were: # - ap-northeast-1 @@ -158,6 +158,15 @@ c6id.8xlarge 234 c6id.large 29 c6id.metal 737 c6id.xlarge 58 +c6in.12xlarge 234 +c6in.16xlarge 737 +c6in.24xlarge 737 +c6in.2xlarge 58 +c6in.32xlarge 345 +c6in.4xlarge 234 +c6in.8xlarge 234 +c6in.large 29 +c6in.xlarge 58 c7g.12xlarge 234 c7g.16xlarge 737 c7g.2xlarge 58 @@ -209,7 +218,7 @@ g5.12xlarge 737 g5.16xlarge 234 g5.24xlarge 737 g5.2xlarge 58 -g5.48xlarge 737 +g5.48xlarge 345 g5.4xlarge 234 g5.8xlarge 234 g5.xlarge 58 @@ -224,6 +233,7 @@ h1.2xlarge 58 h1.4xlarge 234 h1.8xlarge 234 hpc6a.48xlarge 100 +hpc6id.32xlarge 51 hs1.8xlarge 234 i2.2xlarge 58 i2.4xlarge 234 @@ -393,6 +403,24 @@ m6id.8xlarge 234 m6id.large 29 m6id.metal 737 m6id.xlarge 58 +m6idn.12xlarge 234 +m6idn.16xlarge 737 +m6idn.24xlarge 737 +m6idn.2xlarge 58 +m6idn.32xlarge 345 +m6idn.4xlarge 234 +m6idn.8xlarge 234 +m6idn.large 29 +m6idn.xlarge 58 +m6in.12xlarge 234 +m6in.16xlarge 737 +m6in.24xlarge 737 +m6in.2xlarge 58 +m6in.32xlarge 345 +m6in.4xlarge 234 +m6in.8xlarge 234 +m6in.large 29 +m6in.xlarge 58 mac1.metal 234 mac2.metal 234 p2.16xlarge 234 @@ -525,6 +553,24 @@ r6id.8xlarge 234 r6id.large 29 r6id.metal 737 r6id.xlarge 58 +r6idn.12xlarge 234 +r6idn.16xlarge 737 +r6idn.24xlarge 737 +r6idn.2xlarge 58 +r6idn.32xlarge 345 +r6idn.4xlarge 234 +r6idn.8xlarge 234 +r6idn.large 
29 +r6idn.xlarge 58 +r6in.12xlarge 234 +r6in.16xlarge 737 +r6in.24xlarge 737 +r6in.2xlarge 58 +r6in.32xlarge 345 +r6in.4xlarge 234 +r6in.8xlarge 234 +r6in.large 29 +r6in.xlarge 58 t1.micro 4 t2.2xlarge 44 t2.large 35 @@ -558,7 +604,9 @@ trn1.2xlarge 58 trn1.32xlarge 247 u-12tb1.112xlarge 737 u-12tb1.metal 147 +u-18tb1.112xlarge 737 u-18tb1.metal 737 +u-24tb1.112xlarge 737 u-24tb1.metal 737 u-3tb1.56xlarge 234 u-6tb1.112xlarge 737 From 292239bb94297daa539f3413f54eb9433f4db3d4 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 31 Jan 2023 10:50:27 -0800 Subject: [PATCH 394/621] Skip docker installation for Kubernetes 1.25+ (#1157) --- scripts/install-worker.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index ee59a90e4..c68af36cf 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -203,7 +203,12 @@ EOF sudo yum install -y device-mapper-persistent-data lvm2 -INSTALL_DOCKER="${INSTALL_DOCKER:-true}" +if [[ ! -v "INSTALL_DOCKER" ]]; then + INSTALL_DOCKER=$(vercmp "$KUBERNETES_VERSION" lt "1.25.0" || true) +else + echo "WARNING: using override INSTALL_DOCKER=${INSTALL_DOCKER}. This option is deprecated and will be removed in a future release." 
+fi + if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo amazon-linux-extras enable docker sudo groupadd -og 1950 docker From 7f022ffd60d841c2123103b65831cc2af7b63616 Mon Sep 17 00:00:00 2001 From: Sichaow Date: Tue, 31 Jan 2023 13:47:30 -0800 Subject: [PATCH 395/621] Update kubelet version and build date to 2023-01-11 (#1160) --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 1f74fd28d..3d25473cd 100644 --- a/Makefile +++ b/Makefile @@ -82,19 +82,19 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.21 1.21: ## Build EKS Optimized AL2 AMI - K8s 1.21 - $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2022-10-31 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2023-01-11 pull_cni_from_github=true .PHONY: 1.22 1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 - $(MAKE) k8s kubernetes_version=1.22.15 kubernetes_build_date=2022-10-31 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-01-11 pull_cni_from_github=true .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.13 kubernetes_build_date=2022-10-31 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.23.15 kubernetes_build_date=2023-01-11 pull_cni_from_github=true .PHONY: 1.24 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 - $(MAKE) k8s kubernetes_version=1.24.7 kubernetes_build_date=2022-10-31 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.24.9 kubernetes_build_date=2023-01-11 pull_cni_from_github=true .PHONY: help help: ## Display help From 1eed59e5b02dccc2796255fe2414f9d68bcd29bb Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 31 Jan 2023 15:11:58 -0800 Subject: [PATCH 396/621] Uses IMDSv2 in log collector script (#1163) --- log-collector-script/linux/eks-log-collector.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git 
a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index e7d45f9d1..50f759461 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -177,6 +177,9 @@ systemd_check() { fi } +# Get token for IMDSv2 calls +IMDS_TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 360") + create_directories() { # Make sure the directory the script lives in is there. Not an issue if # the EKS AMI is used, as it will have it. @@ -195,7 +198,7 @@ get_instance_id() { cp ${INSTANCE_ID_FILE} "${COLLECT_DIR}"/system/instance-id.txt readonly INSTANCE_ID=$(cat "${COLLECT_DIR}"/system/instance-id.txt) else - readonly INSTANCE_ID=$(curl -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/instance-id) + readonly INSTANCE_ID=$(curl -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/instance-id) if [ 0 -eq $? ]; then # Check if previous command was successful. echo "${INSTANCE_ID}" > "${COLLECT_DIR}"/system/instance-id.txt else @@ -205,13 +208,13 @@ get_instance_id() { } get_region() { - if REGION=$(curl -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/placement/region); then + if REGION=$(curl -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/placement/region); then echo "${REGION}" > "${COLLECT_DIR}"/system/region.txt else warning "Unable to find EC2 Region, skipping." fi - if AZ=$(curl -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/placement/availability-zone); then + if AZ=$(curl -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/placement/availability-zone); then echo "${AZ}" > "${COLLECT_DIR}"/system/availability-zone.txt else warning "Unable to find EC2 AZ, skipping." 
From 343e830dc4cbf0b2646026fced4cea7202a345a5 Mon Sep 17 00:00:00 2001 From: Sichaow Date: Wed, 1 Feb 2023 18:01:09 -0800 Subject: [PATCH 397/621] update CHANGELOG for AMI Release v20230127 (#1165) * Update CHANGELOG for AMI Release v20230127 * Update CHANGELOG for AMI Release v20230127 --- CHANGELOG.md | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c2d77b6f..760784636 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,52 @@ # Changelog +### AMI Release v20230127 +* amazon-eks-gpu-node-1.24-v20230127 +* amazon-eks-gpu-node-1.23-v20230127 +* amazon-eks-gpu-node-1.22-v20230127 +* amazon-eks-gpu-node-1.21-v20230127 +* amazon-eks-arm64-node-1.24-v20230127 +* amazon-eks-arm64-node-1.23-v20230127 +* amazon-eks-arm64-node-1.22-v20230127 +* amazon-eks-arm64-node-1.21-v20230127 +* amazon-eks-node-1.24-v20230127 +* amazon-eks-node-1.23-v20230127 +* amazon-eks-node-1.22-v20230127 +* amazon-eks-node-1.21-v20230127 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.9-20230127` +* `1.23.15-20230127` +* `1.22.17-20230127` +* `1.21.14-20230127` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.9/2023-01-11/ +* s3://amazon-eks/1.23.15/2023-01-11/ +* s3://amazon-eks/1.22.17/2023-01-11/ +* s3://amazon-eks/1.21.14/2023-01-11/ + +AMI details: +* kernel: 5.4.228-131.415.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.4-1.amzn2 +* cuda: 11.4.0-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +- Updated kernel version to `5.4.228-131.415.amzn2` for: + - [ALAS2KERNEL-5.4-2023-041](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-041.html). 
+- Add support for `C6in`, `M6in`, `M6idn`, `R6in`, `R6idn` and `Hpc6id` instances [#1153](https://github.com/awslabs/amazon-eks-ami/pull/1153) +- This is the first AMI release available in `ap-south-2`, `eu-central-2`, and `eu-south-2`. +- Cache image content without unpacking/snapshotting [#1144](https://github.com/awslabs/amazon-eks-ami/pull/1144) + - Container image caching has been re-enabled for 1.24 AMI's. + +Minor changes: +- Update AWS CLI to `2.9.18` +- Configure containerd registry certificates by default in the GPU AMI. + ### AMI Release v20230105 * amazon-eks-gpu-node-1.24-v20230105 * amazon-eks-gpu-node-1.23-v20230105 From aafd2c6dfbb7c1992be9ff64edee7c74d2cd0b23 Mon Sep 17 00:00:00 2001 From: Sichaow Date: Thu, 2 Feb 2023 14:51:12 -0800 Subject: [PATCH 398/621] Add C7g.metal, M7g, R7g instance (#1166) --- files/eni-max-pods.txt | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 889e6b339..6bef4aed6 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2023-01-25T20:06:57Z +# This file was generated at 2023-02-02T10:15:26-08:00 # # The regions queried were: # - ap-northeast-1 @@ -421,6 +421,15 @@ m6in.4xlarge 234 m6in.8xlarge 234 m6in.large 29 m6in.xlarge 58 +m7g.12xlarge 234 +m7g.16xlarge 737 +m7g.2xlarge 58 +m7g.4xlarge 234 +m7g.8xlarge 234 +m7g.large 29 +m7g.medium 8 +m7g.metal 737 +m7g.xlarge 58 mac1.metal 234 mac2.metal 234 p2.16xlarge 234 @@ -571,6 +580,15 @@ r6in.4xlarge 234 r6in.8xlarge 234 r6in.large 29 r6in.xlarge 58 +r7g.12xlarge 234 +r7g.16xlarge 737 +r7g.2xlarge 58 +r7g.4xlarge 234 +r7g.8xlarge 234 +r7g.large 29 +r7g.medium 8 +r7g.metal 737 +r7g.xlarge 58 t1.micro 4 t2.2xlarge 44 t2.large 35 From 4b9b546dc325e6372e705f1e192f68395ce017db Mon Sep 17 00:00:00 2001 From: Victor Pineda Gonzalez Date: Thu, 2 Feb 2023 16:04:16 -0800 Subject: [PATCH 399/621] Revert "Use external cloud provider for EKS Local deployments" (#1167) This reverts commit 2297119cee8466d0c4516e775cd030bcf01971bd. --- files/bootstrap.sh | 10 ---------- files/kubelet-containerd.service | 2 +- files/kubelet.service | 2 +- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 545ddd1e3..f24595cc6 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -376,13 +376,8 @@ if [[ "${ENABLE_LOCAL_OUTPOST}" == "true" ]]; then mv /var/lib/kubelet/kubeconfig /var/lib/kubelet/bootstrap-kubeconfig KUBELET_EXTRA_ARGS="--bootstrap-kubeconfig /var/lib/kubelet/bootstrap-kubeconfig $KUBELET_EXTRA_ARGS" fi - ### For Local Outpost deployments, we will use the the external cloud provider - KUBELET_CLOUD_PROVIDER="external" else sed -i s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig - - ### For any other type of deployment we will use the aws cloud provider for backwards compatibility - KUBELET_CLOUD_PROVIDER="aws" fi ### kubelet.service configuration @@ -467,11 +462,6 @@ cat << EOF > /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf 
Environment='KUBELET_ARGS=--node-ip=$INTERNAL_IP --pod-infra-container-image=$PAUSE_CONTAINER --v=2' EOF -cat << EOF > /etc/systemd/system/kubelet.service.d/20-kubelet-cloud-provider.conf -[Service] -Environment='KUBELET_CLOUD_PROVIDER=$KUBELET_CLOUD_PROVIDER' -EOF - if [[ -n "$KUBELET_EXTRA_ARGS" ]]; then cat << EOF > /etc/systemd/system/kubelet.service.d/30-kubelet-extra-args.conf [Service] diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index 72b58f8b8..d043b30b9 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -7,7 +7,7 @@ Requires=containerd.service sandbox-image.service [Service] Slice=runtime.slice ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 -ExecStart=/usr/bin/kubelet --cloud-provider $KUBELET_CLOUD_PROVIDER \ +ExecStart=/usr/bin/kubelet --cloud-provider aws \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime remote \ diff --git a/files/kubelet.service b/files/kubelet.service index c650d491a..387470da1 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -6,7 +6,7 @@ Requires=docker.service [Service] ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 -ExecStart=/usr/bin/kubelet --cloud-provider $KUBELET_CLOUD_PROVIDER \ +ExecStart=/usr/bin/kubelet --cloud-provider aws \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ From 3fb401098f20541c15a61e1e0937981f1e7ea077 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 3 Feb 2023 09:40:56 -0800 Subject: [PATCH 400/621] Add ALAS issue workflow (#1158) --- .github/workflows/alas-issues.yaml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .github/workflows/alas-issues.yaml diff --git a/.github/workflows/alas-issues.yaml b/.github/workflows/alas-issues.yaml new file mode 100644 index 000000000..d71611bdc --- /dev/null +++ 
b/.github/workflows/alas-issues.yaml @@ -0,0 +1,26 @@ +--- +name: "[ALAS] Open issues for new bulletins" +on: + workflow_dispatch: + inputs: + window: + description: "Only consider bulletins published within this relative time window (golang Duration)" + default: "24h" + required: true + schedule: + # once an hour, at the top of hour + - cron: "0 * * * *" +permissions: + issues: write +jobs: + alas-al2-bulletins: + runs-on: ubuntu-latest + steps: + - uses: guilhem/rss-issues-action@0.5.2 + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" + feed: "https://alas.aws.amazon.com/AL2/alas.rss" + dry-run: "true" + lastTime: "${{ github.event.inputs.window || '24h' }}" + labels: "alas,alas/al2" + titleFilter: "(medium|low)" From ccf5b1525dfa7a17f5cae21f8d8395a6f21b45d8 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 6 Feb 2023 16:02:09 -0800 Subject: [PATCH 401/621] Upgrade 1.24 to kernel 5.10 (#1118) --- doc/USER_GUIDE.md | 11 ++++++++++- eks-worker-al2.json | 27 +++++++++++++++++---------- scripts/install-worker.sh | 7 ------- scripts/upgrade_kernel.sh | 9 ++++++--- 4 files changed, 33 insertions(+), 21 deletions(-) diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index 0ed73532b..c565cff4d 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -249,7 +249,16 @@ $ curl -sSL "http://localhost:8001/api/v1/nodes/ip-192-168-92-220.us-east-2.comp By default, the `amazon-eks-ami` uses a [source_ami_filter](https://github.com/awslabs/amazon-eks-ami/blob/e3f1b910f83ad1f27e68312e50474ea6059f052d/eks-worker-al2.json#L46) that selects the latest [hvm](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/virtualization_types.html) AL2 AMI for the given architecture as the base AMI. For more information on what kernel versions are running on published Amazon EKS optimized Linux AMIs, see [the public documentation](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html). 
-When building an AMI, you can set the `kernel_version` to `4.14` or `5.4` to customize the kernel version. The [upgrade_kernel.sh script](https://github.com/awslabs/amazon-eks-ami/blob/master/scripts/upgrade_kernel.sh#L26) contains the logic for updating and upgrading the kernel. For Kubernetes versions 1.18 and below, it uses the `4.14` kernel if not set, and it will install the latest patches. For Kubernetes version 1.19 and above, it uses the `5.4` kernel if not set. +When building an AMI, you can set `kernel_version` to customize the kernel version. Valid values are: +- `4.14` +- `5.4` +- `5.10` + +If `kernel_version` is not set: +- For Kubernetes 1.23 and below, `5.4` is used. +- For Kubernetes 1.24 and above, `5.10` is used. + +The [upgrade_kernel.sh script](../scripts/upgrade_kernel.sh) contains the logic for updating and upgrading the kernel. --- diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 020758d36..873978909 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -119,16 +119,6 @@ "ADDITIONAL_YUM_REPOS={{user `additional_yum_repos`}}" ] }, - { - "type": "shell", - "remote_folder": "{{ user `remote_folder`}}", - "expect_disconnect": true, - "script": "{{template_dir}}/scripts/upgrade_kernel.sh", - "environment_vars": [ - "KUBERNETES_VERSION={{user `kubernetes_version`}}", - "KERNEL_VERSION={{user `kernel_version`}}" - ] - }, { "type": "shell", "pause_before": "90s", @@ -147,6 +137,23 @@ "source": "{{template_dir}}/log-collector-script/linux/", "destination": "/tmp/worker/log-collector-script/" }, + { + "type": "shell", + "inline": [ + "sudo chmod -R a+x /tmp/worker/bin/", + "sudo mv /tmp/worker/bin/* /usr/bin/" + ] + }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "expect_disconnect": true, + "script": "{{template_dir}}/scripts/upgrade_kernel.sh", + "environment_vars": [ + "KUBERNETES_VERSION={{user `kubernetes_version`}}", + "KERNEL_VERSION={{user `kernel_version`}}" + ] + }, { "type": "shell", 
"remote_folder": "{{ user `remote_folder`}}", diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index c68af36cf..56524243b 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -48,13 +48,6 @@ else exit 1 fi -################################################################################ -### Utilities ################################################################## -################################################################################ - -sudo chmod -R a+x $TEMPLATE_DIR/bin/ -sudo mv $TEMPLATE_DIR/bin/* /usr/bin/ - ################################################################################ ### Packages ################################################################### ################################################################################ diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 5736d0f5c..134bd454b 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -5,9 +5,12 @@ set -o nounset set -o errexit if [[ -z "$KERNEL_VERSION" ]]; then - KERNEL_VERSION=5.4 - - echo "kernel_version is unset. Setting to $KERNEL_VERSION" + if vercmp "$KUBERNETES_VERSION" gteq "1.24.0"; then + KERNEL_VERSION=5.10 + else + KERNEL_VERSION=5.4 + fi + echo "kernel_version is unset. Setting to $KERNEL_VERSION based on Kubernetes version $KUBERNETES_VERSION." 
fi if [[ $KERNEL_VERSION == "4.14" ]]; then From 671ce3f9a50bb8b2a1ffd92b772dc5991d15a8d9 Mon Sep 17 00:00:00 2001 From: Carter McKinnon Date: Tue, 31 Jan 2023 13:38:53 -0800 Subject: [PATCH 402/621] Enable PSI --- scripts/upgrade_kernel.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 134bd454b..85390785b 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -24,4 +24,9 @@ else exit 1 fi +# enable pressure stall information +sudo grubby \ + --update-kernel=ALL \ + --args="psi=1" + sudo reboot From 61f45dde61d7343f3fe1b0db22761adec9fff12f Mon Sep 17 00:00:00 2001 From: Carter McKinnon Date: Mon, 6 Feb 2023 18:52:30 -0800 Subject: [PATCH 403/621] Pause after kernel upgrade --- eks-worker-al2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 873978909..33c37ed38 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -121,7 +121,6 @@ }, { "type": "shell", - "pause_before": "90s", "remote_folder": "{{ user `remote_folder`}}", "inline": [ "mkdir -p /tmp/worker/log-collector-script/" @@ -148,6 +147,7 @@ "type": "shell", "remote_folder": "{{ user `remote_folder`}}", "expect_disconnect": true, + "pause_after": "90s", "script": "{{template_dir}}/scripts/upgrade_kernel.sh", "environment_vars": [ "KUBERNETES_VERSION={{user `kubernetes_version`}}", From da96ebc848a5c6de3c49e22b4401b72a54668306 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 7 Feb 2023 10:16:15 -0800 Subject: [PATCH 404/621] Add 1.25 make target (#1173) --- Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3d25473cd..c3c9cd455 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.21 1.22 1.23 1.24 ## Build all versions of EKS Optimized AL2 AMI +all: 1.21 1.22 1.23 1.24 1.25 ## Build all versions of EKS Optimized AL2 AMI # ensure that 
these flags are equivalent to the rules in the .editorconfig SHFMT_FLAGS := --list \ @@ -96,6 +96,10 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 $(MAKE) k8s kubernetes_version=1.24.9 kubernetes_build_date=2023-01-11 pull_cni_from_github=true +.PHONY: 1.25 +1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 + $(MAKE) k8s kubernetes_version=1.25.5 kubernetes_build_date=2023-01-11 pull_cni_from_github=true + .PHONY: help help: ## Display help @awk 'BEGIN {FS = ":.*##"; printf "Usage:\n make \033[36m\033[0m\n"} /^[\.a-zA-Z_0-9\-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) From ecc413a4d69b7cf14333b67a853b233a3c073dba Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 7 Feb 2023 10:19:05 -0800 Subject: [PATCH 405/621] Exclude docker from 1.25+ AMI metadata (#1174) --- Makefile | 8 ++++++++ eks-worker-al2-variables.json | 1 + eks-worker-al2.json | 3 ++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c3c9cd455..d52909999 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,14 @@ K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS MAKEFILE_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +# Docker is not present on 1.25+ AMI's +ifeq ($(shell $(MAKEFILE_DIR)/files/bin/vercmp "$(kubernetes_version)" gteq "1.25.0"), true) +# do not tag the AMI with the Docker version +docker_version ?= none +# do not include the Docker version in the AMI description +ami_component_description ?= (k8s: {{ user `kubernetes_version` }}, containerd: {{ user `containerd_version` }}) +endif + arch ?= x86_64 ifeq ($(arch), arm64) instance_type ?= m6g.large diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 2685e5aa1..b83543f49 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -1,5 +1,6 @@ { 
"additional_yum_repos": "", + "ami_component_description": "(k8s: {{ user `kubernetes_version` }}, docker: {{ user `docker_version` }}, containerd: {{ user `containerd_version` }})", "ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", "ami_regions": "", "ami_users": "", diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 33c37ed38..1f805c85b 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -2,6 +2,7 @@ "_comment": "All template variables are enumerated here; and most variables have a default value defined in eks-worker-al2-variables.json", "variables": { "additional_yum_repos": null, + "ami_component_description": null, "ami_description": null, "ami_name": null, "ami_regions": null, @@ -107,7 +108,7 @@ "cni_plugin_version": "{{ user `cni_plugin_version`}}" }, "ami_name": "{{user `ami_name`}}", - "ami_description": "{{ user `ami_description` }}, (k8s: {{ user `kubernetes_version` }}, docker: {{ user `docker_version` }}, containerd: {{ user `containerd_version` }})" + "ami_description": "{{ user `ami_description` }}, {{ user `ami_component_description` }}" } ], "provisioners": [ From 613785695a83cda7a9a785750e6e29ddbde72e4f Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 7 Feb 2023 11:16:22 -0800 Subject: [PATCH 406/621] Update CHANGELOG for release v20230203 (#1170) --- CHANGELOG.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 760784636..8037f146d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,43 @@ # Changelog +### AMI Release v20230203 +* amazon-eks-gpu-node-1.24-v20230203 +* amazon-eks-gpu-node-1.23-v20230203 +* amazon-eks-gpu-node-1.22-v20230203 +* amazon-eks-gpu-node-1.21-v20230203 +* amazon-eks-arm64-node-1.24-v20230203 +* amazon-eks-arm64-node-1.23-v20230203 +* amazon-eks-arm64-node-1.22-v20230203 +* amazon-eks-arm64-node-1.21-v20230203 +* amazon-eks-node-1.24-v20230203 +* amazon-eks-node-1.23-v20230203 +* 
amazon-eks-node-1.22-v20230203 +* amazon-eks-node-1.21-v20230203 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.9-20230203` +* `1.23.15-20230203` +* `1.22.17-20230203` +* `1.21.14-20230203` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.9/2023-01-11/ +* s3://amazon-eks/1.23.15/2023-01-11/ +* s3://amazon-eks/1.22.17/2023-01-11/ +* s3://amazon-eks/1.21.14/2023-01-11/ + +AMI details: +* kernel: 5.4.228-131.415.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.4-1.amzn2 +* cuda: 11.4.0-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +* Reverted [Use external cloud provider for EKS Local deployments](https://github.com/awslabs/amazon-eks-ami/commit/4b9b546dc325e6372e705f1e192f68395ce017db) + ### AMI Release v20230127 * amazon-eks-gpu-node-1.24-v20230127 * amazon-eks-gpu-node-1.23-v20230127 From 24f908124120b58bad54d023e71f33f345b98068 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 7 Feb 2023 13:30:39 -0800 Subject: [PATCH 407/621] Allow any KERNEL_VERSION (#1175) --- scripts/upgrade_kernel.sh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 85390785b..67e509caa 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -15,13 +15,8 @@ fi if [[ $KERNEL_VERSION == "4.14" ]]; then sudo yum update -y kernel -elif [[ $KERNEL_VERSION == "5.4" ]]; then - sudo amazon-linux-extras install -y kernel-5.4 -elif [[ $KERNEL_VERSION == "5.10" ]]; then - sudo amazon-linux-extras install -y kernel-5.10 else - echo "$KERNEL_VERSION is not a valid kernel version" - exit 1 + sudo amazon-linux-extras install -y "kernel-${KERNEL_VERSION}" fi # enable pressure stall information From 58bdc867afbd7808a6f78ebb1141fa33264946fd Mon Sep 17 00:00:00 2001 From: Sichaow Date: Tue, 7 Feb 2023 16:19:25 -0800 
Subject: [PATCH 408/621] Version lock kernel (#1177) * Version lock header * Version lock header * Version lock header --- scripts/install-worker.sh | 1 - scripts/upgrade_kernel.sh | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 56524243b..56ae744d1 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -68,7 +68,6 @@ sudo yum install -y \ socat \ unzip \ wget \ - yum-plugin-versionlock \ yum-utils # Remove any old kernel versions. `--count=1` here means "only leave 1 kernel version installed" diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 67e509caa..37946283a 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -24,4 +24,7 @@ sudo grubby \ --update-kernel=ALL \ --args="psi=1" +sudo yum install -y yum-plugin-versionlock +sudo yum versionlock kernel + sudo reboot From e32d36834383f4bdfd3876af059728f54e98f7a1 Mon Sep 17 00:00:00 2001 From: Sichaow Date: Wed, 8 Feb 2023 14:24:41 -0800 Subject: [PATCH 409/621] Update max pods (#1178) --- files/eni-max-pods.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 6bef4aed6..d7aa1db4c 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2023-02-02T10:15:26-08:00 +# This file was generated at 2023-02-07T22:38:44Z # # The regions queried were: # - ap-northeast-1 @@ -643,6 +643,12 @@ x1e.32xlarge 234 x1e.4xlarge 58 x1e.8xlarge 58 x1e.xlarge 29 +x2ezn.12xlarge 737 +x2ezn.2xlarge 58 +x2ezn.4xlarge 234 +x2ezn.6xlarge 234 +x2ezn.8xlarge 234 +x2ezn.metal 737 x2gd.12xlarge 234 x2gd.16xlarge 737 x2gd.2xlarge 58 From 1f8bb313a21fbec2c536799557a3c742e1276323 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 15 Feb 2023 11:15:39 -0800 Subject: [PATCH 410/621] Add Action to generate eni-max-pods.txt (#1184) --- sync-eni-max-pods.yaml | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 sync-eni-max-pods.yaml diff --git a/sync-eni-max-pods.yaml b/sync-eni-max-pods.yaml new file mode 100644 index 000000000..2d8e22ed5 --- /dev/null +++ b/sync-eni-max-pods.yaml @@ -0,0 +1,40 @@ +name: '[Sync] Update eni-max-pods.txt' +on: + workflow_dispatch: + schedule: + # once a day + - cron: '0 0 * * *' +permissions: + id-token: write + contents: write + pull-requests: write +jobs: + update-max-pods: + runs-on: ubuntu-latest + steps: + - uses: aws-actions/configure-aws-credentials@v1 + with: + aws-region: ${{ secrets.AWS_REGION }} + role-to-assume: ${{ secrets.AWS_ROLE_ARN }} + - uses: actions/checkout@v3 + with: + repository: awslabs/amazon-eks-ami + ref: refs/heads/master + path: amazon-eks-ami/ + - uses: actions/checkout@v3 + with: + repository: aws/amazon-vpc-cni-k8s + ref: refs/heads/master + path: amazon-vpc-cni-k8s/ + - run: | + #!/usr/bin/env bash + set -o errexit + cd amazon-vpc-cni-k8s/ + make generate-limits + cp misc/eni-max-pods.txt ../amazon-eks-ami/files/eni-max-pods.txt + - uses: peter-evans/create-pull-request@v4 + with: + commit-message: "Update eni-max-pods.txt" + branch: update-eni-max-pods + path: amazon-eks-ami/ + add-paths: files/eni-max-pods.txt From 4319d222984c0b58e0c12e3c8b4bfe5e74c9cbf8 Mon Sep 17 00:00:00 2001 From: Carter 
Date: Wed, 15 Feb 2023 11:21:24 -0800 Subject: [PATCH 411/621] Move eni-max-pods.txt Action to correct location (#1186) --- .../workflows/sync-eni-max-pods.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename sync-eni-max-pods.yaml => .github/workflows/sync-eni-max-pods.yaml (100%) diff --git a/sync-eni-max-pods.yaml b/.github/workflows/sync-eni-max-pods.yaml similarity index 100% rename from sync-eni-max-pods.yaml rename to .github/workflows/sync-eni-max-pods.yaml From c242245b6c4a29ac44a405ef2d6949fa5f49dd7c Mon Sep 17 00:00:00 2001 From: camrakin <113552683+camrakin@users.noreply.github.com> Date: Wed, 15 Feb 2023 12:54:27 -0800 Subject: [PATCH 412/621] Updating kubelet versions and build date to 1-30 (#1187) --- Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index d52909999..c76a0dc57 100644 --- a/Makefile +++ b/Makefile @@ -90,23 +90,23 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.21 1.21: ## Build EKS Optimized AL2 AMI - K8s 1.21 - $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2023-01-11 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2023-01-30 pull_cni_from_github=true .PHONY: 1.22 1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 - $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-01-11 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-01-30 pull_cni_from_github=true .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.15 kubernetes_build_date=2023-01-11 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.23.16 kubernetes_build_date=2023-01-30 pull_cni_from_github=true .PHONY: 1.24 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 - $(MAKE) k8s kubernetes_version=1.24.9 kubernetes_build_date=2023-01-11 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.24.10 
kubernetes_build_date=2023-01-30 pull_cni_from_github=true .PHONY: 1.25 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 - $(MAKE) k8s kubernetes_version=1.25.5 kubernetes_build_date=2023-01-11 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.25.6 kubernetes_build_date=2023-01-30 pull_cni_from_github=true .PHONY: help help: ## Display help From 3a6a0c8df2c2025801319fff562f7ea50835e167 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 16 Feb 2023 11:33:34 -0800 Subject: [PATCH 413/621] Set title and body of eni-max-pods PR's (#1190) --- .github/workflows/sync-eni-max-pods.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sync-eni-max-pods.yaml b/.github/workflows/sync-eni-max-pods.yaml index 2d8e22ed5..8610a3e27 100644 --- a/.github/workflows/sync-eni-max-pods.yaml +++ b/.github/workflows/sync-eni-max-pods.yaml @@ -34,7 +34,13 @@ jobs: cp misc/eni-max-pods.txt ../amazon-eks-ami/files/eni-max-pods.txt - uses: peter-evans/create-pull-request@v4 with: - commit-message: "Update eni-max-pods.txt" branch: update-eni-max-pods path: amazon-eks-ami/ add-paths: files/eni-max-pods.txt + commit-message: "Update eni-max-pods.txt" + title: "Update eni-max-pods.txt" + body: | + Generated by [aws/amazon-vpc-cni-k8s](https://github.com/aws/amazon-vpc-cni-k8s): + ``` + make generate-limits + ``` From 8b10a76977278e7afec85b51c326d17392565fd6 Mon Sep 17 00:00:00 2001 From: camrakin <113552683+camrakin@users.noreply.github.com> Date: Fri, 17 Feb 2023 09:57:44 -0800 Subject: [PATCH 414/621] Updating CHANGELOG for 1.25 Release (#1188) * Updating CHANGELOG for 1.25 Release --- CHANGELOG.md | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8037f146d..0dc9fe521 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,59 @@ # Changelog +### AMI Release v20230211 +* amazon-eks-gpu-node-1.25-v20230211 +* amazon-eks-gpu-node-1.24-v20230211 +* 
amazon-eks-gpu-node-1.23-v20230211 +* amazon-eks-gpu-node-1.22-v20230211 +* amazon-eks-gpu-node-1.21-v20230211 +* amazon-eks-arm64-node-1.25-v20230211 +* amazon-eks-arm64-node-1.24-v20230211 +* amazon-eks-arm64-node-1.23-v20230211 +* amazon-eks-arm64-node-1.22-v20230211 +* amazon-eks-arm64-node-1.21-v20230211 +* amazon-eks-node-1.25-v20230211 +* amazon-eks-node-1.24-v20230211 +* amazon-eks-node-1.23-v20230211 +* amazon-eks-node-1.22-v20230211 +* amazon-eks-node-1.21-v20230211 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.25.6-20230211` +* `1.24.10-20230211` +* `1.23.16-20230211` +* `1.22.17-20230211` +* `1.21.14-20230211` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.25.6/2023-01-30/ +* s3://amazon-eks/1.24.10/2023-01-30/ +* s3://amazon-eks/1.23.16/2023-01-30/ +* s3://amazon-eks/1.22.17/2023-01-30/ +* s3://amazon-eks/1.21.14/2023-01-30/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.228-132.418.amzn2 + * Kubernetes 1.24 and above: 5.10.165-143.735.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.6-1.amzn2.0.2 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- This is the first AMI release for Kubernetes 1.25. +- Kubernetes 1.24+ now use `kernel-5.10` for x86 and ARM AMIs. + - The GPU AMI will continue to use `kernel-5.4` as we work to address a compatibility issue with `nvidia-driver-latest-dkms`. +- The `kernel` package is now version-locked. + +Minor changes: +- Updated `eni-max-pods.txt` with new instance types. +- Allow `kernel_version` to be set to any value (such as `5.15`) when building a custom AMI. +- Fix a misconfiguration in the GPU AMI with `containerd`'s registry certificates. [#1168](https://github.com/awslabs/amazon-eks-ami/issues/1168). 
+ ### AMI Release v20230203 * amazon-eks-gpu-node-1.24-v20230203 * amazon-eks-gpu-node-1.23-v20230203 From 2b75446ce362f7b698750836f9bfbc30e45c8fb0 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 17 Feb 2023 11:18:06 -0800 Subject: [PATCH 415/621] Version lock kernel after removing old kernels (#1191) --- scripts/install-worker.sh | 5 ++++- scripts/upgrade_kernel.sh | 3 --- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 56ae744d1..e9a033b91 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -68,11 +68,14 @@ sudo yum install -y \ socat \ unzip \ wget \ - yum-utils + yum-utils \ + yum-plugin-versionlock # Remove any old kernel versions. `--count=1` here means "only leave 1 kernel version installed" sudo package-cleanup --oldkernels --count=1 -y +sudo yum versionlock kernel-$(uname -r) + # Remove the ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 37946283a..67e509caa 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -24,7 +24,4 @@ sudo grubby \ --update-kernel=ALL \ --args="psi=1" -sudo yum install -y yum-plugin-versionlock -sudo yum versionlock kernel - sudo reboot From 77ca8818683809a6d0bbb51b436bdb5094ff93a5 Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Fri, 17 Feb 2023 17:12:00 -0400 Subject: [PATCH 416/621] Mark v20230211 as recalled (#1192) * Mark v20230211 as recalled v20230211 is being recalled due to an issue affecting Kernel upgrades. See https://github.com/awslabs/amazon-eks-ami/issues/1193 for more details. 
--- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0dc9fe521..78ad94a38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -### AMI Release v20230211 +### [Recalled] AMI Release v20230211 * amazon-eks-gpu-node-1.25-v20230211 * amazon-eks-gpu-node-1.24-v20230211 * amazon-eks-gpu-node-1.23-v20230211 From fdb91d8a77268e8a3e08a803cc68901a5c602432 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Tue, 21 Feb 2023 17:17:15 -0800 Subject: [PATCH 417/621] Update CHANGELOG for new AMI release (#1196) --- CHANGELOG.md | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78ad94a38..6d66222a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,60 @@ # Changelog +### AMI Release v20230217 +* amazon-eks-gpu-node-1.25-v20230217 +* amazon-eks-gpu-node-1.24-v20230217 +* amazon-eks-gpu-node-1.23-v20230217 +* amazon-eks-gpu-node-1.22-v20230217 +* amazon-eks-gpu-node-1.21-v20230217 +* amazon-eks-arm64-node-1.25-v20230217 +* amazon-eks-arm64-node-1.24-v20230217 +* amazon-eks-arm64-node-1.23-v20230217 +* amazon-eks-arm64-node-1.22-v20230217 +* amazon-eks-arm64-node-1.21-v20230217 +* amazon-eks-node-1.25-v20230217 +* amazon-eks-node-1.24-v20230217 +* amazon-eks-node-1.23-v20230217 +* amazon-eks-node-1.22-v20230217 +* amazon-eks-node-1.21-v20230217 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.25.6-20230217` +* `1.24.10-20230217` +* `1.23.16-20230217` +* `1.22.17-20230217` +* `1.21.14-20230217` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.25.6/20230130/ +* s3://amazon-eks/1.24.10/20230130/ +* s3://amazon-eks/1.23.16/20230130/ +* s3://amazon-eks/1.22.17/20230211/ +* s3://amazon-eks/1.21.14/20230130/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.228-132.418.amzn2 + * Kubernetes 1.24 and above: 
5.10.165-143.735.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.6-1.amzn2.0.2 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- Kubernetes 1.24+ now use `kernel-5.10` for x86 and ARM AMIs. + - The GPU AMI will continue to use `kernel-5.4` as we work to address a compatibility issue with `nvidia-driver-latest-dkms`. +- The `kernel` package is now properly version-locked [#1191](https://github.com/awslabs/amazon-eks-ami/pull/1191). + - See [#1193](https://github.com/awslabs/amazon-eks-ami/issues/1193) for more information. +- New AMIs released for kubernetes version 1.25 +- Pressure stall information (PSI) is now enabled [#1161](https://github.com/awslabs/amazon-eks-ami/pull/1161). + +Minor changes: +- Updated `eni-max-pods.txt` with new instance types. +- Allow `kernel_version` to be set to any value (such as `5.15`) when building a custom AMI. + ### [Recalled] AMI Release v20230211 * amazon-eks-gpu-node-1.25-v20230211 * amazon-eks-gpu-node-1.24-v20230211 From f2ade6202e617db49cad28f64817cf5e228e2f44 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 23 Feb 2023 17:02:17 -0800 Subject: [PATCH 418/621] Add `Using the AMI` (#1201) --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index cbb911f78..053535d7c 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,6 @@ custom Amazon EKS AMI with [HashiCorp Packer](https://www.packer.io/). This is the same configuration that Amazon EKS uses to create the official Amazon EKS-optimized AMI. 
-**Check out the AMI's [user guide](doc/USER_GUIDE.md) for more information.** - ## 🚀 Getting started If you are new to Amazon EKS, we recommend that you follow @@ -39,9 +37,13 @@ To build an Amazon EKS Worker AMI for a particular Kubernetes version run the fo make 1.23 ## Build a Amazon EKS Worker AMI for k8s 1.23 ``` -**Note** -The default instance type to build this AMI does not qualify for the AWS free tier. You are charged for any instances created -when building this AMI. +> **Note** +> The default instance type to build this AMI does not qualify for the AWS free tier. +> You are charged for any instances created when building this AMI. + +## 👩‍💻 Using the AMI + +The [AMI user guide](doc/USER_GUIDE.md) has details about the AMI's internals, and the [EKS user guide](https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html#launch-template-custom-ami) explains how to use a custom AMI in a managed node group. ## 🔒 Security From 3731ff06d6d7903b60143db20c4ecb907ed941da Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 23 Feb 2023 17:05:20 -0800 Subject: [PATCH 419/621] Add doc on version-locked packages (#1199) --- doc/USER_GUIDE.md | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index c565cff4d..4e7291138 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -1,6 +1,6 @@ # User Guide -This guide will provide more detailed usage information on this repo. +This document includes details about using the AMI template and the resulting AMIs. 1. [AMI template variables](#ami-template-variables) 1. [Building against other versions of Kubernetes binaries](#building-against-other-versions-of-kubernetes-binaries) @@ -10,6 +10,7 @@ This guide will provide more detailed usage information on this repo. 1. [Customizing kubelet config](#customizing-kubelet-config) 1. [AL2 and Linux kernel information](#al2-and-linux-kernel-information) 1. 
[Updating known instance types](#updating-known-instance-types) +1. [Version-locked packages](#version-locked-packages) --- @@ -283,3 +284,28 @@ $ git diff ``` At this point, you can build an AMI and it will include the updated list of instance types. + +--- + +## Version-locked packages + +Some packages are critical for correct, performant behavior of a Kubernetes node; such as: +- `kernel` +- `containerd` +- `runc` + +> **Note** +> This is not an exhaustive list. The complete list of locked packages is available with `yum versionlock list`. + +As a result, these packages should generally be modified within the bounds of a managed process that gracefully handles failures and prevents disruption to the cluster's workloads. + +To prevent unintentional changes, the [yum-versionlock](https://github.com/rpm-software-management/yum-utils/tree/05db7ef501fc9d6698935bcc039c83c0761c3be2/plugins/versionlock) plugin is used on these packages. + +If you wish to modify a locked package, you can: +``` +# unlock a single package +sudo yum versionlock delete $PACKAGE_NAME + +# unlock all packages +sudo yum versionlock clear +``` From 2fcd227eefd82b622e13847d9130d4c84890dc3c Mon Sep 17 00:00:00 2001 From: Zaid Ahmed Farooq <38226823+zaf6862@users.noreply.github.com> Date: Fri, 24 Feb 2023 13:55:47 -0800 Subject: [PATCH 420/621] Add pause container accounts for ap-southeast-4, il-central-1 (#1204) --- files/get-ecr-uri.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/files/get-ecr-uri.sh b/files/get-ecr-uri.sh index e865ab24d..134dc39cc 100755 --- a/files/get-ecr-uri.sh +++ b/files/get-ecr-uri.sh @@ -54,6 +54,12 @@ else ap-south-2) acct="900889452093" ;; + ap-southeast-4) + acct="491585149902" + ;; + il-central-1) + acct="066635153087" + ;; *) acct="602401143452" ;; From e3dee6345b3c1f83f49ad1bf5aea3f03beb4ef7b Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 1 Mar 2023 19:52:42 -0800 Subject: [PATCH 421/621] Disable eni-max-pods.txt workflow in forks (#1209) --- 
.github/workflows/sync-eni-max-pods.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/sync-eni-max-pods.yaml b/.github/workflows/sync-eni-max-pods.yaml index 8610a3e27..c9abf4a5f 100644 --- a/.github/workflows/sync-eni-max-pods.yaml +++ b/.github/workflows/sync-eni-max-pods.yaml @@ -10,6 +10,8 @@ permissions: pull-requests: write jobs: update-max-pods: + # this workflow will always fail in forks; bail if this isn't running in the upstream + if: github.repository == awslabs/amazon-eks-ami runs-on: ubuntu-latest steps: - uses: aws-actions/configure-aws-credentials@v1 From c8931377b6502f68462bd3bd38e4ad41b5f20594 Mon Sep 17 00:00:00 2001 From: Carter Date: Sat, 4 Mar 2023 17:02:03 -0800 Subject: [PATCH 422/621] Add quotes to repository name (#1213) --- .github/workflows/sync-eni-max-pods.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sync-eni-max-pods.yaml b/.github/workflows/sync-eni-max-pods.yaml index c9abf4a5f..76f02addf 100644 --- a/.github/workflows/sync-eni-max-pods.yaml +++ b/.github/workflows/sync-eni-max-pods.yaml @@ -11,7 +11,7 @@ permissions: jobs: update-max-pods: # this workflow will always fail in forks; bail if this isn't running in the upstream - if: github.repository == awslabs/amazon-eks-ami + if: github.repository == 'awslabs/amazon-eks-ami' runs-on: ubuntu-latest steps: - uses: aws-actions/configure-aws-credentials@v1 From 951c6ec76624c5959057ea57c8847d633645032f Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 6 Mar 2023 13:33:50 -0800 Subject: [PATCH 423/621] Validate package versionlocks (#1195) --- scripts/validate.sh | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/scripts/validate.sh b/scripts/validate.sh index 01297dfdd..ae329005e 100644 --- a/scripts/validate.sh +++ b/scripts/validate.sh @@ -36,11 +36,40 @@ validate_file_nonexists '/var/log/secure' validate_file_nonexists '/var/log/wtmp' actual_kernel=$(uname -r) -echo 
"Verifying that kernel version $actual_kernel matches $KERNEL_VERSION" +echo "Verifying that kernel version $actual_kernel matches $KERNEL_VERSION..." if [[ $actual_kernel == $KERNEL_VERSION* ]]; then - echo "Kernel matches expected version" + echo "Kernel matches expected version!" else - echo "Kernel does not match expected version." + echo "Kernel does not match expected version!" exit 1 fi + +echo "Verifying that the package versionlocks are correct..." + +function versionlock-entries() { + # the format of this output is EPOCH:NAME-VERSION-RELEASE.ARCH + # more info in yum-versionlock(1) + # rpm doesn't accept EPOCH when querying the db, so remove it + yum versionlock list --quiet | cut -d ':' -f2 +} + +function versionlock-packages() { + versionlock-entries | xargs -I '{}' rpm --query '{}' --queryformat '%{NAME}\n' +} + +for ENTRY in $(versionlock-entries); do + if ! rpm --query "$ENTRY" &> /dev/null; then + echo "There is no package matching the versionlock entry: '$ENTRY'" + exit 1 + fi +done + +LOCKED_PACKAGES=$(versionlock-packages | wc -l) +UNIQUE_LOCKED_PACKAGES=$(versionlock-packages | sort -u | wc -l) +if [ $LOCKED_PACKAGES -ne $UNIQUE_LOCKED_PACKAGES ]; then + echo "Package(s) have multiple version locks!" + versionlock-entries +fi + +echo "Package versionlocks are correct!" 
From 17e1e3d7cd47700202daf23dbef337f35161b3b8 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 7 Mar 2023 16:31:22 -0800 Subject: [PATCH 424/621] Remove 1.21 (#1210) --- Makefile | 8 ++------ README.md | 11 ++++++----- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index c76a0dc57..28ab0c3cd 100644 --- a/Makefile +++ b/Makefile @@ -36,8 +36,8 @@ T_GREEN := \e[0;32m T_YELLOW := \e[0;33m T_RESET := \e[0m -.PHONY: all -all: 1.21 1.22 1.23 1.24 1.25 ## Build all versions of EKS Optimized AL2 AMI +.PHONY: latest +latest: 1.25 ## Build EKS Optimized AL2 AMI with the latest supported version of Kubernetes # ensure that these flags are equivalent to the rules in the .editorconfig SHFMT_FLAGS := --list \ @@ -88,10 +88,6 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html -.PHONY: 1.21 -1.21: ## Build EKS Optimized AL2 AMI - K8s 1.21 - $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2023-01-30 pull_cni_from_github=true - .PHONY: 1.22 1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-01-30 pull_cni_from_github=true diff --git a/README.md b/README.md index 053535d7c..758fb9868 100644 --- a/README.md +++ b/README.md @@ -29,13 +29,14 @@ invoking Packer directly. You can initiate the build process by running the following command in the root of this repository: ```bash +# build an AMI with the latest Kubernetes version make + +# build an AMI with a specific Kubernetes version +make 1.25 ``` -The Makefile chooses a particular kubelet binary to use per kubernetes version which you can [view here](Makefile). 
-To build an Amazon EKS Worker AMI for a particular Kubernetes version run the following command -```bash -make 1.23 ## Build a Amazon EKS Worker AMI for k8s 1.23 -``` + +The Makefile chooses a particular kubelet binary to use per Kubernetes version which you can [view here](Makefile). > **Note** > The default instance type to build this AMI does not qualify for the AWS free tier. From afea1918a4fd1896d9b4ef718576b55c006ea548 Mon Sep 17 00:00:00 2001 From: Eden Jose Date: Wed, 8 Mar 2023 09:08:56 +0800 Subject: [PATCH 425/621] Add --update to AWS CLI installation (#1205) --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index e9a033b91..aed1c3806 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -130,7 +130,7 @@ if [[ "$BINARY_BUCKET_REGION" != "us-iso-east-1" && "$BINARY_BUCKET_REGION" != " --retry-delay 1 \ -L "https://awscli.amazonaws.com/awscli-exe-linux-${MACHINE}.zip" -o "${AWSCLI_DIR}/awscliv2.zip" unzip -q "${AWSCLI_DIR}/awscliv2.zip" -d ${AWSCLI_DIR} - sudo "${AWSCLI_DIR}/aws/install" --bin-dir /bin/ + sudo "${AWSCLI_DIR}/aws/install" --bin-dir /bin/ --update else echo "Installing awscli package" sudo yum install -y awscli From 1e3c108c6ea8347f674593e1bcae6e7ee1711754 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 8 Mar 2023 11:32:09 -0800 Subject: [PATCH 426/621] Use --cloud-provider=external on 1.26+ (#1203) --- files/bin/imds | 115 ++++++++++++++++------------ files/bin/provider-id | 9 +++ files/bootstrap.sh | 14 +++- files/kubelet-containerd.service | 5 +- files/kubelet.service | 6 +- test/Dockerfile | 2 +- test/cases/cloud-provider-config.sh | 82 ++++++++++++++++++++ test/cases/imds-errors.sh | 32 ++++++++ test/cases/provider-id.sh | 25 ++++++ 9 files changed, 234 insertions(+), 56 deletions(-) create mode 100755 files/bin/provider-id create mode 100755 test/cases/cloud-provider-config.sh create mode 100755 
test/cases/imds-errors.sh create mode 100755 test/cases/provider-id.sh diff --git a/files/bin/imds b/files/bin/imds index e9f8e749d..7619ee3fb 100755 --- a/files/bin/imds +++ b/files/bin/imds @@ -4,13 +4,6 @@ set -o errexit set -o pipefail set -o nounset -IMDS_DEBUG="${IMDS_DEBUG:-false}" -function log() { - if [ "$IMDS_DEBUG" = "true" ]; then - echo >&2 "$1" - fi -} - if [ "$#" -ne 1 ]; then echo >&2 "usage: imds API_PATH" exit 1 @@ -21,57 +14,79 @@ API_PATH="${1#/}" CURRENT_TIME=$(date '+%s') -IMDS_ENDPOINT=${IMDS_ENDPOINT:-169.254.169.254} - -log "ℹ️ Talking to IMDS at $IMDS_ENDPOINT" - -TOKEN_DIR=/tmp/imds-tokens -mkdir -p $TOKEN_DIR - -IMDS_RETRIES=${IMDS_RETRIES:-10} -IMDS_RETRY_DELAY_SECONDS=${IMDS_RETRY_DELAY_SECONDS:-1} - +IMDS_DEBUG="${IMDS_DEBUG:-false}" # default ttl is 15 minutes IMDS_TOKEN_TTL_SECONDS=${IMDS_TOKEN_TTL_SECONDS:-900} - # max ttl is 6 hours, see: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html IMDS_MAX_TOKEN_TTL_SECONDS=${IMDS_MAX_TOKEN_TTL_SECONDS:-21600} +IMDS_RETRIES=${IMDS_RETRIES:-10} +IMDS_RETRY_DELAY_SECONDS=${IMDS_RETRY_DELAY_SECONDS:-1} +IMDS_ENDPOINT=${IMDS_ENDPOINT:-169.254.169.254} -# cleanup expired tokens -DELETED_TOKENS=0 -for TOKEN_FILE in $(ls $TOKEN_DIR | awk '$0 < '$(($CURRENT_TIME - $IMDS_MAX_TOKEN_TTL_SECONDS))); do - rm $TOKEN_DIR/$TOKEN_FILE - DELETED_TOKENS=$(($DELETED_TOKENS + 1)) -done -if [ "$DELETED_TOKENS" -gt 0 ]; then - log "🗑️ Deleted $DELETED_TOKENS expired IMDS token(s)." 
-fi - -TOKEN_FILE=$(ls $TOKEN_DIR | awk '$0 > '$CURRENT_TIME | sort -n -r | head -n 1) +function log() { + if [ "$IMDS_DEBUG" = "true" ]; then + echo >&2 "$1" + fi +} -if [ "$TOKEN_FILE" = "" ]; then - TOKEN_FILE=$(($CURRENT_TIME + $IMDS_TOKEN_TTL_SECONDS)) - curl \ +function imdscurl() { + local OUTPUT_FILE=$(mktemp) + local CODE=$(curl \ --silent \ --show-error \ + --output $OUTPUT_FILE \ + --write-out "%{http_code}" \ --retry $IMDS_RETRIES \ --retry-delay $IMDS_RETRY_DELAY_SECONDS \ - -o $TOKEN_DIR/$TOKEN_FILE \ - -H "X-aws-ec2-metadata-token-ttl-seconds: $IMDS_TOKEN_TTL_SECONDS" \ - -X PUT \ - "http://$IMDS_ENDPOINT/latest/api/token" - # make sure any user can utilize (and clean up) these tokens - chmod a+rwx $TOKEN_DIR/$TOKEN_FILE - log "🔑 Retrieved a fresh IMDS token that will expire in $IMDS_TOKEN_TTL_SECONDS seconds." -else - log "ℹ️ Using cached IMDS token that expires in $(($TOKEN_FILE - $CURRENT_TIME)) seconds." -fi + "$@" || echo "1") + # CODE will be either the HTTP status code, or 1 if the exit code of `curl` is non-zero + if [[ ${CODE} -lt 200 || ${CODE} -gt 299 ]]; then + cat >&2 $OUTPUT_FILE + return $CODE + fi + printf "$(cat $OUTPUT_FILE)\n" + rm $OUTPUT_FILE +} + +function get-token() { + local TOKEN_DIR=/tmp/imds-tokens + mkdir -p $TOKEN_DIR + + # cleanup expired tokens + local DELETED_TOKENS=0 + for TOKEN_FILE in $(ls $TOKEN_DIR | awk '$0 < '$(($CURRENT_TIME - $IMDS_MAX_TOKEN_TTL_SECONDS))); do + rm $TOKEN_DIR/$TOKEN_FILE + DELETED_TOKENS=$(($DELETED_TOKENS + 1)) + done + if [ "$DELETED_TOKENS" -gt 0 ]; then + log "🗑️ Deleted $DELETED_TOKENS expired IMDS token(s)." 
+ fi + + local TOKEN_FILE=$(ls $TOKEN_DIR | awk '$0 > '$CURRENT_TIME | sort -n -r | head -n 1) + + if [ "$TOKEN_FILE" = "" ]; then + TOKEN_FILE=$(($CURRENT_TIME + $IMDS_TOKEN_TTL_SECONDS)) + local TOKEN=$(imdscurl \ + -H "X-aws-ec2-metadata-token-ttl-seconds: $IMDS_TOKEN_TTL_SECONDS" \ + -X PUT \ + "http://$IMDS_ENDPOINT/latest/api/token") + echo "$TOKEN" > "$TOKEN_DIR/$TOKEN_FILE" + # make sure any user can utilize (and clean up) these tokens + chmod a+rwx $TOKEN_DIR/$TOKEN_FILE + log "🔑 Retrieved a fresh IMDS token that will expire in $IMDS_TOKEN_TTL_SECONDS seconds." + else + log "ℹ️ Using cached IMDS token that expires in $(($TOKEN_FILE - $CURRENT_TIME)) seconds." + fi + cat "$TOKEN_DIR/$TOKEN_FILE" +} + +function get-with-token() { + local API_PATH="$1" + imdscurl \ + -H "X-aws-ec2-metadata-token: $(get-token)" \ + "http://$IMDS_ENDPOINT/$API_PATH" +} + +log "ℹ️ Talking to IMDS at $IMDS_ENDPOINT" -curl \ - --silent \ - --show-error \ - --retry $IMDS_RETRIES \ - --retry-delay $IMDS_RETRY_DELAY_SECONDS \ - --write-out '\n' \ - -H "X-aws-ec2-metadata-token: $(cat $TOKEN_DIR/$TOKEN_FILE)" \ - "http://$IMDS_ENDPOINT/$API_PATH" +get-with-token "$API_PATH" diff --git a/files/bin/provider-id b/files/bin/provider-id new file mode 100755 index 000000000..7cced7f3a --- /dev/null +++ b/files/bin/provider-id @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -o errexit +set -o nounset + +AVAILABILITY_ZONE=$(imds '/latest/meta-data/placement/availability-zone') +INSTANCE_ID=$(imds '/latest/meta-data/instance-id') + +echo "aws:///$AVAILABILITY_ZONE/$INSTANCE_ID" diff --git a/files/bootstrap.sh b/files/bootstrap.sh index f24595cc6..e817e7731 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -455,11 +455,23 @@ if [[ "$USE_MAX_PODS" = "true" ]]; then echo "$(jq ".maxPods=$MAX_PODS" $KUBELET_CONFIG)" > $KUBELET_CONFIG fi +KUBELET_ARGS="--node-ip=$INTERNAL_IP --pod-infra-container-image=$PAUSE_CONTAINER --v=2" + +if vercmp "$KUBELET_VERSION" lt "1.26.0"; then + # TODO: remove 
this when 1.25 is EOL + KUBELET_CLOUD_PROVIDER="aws" +else + KUBELET_CLOUD_PROVIDER="external" + echo "$(jq ".providerID=\"$(provider-id)\"" $KUBELET_CONFIG)" > $KUBELET_CONFIG +fi + +KUBELET_ARGS="$KUBELET_ARGS --cloud-provider=$KUBELET_CLOUD_PROVIDER" + mkdir -p /etc/systemd/system/kubelet.service.d cat << EOF > /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf [Service] -Environment='KUBELET_ARGS=--node-ip=$INTERNAL_IP --pod-infra-container-image=$PAUSE_CONTAINER --v=2' +Environment='KUBELET_ARGS=$KUBELET_ARGS' EOF if [[ -n "$KUBELET_EXTRA_ARGS" ]]; then diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index d043b30b9..d5789530e 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -7,12 +7,13 @@ Requires=containerd.service sandbox-image.service [Service] Slice=runtime.slice ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 -ExecStart=/usr/bin/kubelet --cloud-provider aws \ +ExecStart=/usr/bin/kubelet \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime remote \ --container-runtime-endpoint unix:///run/containerd/containerd.sock \ - $KUBELET_ARGS $KUBELET_EXTRA_ARGS + $KUBELET_ARGS \ + $KUBELET_EXTRA_ARGS Restart=on-failure RestartForceExitStatus=SIGPIPE diff --git a/files/kubelet.service b/files/kubelet.service index 387470da1..a4aa5a890 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -6,11 +6,13 @@ Requires=docker.service [Service] ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 -ExecStart=/usr/bin/kubelet --cloud-provider aws \ +ExecStart=/usr/bin/kubelet \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ - --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS + --network-plugin cni \ + $KUBELET_ARGS \ + $KUBELET_EXTRA_ARGS Restart=always RestartSec=5 diff --git a/test/Dockerfile b/test/Dockerfile index 
9aaa44905..bab93ee84 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -1,7 +1,7 @@ FROM public.ecr.aws/aws-ec2/amazon-ec2-metadata-mock:v1.11.2 as aemm FROM public.ecr.aws/amazonlinux/amazonlinux:2 RUN amazon-linux-extras enable docker && \ - yum install -y jq containerd wget && \ + yum install -y jq containerd wget which && \ wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 && \ chmod a+x /usr/local/bin/yq diff --git a/test/cases/cloud-provider-config.sh b/test/cases/cloud-provider-config.sh new file mode 100755 index 000000000..f21cd93cb --- /dev/null +++ b/test/cases/cloud-provider-config.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +KUBELET_UNIT_DIR="/etc/systemd/system/kubelet.service.d" +KUBELET_CONFIG_FILE="/etc/kubernetes/kubelet/kubelet-config.json" + +function fail() { + echo "❌ Test Failed:" "$@" + echo "Kubelet systemd units:" + find $KUBELET_UNIT_DIR -type f | xargs cat + echo "Kubelet config file:" + cat $KUBELET_CONFIG_FILE | jq '.' + exit 1 +} + +EXPECTED_PROVIDER_ID=$(provider-id) + +echo "--> Should use in-tree cloud provider below k8s version 1.26" +# This variable is used to override the default value in the kubelet mock +export KUBELET_VERSION=v1.25.5-eks-ba74326 +EXIT_CODE=0 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected a zero exit code but got '${EXIT_CODE}'" +fi +EXIT_CODE=0 +grep -RFq -e "--cloud-provider=aws" $KUBELET_UNIT_DIR || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected --cloud-provider=aws to be present in kubelet's systemd units" +fi +ACTUAL_PROVIDER_ID=$(jq -r '.providerID' $KUBELET_CONFIG_FILE) +if [ ! 
"$ACTUAL_PROVIDER_ID" = "null" ]; then + fail "expected .providerID to be absent in kubelet's config file but was '$ACTUAL_PROVIDER_ID'" +fi + +echo "--> Should use external cloud provider at k8s version 1.26" +# at 1.26 +export KUBELET_VERSION=v1.26.5-eks-ba74326 +EXIT_CODE=0 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected a zero exit code but got '${EXIT_CODE}'" +fi +EXIT_CODE=0 +grep -RFq -e "--cloud-provider=external" $KUBELET_UNIT_DIR || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected --cloud-provider=external to be present in kubelet's systemd units" +fi +ACTUAL_PROVIDER_ID=$(jq -r '.providerID' $KUBELET_CONFIG_FILE) +if [ ! "$ACTUAL_PROVIDER_ID" = "$EXPECTED_PROVIDER_ID" ]; then + fail "expected .providerID=$EXPECTED_PROVIDER_ID to be present in kubelet's config file but was '$ACTUAL_PROVIDER_ID'" +fi + +echo "--> Should use external cloud provider above k8s version 1.26" +# above 1.26 +export KUBELET_VERSION=v1.27.0-eks-ba74326 +EXIT_CODE=0 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected a zero exit code but got '${EXIT_CODE}'" +fi +EXIT_CODE=0 +grep -RFq -e "--cloud-provider=external" $KUBELET_UNIT_DIR || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected --cloud-provider=external to be present in kubelet's systemd units" +fi +ACTUAL_PROVIDER_ID=$(jq -r '.providerID' $KUBELET_CONFIG_FILE) +if [ ! 
"$ACTUAL_PROVIDER_ID" = "$EXPECTED_PROVIDER_ID" ]; then + fail "expected .providerID=$EXPECTED_PROVIDER_ID to be present in kubelet's config file but was '$ACTUAL_PROVIDER_ID" +fi diff --git a/test/cases/imds-errors.sh b/test/cases/imds-errors.sh new file mode 100755 index 000000000..865ca8dc7 --- /dev/null +++ b/test/cases/imds-errors.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +export IMDS_DEBUG=true + +echo "--> Should succeed for known API" +EXIT_CODE=0 +imds /latest/meta-data/instance-id || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got: $EXIT_CODE" + exit 1 +fi + +echo "--> Should fail for unknown API" +EXIT_CODE=0 +imds /foo || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code" + exit 1 +fi + +echo "--> Should fail for invalid endpoint" +EXIT_CODE=0 +export IMDS_ENDPOINT="127.0.0.0:1234" +imds /latest/meta-data/instance-id || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code" + exit 1 +fi diff --git a/test/cases/provider-id.sh b/test/cases/provider-id.sh new file mode 100755 index 000000000..8707a41ea --- /dev/null +++ b/test/cases/provider-id.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +echo "--> Should fetch imds details correctly" +EXPECTED_INSTANCE_ID="i-1234567890abcdef0" +EXPECTED_AVAILABILITY_ZONE="us-east-1a" +EXPECTED_PROVIDER_ID="aws:///$EXPECTED_AVAILABILITY_ZONE/$EXPECTED_INSTANCE_ID" +PROVIDER_ID=$(provider-id) +if [ ! "$PROVIDER_ID" = "$EXPECTED_PROVIDER_ID" ]; then + echo "❌ Test Failed: expected provider-id=$EXPECTED_PROVIDER_ID but got '${PROVIDER_ID}'" + exit 1 +fi + +echo "--> Should fail when imds is unreachable" +echo '#!/usr/bin/sh +exit 1' > $(which imds) +EXIT_CODE=0 +provider-id || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code" + exit 1 +fi From 1f8ec9f4bb08e598db8c56f772e620481e62fd78 Mon Sep 17 00:00:00 2001 From: Sichaow Date: Thu, 9 Mar 2023 17:14:50 -0800 Subject: [PATCH 427/621] update CHANGELOG.md for v20230304 release (#1218) --- CHANGELOG.md | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d66222a9..0639a2883 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,55 @@ # Changelog +### AMI Release v20230304 +* amazon-eks-gpu-node-1.25-v20230304 +* amazon-eks-gpu-node-1.24-v20230304 +* amazon-eks-gpu-node-1.23-v20230304 +* amazon-eks-gpu-node-1.22-v20230304 +* amazon-eks-gpu-node-1.21-v20230304 +* amazon-eks-arm64-node-1.25-v20230304 +* amazon-eks-arm64-node-1.24-v20230304 +* amazon-eks-arm64-node-1.23-v20230304 +* amazon-eks-arm64-node-1.22-v20230304 +* amazon-eks-arm64-node-1.21-v20230304 +* amazon-eks-node-1.25-v20230304 +* amazon-eks-node-1.24-v20230304 +* amazon-eks-node-1.23-v20230304 +* amazon-eks-node-1.22-v20230304 +* amazon-eks-node-1.21-v20230304 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.25.6-20230304` +* `1.24.10-20230304` +* `1.23.16-20230304` +* `1.22.17-20230304` +* `1.21.14-20230304` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.25.6/20230130/ +* s3://amazon-eks/1.24.10/20230130/ +* s3://amazon-eks/1.23.16/20230130/ +* s3://amazon-eks/1.22.17/20230130/ +* s3://amazon-eks/1.21.14/20230130/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.231-137.341.amzn2 + * Kubernetes 1.24 and above: 5.10.167-147.601.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that with Kubernetes 1.25+, Docker is only installed on GPU AMI's. This is subject to change as we remove unnecessary dependencies, and we recommend completing the migration to `containerd` immediately. 
+* `containerd`: 1.6.6-1.amzn2.0.2 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- This is the last AMI release for Kubernetes 1.21 +- This is the first AMI release available in `ap-southeast-4` + +Minor changes: +- Adds a user guide section about packages in the versionlock file. [(#1199)](https://github.com/awslabs/amazon-eks-ami/pull/1199) + ### AMI Release v20230217 * amazon-eks-gpu-node-1.25-v20230217 * amazon-eks-gpu-node-1.24-v20230217 From e989803c4887bc4cd4d6b083ffe4628c2a640018 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 21 Mar 2023 11:57:11 -0700 Subject: [PATCH 428/621] Update eni-max-pods.txt (#1194) --- files/eni-max-pods.txt | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index d7aa1db4c..c914d80cf 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,8 +11,6 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2023-02-07T22:38:44Z -# # The regions queried were: # - ap-northeast-1 # - ap-northeast-2 @@ -46,6 +44,7 @@ a1.large 29 a1.medium 8 a1.metal 234 a1.xlarge 58 +bmn-sf1.metal 737 c1.medium 12 c1.xlarge 58 c3.2xlarge 58 @@ -176,7 +175,6 @@ c7g.large 29 c7g.medium 8 c7g.metal 737 c7g.xlarge 58 -cc2.8xlarge 234 cr1.8xlarge 234 d2.2xlarge 58 d2.4xlarge 234 @@ -643,12 +641,6 @@ x1e.32xlarge 234 x1e.4xlarge 58 x1e.8xlarge 58 x1e.xlarge 29 -x2ezn.12xlarge 737 -x2ezn.2xlarge 58 -x2ezn.4xlarge 234 -x2ezn.6xlarge 234 -x2ezn.8xlarge 234 -x2ezn.metal 737 x2gd.12xlarge 234 x2gd.16xlarge 737 x2gd.2xlarge 58 From 93b61ac4c940b0fda6dc272f8f7b6a48d3d9ee18 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 21 Mar 2023 18:56:06 -0700 Subject: [PATCH 429/621] Update eni-max-pods.txt (#1226) --- files/eni-max-pods.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index c914d80cf..cde3c610e 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -165,6 +165,7 @@ c6in.32xlarge 345 c6in.4xlarge 234 c6in.8xlarge 234 c6in.large 29 +c6in.metal 345 c6in.xlarge 58 c7g.12xlarge 234 c7g.16xlarge 737 @@ -409,6 +410,7 @@ m6idn.32xlarge 345 m6idn.4xlarge 234 m6idn.8xlarge 234 m6idn.large 29 +m6idn.metal 345 m6idn.xlarge 58 m6in.12xlarge 234 m6in.16xlarge 737 @@ -418,6 +420,7 @@ m6in.32xlarge 345 m6in.4xlarge 234 m6in.8xlarge 234 m6in.large 29 +m6in.metal 345 m6in.xlarge 58 m7g.12xlarge 234 m7g.16xlarge 737 @@ -568,6 +571,7 @@ r6idn.32xlarge 345 r6idn.4xlarge 234 r6idn.8xlarge 234 r6idn.large 29 +r6idn.metal 345 r6idn.xlarge 58 r6in.12xlarge 234 r6in.16xlarge 737 @@ -577,6 +581,7 @@ r6in.32xlarge 345 r6in.4xlarge 234 r6in.8xlarge 234 r6in.large 29 +r6in.metal 345 r6in.xlarge 58 r7g.12xlarge 234 r7g.16xlarge 737 From 1d17dd0f5c755963cc8944f00c20ab37ced1cc05 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 22 Mar 2023 14:04:57 -0700 
Subject: [PATCH 430/621] Don't pass URL to --content (#1227) --- log-collector-script/linux/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/README.md b/log-collector-script/linux/README.md index efa100fa5..69bc088b3 100644 --- a/log-collector-script/linux/README.md +++ b/log-collector-script/linux/README.md @@ -102,10 +102,11 @@ Trying to archive gathered information... 1. Create the SSM document named "EKSLogCollector" using the following commands: ``` +curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-ssm-content.json aws ssm create-document \ --name "EKSLogCollectorLinux" \ --document-type "Command" \ - --content https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-ssm-content.json + --content file://eks-ssm-content.json ``` 2. To execute the bash script in the SSM document and to collect the logs from worker, run the following command: From ec01efcedbc4ff8ab0390eacb6f75db90675ea65 Mon Sep 17 00:00:00 2001 From: jacobwolfaws <113703057+jacobwolfaws@users.noreply.github.com> Date: Mon, 27 Mar 2023 16:25:40 -0400 Subject: [PATCH 431/621] Collect logs for fsx & file-cache (#1232) --- log-collector-script/linux/eks-log-collector.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 50f759461..351c3f103 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -20,7 +20,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.7.3" +readonly PROGRAM_VERSION="0.7.4" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -352,6 +352,8 @@ get_common_logs() { cp --force 
--dereference --recursive /var/log/containers/kube-proxy* "${COLLECT_DIR}"/var_log/ 2> /dev/null cp --force --dereference --recursive /var/log/containers/ebs-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null cp --force --dereference --recursive /var/log/containers/efs-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/fsx-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/file-cache-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null continue fi if [[ "${entry}" == "pods" ]]; then From 1dde199bb06f8f68c563ac96f8e3a630e0728b28 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 28 Mar 2023 09:56:52 -0700 Subject: [PATCH 432/621] Update binaries to 2023-03-17 (#1233) --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 28ab0c3cd..27db56a86 100644 --- a/Makefile +++ b/Makefile @@ -90,19 +90,19 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.22 1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 - $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-01-30 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-03-17 pull_cni_from_github=true .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.16 kubernetes_build_date=2023-01-30 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-03-17 pull_cni_from_github=true .PHONY: 1.24 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 - $(MAKE) k8s kubernetes_version=1.24.10 kubernetes_build_date=2023-01-30 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.24.11 kubernetes_build_date=2023-03-17 pull_cni_from_github=true .PHONY: 1.25 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 - $(MAKE) k8s kubernetes_version=1.25.6 kubernetes_build_date=2023-01-30 pull_cni_from_github=true + $(MAKE) k8s 
kubernetes_version=1.25.7 kubernetes_build_date=2023-03-17 pull_cni_from_github=true .PHONY: help help: ## Display help From e535e78282f603b4f968154f212c7caff0cf1c9a Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 28 Mar 2023 14:16:38 -0700 Subject: [PATCH 433/621] Add clean target to Makefile (#1236) --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 27db56a86..3a287c94a 100644 --- a/Makefile +++ b/Makefile @@ -104,6 +104,11 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 $(MAKE) k8s kubernetes_version=1.25.7 kubernetes_build_date=2023-03-17 pull_cni_from_github=true +.PHONY: clean +clean: + rm *-manifest.json + rm *-version-info.json + .PHONY: help help: ## Display help @awk 'BEGIN {FS = ":.*##"; printf "Usage:\n make \033[36m\033[0m\n"} /^[\.a-zA-Z_0-9\-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) From 4376066eb037b1557e20177d726a8eeafa8ee421 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 29 Mar 2023 14:10:49 -0700 Subject: [PATCH 434/621] Mount eBPF filesystem by default on 1.27+ (#1223) --- files/bin/mount-bpf-fs | 50 ++++++++++++++++++++ files/bootstrap.sh | 35 ++++++++++---- test/cases/mount-bpf-fs.sh | 96 ++++++++++++++++++++++++++++++++++++++ test/mocks/aws | 2 + test/mocks/iptables-save | 3 +- test/mocks/kubelet | 5 +- test/mocks/mount | 23 +++++++++ test/mocks/sudo | 2 +- test/mocks/systemctl | 3 +- 9 files changed, 202 insertions(+), 17 deletions(-) create mode 100755 files/bin/mount-bpf-fs create mode 100755 test/cases/mount-bpf-fs.sh create mode 100755 test/mocks/mount diff --git a/files/bin/mount-bpf-fs b/files/bin/mount-bpf-fs new file mode 100755 index 000000000..df5767e99 --- /dev/null +++ b/files/bin/mount-bpf-fs @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +set -o errexit +set -o nounset + +SYSTEMD_UNIT_DIR="/etc/systemd/system" 
+SYSTEMD_UNIT="sys-fs-bpf.mount" +SYSTEMD_UNIT_PATH="$SYSTEMD_UNIT_DIR/$SYSTEMD_UNIT" +MOUNT_POINT="/sys/fs/bpf" +FS_TYPE="bpf" + +MOUNT_BPF_FS_DEBUG=${MOUNT_BPF_FS_DEBUG:-false} +function debug() { + if [ "$MOUNT_BPF_FS_DEBUG" = "true" ]; then + echo >&2 "DEBUG:" "$@" + fi +} + +if [ $(mount --types "$FS_TYPE" | wc -l) -gt 0 ]; then + debug "$FS_TYPE filesystem already mounted!" + exit 0 +elif mount | awk '{print $3}' | grep "$MOUNT_POINT"; then + debug "mount point at $MOUNT_POINT already exists!" + exit 0 +elif [ -f "$SYSTEMD_UNIT_PATH" ]; then + debug "systemd unit at $SYSTEMD_UNIT_PATH already exists!" + exit 0 +fi + +mkdir -p "$SYSTEMD_UNIT_DIR" +cat > "$SYSTEMD_UNIT_PATH" << EOL +[Unit] +Description=BPF mounts +Documentation=https://docs.kernel.org/bpf/index.html +DefaultDependencies=no +Before=local-fs.target umount.target +After=swap.target + +[Mount] +What=bpffs +Where=$MOUNT_POINT +Type=bpf +Options=rw,nosuid,nodev,noexec,relatime,mode=700 + +[Install] +WantedBy=multi-user.target +EOL + +systemctl enable "$SYSTEMD_UNIT" +systemctl start "$SYSTEMD_UNIT" diff --git a/files/bootstrap.sh b/files/bootstrap.sh index e817e7731..0d17f1141 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -16,22 +16,24 @@ function print_help { echo "Bootstraps an instance into an EKS cluster" echo "" echo "-h,--help print this help" - echo "--use-max-pods Sets --max-pods for the kubelet when true. (default: true)" - echo "--b64-cluster-ca The base64 encoded cluster CA content. Only valid when used with --apiserver-endpoint. Bypasses calling \"aws eks describe-cluster\"" + echo echo "--apiserver-endpoint The EKS cluster API Server endpoint. Only valid when used with --b64-cluster-ca. Bypasses calling \"aws eks describe-cluster\"" - echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints." - echo "--enable-docker-bridge Restores the docker default bridge network. 
(default: false)" echo "--aws-api-retry-attempts Number of retry attempts for AWS API call (DescribeCluster) (default: 3)" - echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" + echo "--b64-cluster-ca The base64 encoded cluster CA content. Only valid when used with --apiserver-endpoint. Bypasses calling \"aws eks describe-cluster\"" + echo "--cluster-id Specify the id of EKS cluster" + echo "--container-runtime Specify a container runtime (default: dockerd)" echo "--containerd-config-file File containing the containerd configuration to be used in place of AMI defaults." echo "--dns-cluster-ip Overrides the IP address to use for DNS queries within the cluster. Defaults to 10.100.0.10 or 172.20.0.10 based on the IP address of the primary interface" + echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" + echo "--enable-docker-bridge Restores the docker default bridge network. (default: false)" + echo "--enable-local-outpost Enable support for worker nodes to communicate with the local control plane when running on a disconnected Outpost. (true or false)" + echo "--ip-family Specify ip family of the cluster" + echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints." 
+ echo "--mount-bpf-fs Mount a bpffs at /sys/fs/bpf (default: true, for Kubernetes 1.27+; false otherwise)" echo "--pause-container-account The AWS account (number) to pull the pause container from" echo "--pause-container-version The tag of the pause container" - echo "--container-runtime Specify a container runtime (default: dockerd)" - echo "--ip-family Specify ip family of the cluster" echo "--service-ipv6-cidr ipv6 cidr range of the cluster" - echo "--enable-local-outpost Enable support for worker nodes to communicate with the local control plane when running on a disconnected Outpost. (true or false)" - echo "--cluster-id Specify the id of EKS cluster" + echo "--use-max-pods Sets --max-pods for the kubelet when true. (default: true)" } POSITIONAL=() @@ -123,6 +125,11 @@ while [[ $# -gt 0 ]]; do shift shift ;; + --mount-bpf-fs) + MOUNT_BPF_FS=$2 + shift + shift + ;; *) # unknown option POSITIONAL+=("$1") # save it in an array for later shift # past argument @@ -178,6 +185,12 @@ SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" CLUSTER_ID="${CLUSTER_ID:-}" +DEFAULT_MOUNT_BPF_FS="true" +if vercmp "$KUBELET_VERSION" lt "1.27.0"; then + DEFAULT_MOUNT_BPF_FS="false" +fi +MOUNT_BPF_FS="${MOUNT_BPF_FS:-$DEFAULT_MOUNT_BPF_FS}" + # Helper function which calculates the amount of the given resource (either CPU or memory) # to reserve in a given resource range, specified by a start and end of the range and a percentage # of the resource to reserve.
Note that we return zero if the start of the resource range is @@ -269,6 +282,10 @@ if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then exit 1 fi +if [ "$MOUNT_BPF_FS" = "true" ]; then + sudo mount-bpf-fs +fi + ECR_URI=$(/etc/eks/get-ecr-uri.sh "${AWS_DEFAULT_REGION}" "${AWS_SERVICES_DOMAIN}" "${PAUSE_CONTAINER_ACCOUNT:-}") PAUSE_CONTAINER_IMAGE=${PAUSE_CONTAINER_IMAGE:-$ECR_URI/eks/pause} PAUSE_CONTAINER="$PAUSE_CONTAINER_IMAGE:$PAUSE_CONTAINER_VERSION" diff --git a/test/cases/mount-bpf-fs.sh b/test/cases/mount-bpf-fs.sh new file mode 100755 index 000000000..e8ef5da99 --- /dev/null +++ b/test/cases/mount-bpf-fs.sh @@ -0,0 +1,96 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +export MOUNT_BPF_FS_DEBUG=true + +echo "--> Should succeed if bpf type fs already exists" +function mount() { + echo "none on /foo/bar type bpf (rw,nosuid,nodev,noexec,relatime,mode=700)" +} +export -f mount +EXIT_CODE=0 +mount-bpf-fs || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got: $EXIT_CODE" + exit 1 +fi +export -nf mount + +echo "--> Should succeed if mount point already exists" +function mount() { + echo "none on /sys/fs/bpf type foo (rw,nosuid,nodev,noexec,relatime,mode=700)" +} +export -f mount +EXIT_CODE=0 +mount-bpf-fs || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got: $EXIT_CODE" + exit 1 +fi +export -nf mount + +echo "--> Should succeed if systemd unit already exists" +function mount() { + echo "foo" +} +export -f mount +SYSTEMD_UNIT=/etc/systemd/system/sys-fs-bpf.mount +mkdir -p $(dirname $SYSTEMD_UNIT) +echo "foo" > $SYSTEMD_UNIT +EXIT_CODE=0 +mount-bpf-fs || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got: $EXIT_CODE" + exit 1 +fi +export -nf mount +rm $SYSTEMD_UNIT + +echo "--> Should default to true on 1.27+" +export KUBELET_VERSION=v1.27.0-eks-ba74326 +MOUNT_BPF_FS_MOCK=$(mktemp) +function mount-bpf-fs() { + echo "called" >> $MOUNT_BPF_FS_MOCK +} +export MOUNT_BPF_FS_MOCK +export -f mount-bpf-fs +EXIT_CODE=0 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +if [ "$(cat $MOUNT_BPF_FS_MOCK)" = "called" ]; then + echo "❌ Test Failed: expected mount-bpf-fs to be called once but it was not!" + exit 1 +fi +export -nf mount-bpf-fs + +echo "--> Should default to false on 1.26-" +export KUBELET_VERSION=v1.26.0-eks-ba74326 +MOUNT_BPF_FS_MOCK=$(mktemp) +function mount-bpf-fs() { + echo "called" >> $MOUNT_BPF_FS_MOCK +} +export MOUNT_BPF_FS_MOCK +export -f mount-bpf-fs +EXIT_CODE=0 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +if [ "$(cat $MOUNT_BPF_FS_MOCK)" = "called" ]; then + echo "❌ Test Failed: expected mount-bpf-fs to not be called but it was!"
+ exit 1 +fi +export -nf mount-bpf-fs diff --git a/test/mocks/aws b/test/mocks/aws index b752fb6e3..da5f00b50 100755 --- a/test/mocks/aws +++ b/test/mocks/aws @@ -5,6 +5,8 @@ SCRIPTPATH="$( pwd -P )" +echo >&2 "mocking 'aws $@'" + if [[ $1 == "ec2" ]]; then if [[ $2 == "describe-instance-types" ]]; then diff --git a/test/mocks/iptables-save b/test/mocks/iptables-save index b4d037268..6c02baf7b 100755 --- a/test/mocks/iptables-save +++ b/test/mocks/iptables-save @@ -1,4 +1,3 @@ #!/usr/bin/env bash -set -euo pipefail -echo "mocking iptables-save with params $@" +echo >&2 "mocking 'iptables-save $@'" diff --git a/test/mocks/kubelet b/test/mocks/kubelet index b3fed23c1..3c7c5dc91 100755 --- a/test/mocks/kubelet +++ b/test/mocks/kubelet @@ -1,11 +1,10 @@ #!/usr/bin/env bash -set -euo pipefail + +echo >&2 "mocking 'kubelet $@'" # The only use of kubelet directly is to get the Kubernetes version, # so we'll set a default here to avoid test failures, and you can # override by setting the KUBELET_VERSION environment variable. 
if [ $# == 1 ] && [ $1 == "--version" ]; then echo "Kubernetes ${KUBELET_VERSION:-v1.23.9-eks-ba74326}" -else - echo "mocking kubelet with params $@" fi diff --git a/test/mocks/mount b/test/mocks/mount new file mode 100755 index 000000000..7a9170b84 --- /dev/null +++ b/test/mocks/mount @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +echo >&2 "mocking 'mount $@'" + +echo 'sysfs on /sys type sysfs (rw,nosuid,nodev,noexec,relatime) +proc on /proc type proc (rw,nosuid,nodev,noexec,relatime) +devtmpfs on /dev type devtmpfs (rw,nosuid,size=4059512k,nr_inodes=1014878,mode=755) +securityfs on /sys/kernel/security type securityfs (rw,nosuid,nodev,noexec,relatime) +tmpfs on /run type tmpfs (rw,nosuid,nodev,mode=755) +tmpfs on /sys/fs/cgroup type tmpfs (ro,nosuid,nodev,noexec,mode=755) +cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd) +pstore on /sys/fs/pstore type pstore (rw,nosuid,nodev,noexec,relatime) +cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blkio) +cgroup on /sys/fs/cgroup/freezer type cgroup (rw,nosuid,nodev,noexec,relatime,freezer) +cgroup on /sys/fs/cgroup/perf_event type cgroup (rw,nosuid,nodev,noexec,relatime,perf_event) +cgroup on /sys/fs/cgroup/hugetlb type cgroup (rw,nosuid,nodev,noexec,relatime,hugetlb) +cgroup on /sys/fs/cgroup/cpuset type cgroup (rw,nosuid,nodev,noexec,relatime,cpuset) +cgroup on /sys/fs/cgroup/net_cls,net_prio type cgroup (rw,nosuid,nodev,noexec,relatime,net_cls,net_prio) +cgroup on /sys/fs/cgroup/cpu,cpuacct type cgroup (rw,nosuid,nodev,noexec,relatime,cpu,cpuacct) +cgroup on /sys/fs/cgroup/memory type cgroup (rw,nosuid,nodev,noexec,relatime,memory) +cgroup on /sys/fs/cgroup/pids type cgroup (rw,nosuid,nodev,noexec,relatime,pids) +cgroup on /sys/fs/cgroup/devices type cgroup (rw,nosuid,nodev,noexec,relatime,devices) +/dev/xvda1 on / type xfs (rw,noatime,attr2,inode64,logbufs=8,logbsize=32k,noquota)' diff --git 
a/test/mocks/sudo b/test/mocks/sudo index 7bc3d2ffb..7b76a82a8 100755 --- a/test/mocks/sudo +++ b/test/mocks/sudo @@ -1,4 +1,4 @@ #!/usr/bin/env bash -set -euo pipefail +echo >&2 "mocking 'sudo $@'" exec "$@" diff --git a/test/mocks/systemctl b/test/mocks/systemctl index 5efdd955e..3e74c7e2e 100755 --- a/test/mocks/systemctl +++ b/test/mocks/systemctl @@ -1,4 +1,3 @@ #!/usr/bin/env bash -set -euo pipefail -echo "mocking systemctl with $@" +echo >&2 "mocking 'systemctl $@'" From 0e5fa8775b126c8526ab39f8f92a040d4f9373df Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 29 Mar 2023 17:09:50 -0700 Subject: [PATCH 435/621] AMI Release v20230322 (#1238) --- CHANGELOG.md | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0639a2883..44e28e823 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,52 @@ # Changelog +### AMI Release v20230322 +* amazon-eks-gpu-node-1.25-v20230322 +* amazon-eks-gpu-node-1.24-v20230322 +* amazon-eks-gpu-node-1.23-v20230322 +* amazon-eks-gpu-node-1.22-v20230322 +* amazon-eks-arm64-node-1.25-v20230322 +* amazon-eks-arm64-node-1.24-v20230322 +* amazon-eks-arm64-node-1.23-v20230322 +* amazon-eks-arm64-node-1.22-v20230322 +* amazon-eks-node-1.25-v20230322 +* amazon-eks-node-1.24-v20230322 +* amazon-eks-node-1.23-v20230322 +* amazon-eks-node-1.22-v20230322 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.25.7-20230322` +* `1.24.11-20230322` +* `1.23.17-20230322` +* `1.22.17-20230322` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.25.7/2023-03-17/ +* s3://amazon-eks/1.24.11/2023-03-17/ +* s3://amazon-eks/1.23.17/2023-03-17/ +* s3://amazon-eks/1.22.17/2023-03-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.235-144.344.amzn2 + * Kubernetes 1.24 and above: 5.10.173-154.642.amzn2 + * The GPU AMI will continue to use `kernel-5.4` for all Kubernetes versions 
as we work to address a compatibility issue with `nvidia-driver-latest-dkms` ([#1222](https://github.com/awslabs/amazon-eks-ami/issues/1222)). +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that with Kubernetes 1.25+, Docker is only installed on GPU AMI's. This is subject to change as we remove unnecessary dependencies, and we recommend completing the migration to `containerd` immediately. +* `containerd`: 1.6.6-1.amzn2.0.2 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- Validate package versionlocks ([#1195](https://github.com/awslabs/amazon-eks-ami/pull/1195)) +- Updated `kernel-5.4` to address: + - [ALASKERNEL-5.4-2023-043](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-043.html) +- Updated `kernel-5.10` to address: + - [ALASKERNEL-5.10-2023-027](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-027.html) + - [ALASKERNEL-5.10-2023-028](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-028.html) + ### AMI Release v20230304 * amazon-eks-gpu-node-1.25-v20230304 * amazon-eks-gpu-node-1.24-v20230304 From fbb0ded69742525508c5e96950041d49003fdf7f Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 30 Mar 2023 13:41:35 -0700 Subject: [PATCH 436/621] Always configure kubelet to use external ecr-credential-provider (#1240) --- files/kubelet-config.json | 3 ++- files/kubelet-containerd.service | 2 ++ files/kubelet.service | 2 ++ scripts/install-worker.sh | 15 --------------- 4 files changed, 6 insertions(+), 16 deletions(-) diff --git a/files/kubelet-config.json b/files/kubelet-config.json index b78510c6a..666350e2b 100644 --- a/files/kubelet-config.json +++ b/files/kubelet-config.json @@ -27,7 +27,8 @@ "cgroupDriver": "cgroupfs", "cgroupRoot": "/", "featureGates": { - "RotateKubeletServerCertificate": true + "RotateKubeletServerCertificate": true, + "KubeletCredentialProviders": true }, "protectKernelDefaults": true, "serializeImagePulls": false, diff 
--git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index d5789530e..db1c56511 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -12,6 +12,8 @@ ExecStart=/usr/bin/kubelet \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime remote \ --container-runtime-endpoint unix:///run/containerd/containerd.sock \ + --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \ + --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider \ $KUBELET_ARGS \ $KUBELET_EXTRA_ARGS diff --git a/files/kubelet.service b/files/kubelet.service index a4aa5a890..5002876be 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -11,6 +11,8 @@ ExecStart=/usr/bin/kubelet \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ --network-plugin cni \ + --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \ + --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider \ $KUBELET_ARGS \ $KUBELET_EXTRA_ARGS diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index aed1c3806..e52fe2c0b 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -162,12 +162,6 @@ else sudo mv $TEMPLATE_DIR/containerd-config.toml /etc/eks/containerd/containerd-config.toml fi -if vercmp "$KUBERNETES_VERSION" gteq "1.22.0"; then - # enable CredentialProviders features in kubelet-containerd service file - IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' - sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet-containerd.service -fi - sudo mv $TEMPLATE_DIR/kubelet-containerd.service /etc/eks/containerd/kubelet-containerd.service sudo mv $TEMPLATE_DIR/sandbox-image.service 
/etc/eks/containerd/sandbox-image.service sudo mv $TEMPLATE_DIR/pull-sandbox-image.sh /etc/eks/containerd/pull-sandbox-image.sh @@ -325,15 +319,6 @@ if [[ $KUBERNETES_VERSION == "1.20"* ]]; then echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $TEMPLATE_DIR/kubelet-config.json fi -if vercmp "$KUBERNETES_VERSION" gteq "1.22.0"; then - # enable CredentialProviders feature flags in kubelet service file - IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' - sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet.service - # enable KubeletCredentialProviders features in kubelet configuration - KUBELET_CREDENTIAL_PROVIDERS_FEATURES=$(cat $TEMPLATE_DIR/kubelet-config.json | jq '.featureGates += {KubeletCredentialProviders: true}') - printf "%s" "$KUBELET_CREDENTIAL_PROVIDERS_FEATURES" > "$TEMPLATE_DIR/kubelet-config.json" -fi - sudo mv $TEMPLATE_DIR/kubelet.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service sudo mv $TEMPLATE_DIR/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json From d66425524e8a88592c493d68216b0aad24117468 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Thu, 30 Mar 2023 17:40:15 -0500 Subject: [PATCH 437/621] Use default kubelet API QPS for 1.27+ (#1241) --- files/bootstrap.sh | 3 +- test/cases/api-qps-k8s-1.22-to-1.26.sh | 56 +++++++++++++++++++ ....22-above.sh => api-qps-k8s-1.27-above.sh} | 9 +-- 3 files changed, 63 insertions(+), 5 deletions(-) create mode 100755 test/cases/api-qps-k8s-1.22-to-1.26.sh rename test/cases/{api-qps-k8s-1.22-above.sh => api-qps-k8s-1.27-above.sh} (82%) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 0d17f1141..ce1a7f0e5 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -437,8 +437,9 @@ else fi INSTANCE_TYPE=$(imds 
'latest/meta-data/instance-type') -if vercmp "$KUBELET_VERSION" gteq "1.22.0"; then +if vercmp "$KUBELET_VERSION" gteq "1.22.0" && vercmp "$KUBELET_VERSION" lt "1.27.0"; then # for K8s versions that suport API Priority & Fairness, increase our API server QPS + # in 1.27, the default is already increased to 50/100, so use the higher defaults echo $(jq ".kubeAPIQPS=( .kubeAPIQPS // 10)|.kubeAPIBurst=( .kubeAPIBurst // 20)" $KUBELET_CONFIG) > $KUBELET_CONFIG fi diff --git a/test/cases/api-qps-k8s-1.22-to-1.26.sh b/test/cases/api-qps-k8s-1.22-to-1.26.sh new file mode 100755 index 000000000..f61f1d0ac --- /dev/null +++ b/test/cases/api-qps-k8s-1.22-to-1.26.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "--> Should increase API server QPS for K8s 1.22 - 1.26" +exit_code=0 +export KUBELET_VERSION=v1.22.0-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_api_qps="10" +expected_api_burst="20" + +actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) +actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) +if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then + echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" + exit 1 +fi + +if [[ ${actual_api_burst} != ${expected_api_burst} ]]; then + echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" + exit 1 +fi + +export KUBELET_VERSION=v1.26.0-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$?
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_api_qps="10" +expected_api_burst="20" + +actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) +actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) +if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then + echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" + exit 1 +fi + +if [[ ${actual_api_burst} != ${expected_api_burst} ]]; then + echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" + exit 1 +fi diff --git a/test/cases/api-qps-k8s-1.22-above.sh b/test/cases/api-qps-k8s-1.27-above.sh similarity index 82% rename from test/cases/api-qps-k8s-1.22-above.sh rename to test/cases/api-qps-k8s-1.27-above.sh index cbc242ac2..d25f1ac31 100755 --- a/test/cases/api-qps-k8s-1.22-above.sh +++ b/test/cases/api-qps-k8s-1.27-above.sh @@ -1,9 +1,9 @@ #!/usr/bin/env bash set -euo pipefail -echo "--> Should increase API server QPS for K8s 1.22+" +echo "--> Should use default API server QPS for K8s 1.27+" exit_code=0 -export KUBELET_VERSION=v1.22.0-eks-ba74326 +export KUBELET_VERSION=v1.27.0-eks-ba74326 /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -14,8 +14,9 @@ if [[ ${exit_code} -ne 0 ]]; then exit 1 fi -expected_api_qps="10" -expected_api_burst="20" +# values should not be set +expected_api_qps="null" +expected_api_burst="null" actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) From a22169e55f5b5ff592f22b82f0063707fd3dd469 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 3 Apr 2023 12:36:54 -0700 Subject: [PATCH 438/621] Add 1.26 (#1246) --- Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git
a/Makefile b/Makefile index 3a287c94a..9b8d6e952 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: latest -latest: 1.25 ## Build EKS Optimized AL2 AMI with the latest supported version of Kubernetes +latest: 1.26 ## Build EKS Optimized AL2 AMI with the latest supported version of Kubernetes # ensure that these flags are equivalent to the rules in the .editorconfig SHFMT_FLAGS := --list \ @@ -104,6 +104,10 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 $(MAKE) k8s kubernetes_version=1.25.7 kubernetes_build_date=2023-03-17 pull_cni_from_github=true +.PHONY: 1.26 +1.26: ## Build EKS Optimized AL2 AMI - K8s 1.26 + $(MAKE) k8s kubernetes_version=1.26.2 kubernetes_build_date=2023-03-17 pull_cni_from_github=true + .PHONY: clean clean: rm *-manifest.json From 2252d0b4e7eb80e24758d0686d4768f9cdcf41a2 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 4 Apr 2023 16:23:11 -0700 Subject: [PATCH 439/621] Remove --container-runtime kubelet flag for 1.27+ (#1250) --- files/bootstrap.sh | 8 +++++++- files/kubelet-containerd.service | 1 - 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index ce1a7f0e5..c9b5da054 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -21,7 +21,7 @@ function print_help { echo "--aws-api-retry-attempts Number of retry attempts for AWS API call (DescribeCluster) (default: 3)" echo "--b64-cluster-ca The base64 encoded cluster CA content. Only valid when used with --apiserver-endpoint. Bypasses calling \"aws eks describe-cluster\"" echo "--cluster-id Specify the id of EKS cluster" - echo "--container-runtime Specify a container runtime (default: dockerd)" + echo "--container-runtime Specify a container runtime. For Kubernetes 1.23 and below, possible values are [dockerd, containerd] and the default value is dockerd. 
For Kubernetes 1.24 and above, containerd is the only valid value. This flag is deprecated and will be removed in a future release." echo "--containerd-config-file File containing the containerd configuration to be used in place of AMI defaults." echo "--dns-cluster-ip Overrides the IP address to use for DNS queries within the cluster. Defaults to 10.100.0.10 or 172.20.0.10 based on the IP address of the primary interface" echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" @@ -538,6 +538,12 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then sudo chown root:root /etc/systemd/system/kubelet.service # Validate containerd config sudo containerd config dump > /dev/null + + # --container-runtime flag is gone in 1.27+ + # TODO: remove this when 1.26 is EOL + if vercmp "$KUBELET_VERSION" lt "1.27.0"; then + KUBELET_ARGS="$KUBELET_ARGS --container-runtime=remote" + fi elif [[ "$CONTAINER_RUNTIME" = "dockerd" ]]; then mkdir -p /etc/docker bash -c "/sbin/iptables-save > /etc/sysconfig/iptables" diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index db1c56511..bd8ed1cf6 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -10,7 +10,6 @@ ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 ExecStart=/usr/bin/kubelet \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ - --container-runtime remote \ --container-runtime-endpoint unix:///run/containerd/containerd.sock \ --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \ --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider \ From 9ea1a7a6c3857e3c6b791e5758c9a4cafc48b88d Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 4 Apr 2023 16:26:29 -0700 Subject: [PATCH 440/621] Use gp3 volume_type for 1.27+ (#1197) --- Makefile | 51 
++++++++++++++++++++++++++--------- eks-worker-al2-variables.json | 2 +- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index 9b8d6e952..eb1621870 100644 --- a/Makefile +++ b/Makefile @@ -6,29 +6,54 @@ K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS MAKEFILE_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +# expands to 'true' if PACKER_VARIABLE_FILE is non-empty +# and the file contains the string passed as the first argument +# otherwise, expands to 'false' +packer_variable_file_contains = $(if $(PACKER_VARIABLE_FILE),$(shell grep -Fq $1 $(PACKER_VARIABLE_FILE) && echo true || echo false),false) + +# expands to 'true' if the version comparison is affirmative +# otherwise expands to 'false' +vercmp = $(shell $(MAKEFILE_DIR)/files/bin/vercmp "$1" "$2" "$3") + +# expands to 'true' if the 'aws_region' contains 'us-iso' (an isolated region) +# otherwise, expands to 'false' +in_iso_region = $(if $(findstring us-iso,$(aws_region)),true,false) + +# gp3 volumes are used by default for 1.27+ +# TODO: remove when 1.26 reaches EOL +# TODO: remove when gp3 is supported in isolated regions +ifneq ($(call packer_variable_file_contains,volume_type), true) + ifeq ($(call in_iso_region), true) + volume_type ?= gp2 + else ifeq ($(call vercmp,$(kubernetes_version),lt,1.27.0), true) + volume_type ?= gp2 + endif +endif + # Docker is not present on 1.25+ AMI's -ifeq ($(shell $(MAKEFILE_DIR)/files/bin/vercmp "$(kubernetes_version)" gteq "1.25.0"), true) -# do not tag the AMI with the Docker version -docker_version ?= none -# do not include the Docker version in the AMI description -ami_component_description ?= (k8s: {{ user `kubernetes_version` }}, containerd: {{ user `containerd_version` }}) +# TODO: remove this when 1.24 reaches EOL +ifeq ($(call vercmp,$(kubernetes_version),gteq,1.25.0), true) + # do not tag the AMI with the Docker version + docker_version ?= none + # do not include the Docker 
version in the AMI description + ami_component_description ?= (k8s: {{ user `kubernetes_version` }}, containerd: {{ user `containerd_version` }}) endif arch ?= x86_64 ifeq ($(arch), arm64) -instance_type ?= m6g.large -ami_name ?= amazon-eks-arm64-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') + instance_type ?= m6g.large + ami_name ?= amazon-eks-arm64-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') else -instance_type ?= m4.large -ami_name ?= amazon-eks-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') + instance_type ?= m4.large + ami_name ?= amazon-eks-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') endif ifeq ($(aws_region), cn-northwest-1) -source_ami_owners ?= 141808717104 + source_ami_owners ?= 141808717104 endif ifeq ($(aws_region), us-gov-west-1) -source_ami_owners ?= 045324592363 + source_ami_owners ?= 045324592363 endif T_RED := \e[0;31m @@ -49,7 +74,7 @@ SHFMT_FLAGS := --list \ SHFMT_COMMAND := $(shell which shfmt) ifeq (, $(SHFMT_COMMAND)) -SHFMT_COMMAND = docker run --rm -v $(MAKEFILE_DIR):$(MAKEFILE_DIR) mvdan/shfmt + SHFMT_COMMAND = docker run --rm -v $(MAKEFILE_DIR):$(MAKEFILE_DIR) mvdan/shfmt endif .PHONY: fmt @@ -58,7 +83,7 @@ fmt: ## Format the source files SHELLCHECK_COMMAND := $(shell which shellcheck) ifeq (, $(SHELLCHECK_COMMAND)) -SHELLCHECK_COMMAND = docker run --rm -v $(MAKEFILE_DIR):$(MAKEFILE_DIR) koalaman/shellcheck:stable + SHELLCHECK_COMMAND = docker run --rm -v $(MAKEFILE_DIR):$(MAKEFILE_DIR) koalaman/shellcheck:stable endif SHELL_FILES := $(shell find $(MAKEFILE_DIR) -type f -name '*.sh') diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index b83543f49..456bcabf9 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -33,5 +33,5 @@ "ssh_username": "ec2-user", "subnet_id": "", "temporary_security_group_source_cidrs": "", - "volume_type": "gp2" + "volume_type": "gp3" } From f9fa3f614ba04c411eb13f3edad674b79348479b Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula 
<42261978+ljosyula@users.noreply.github.com> Date: Tue, 4 Apr 2023 18:35:19 -0700 Subject: [PATCH 441/621] Adding inf2 and trn1n instances (#1251) Co-authored-by: ljosyula --- files/eni-max-pods.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index cde3c610e..2c0253e90 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -271,6 +271,10 @@ inf1.24xlarge 321 inf1.2xlarge 38 inf1.6xlarge 234 inf1.xlarge 38 +inf2.24xlarge 737 +inf2.48xlarge 737 +inf2.8xlarge 234 +inf2.xlarge 58 is4gen.2xlarge 58 is4gen.4xlarge 234 is4gen.8xlarge 234 @@ -623,6 +627,7 @@ t4g.small 11 t4g.xlarge 58 trn1.2xlarge 58 trn1.32xlarge 247 +trn1n.32xlarge 247 u-12tb1.112xlarge 737 u-12tb1.metal 147 u-18tb1.112xlarge 737 From 8875417bc39f7218f99bec59ae226763b9d19870 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 5 Apr 2023 14:36:01 -0700 Subject: [PATCH 442/621] Create KUBELET_ARGS dropin after configuration container runtime (#1257) --- files/bootstrap.sh | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index c9b5da054..539752d8a 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -485,19 +485,7 @@ fi KUBELET_ARGS="$KUBELET_ARGS --cloud-provider=$KUBELET_CLOUD_PROVIDER" -mkdir -p /etc/systemd/system/kubelet.service.d - -cat << EOF > /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf -[Service] -Environment='KUBELET_ARGS=$KUBELET_ARGS' -EOF - -if [[ -n "$KUBELET_EXTRA_ARGS" ]]; then - cat << EOF > /etc/systemd/system/kubelet.service.d/30-kubelet-extra-args.conf -[Service] -Environment='KUBELET_EXTRA_ARGS=$KUBELET_EXTRA_ARGS' -EOF -fi +mkdir -p /etc/systemd/system if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then if $ENABLE_DOCKER_BRIDGE; then @@ -568,6 +556,21 @@ else exit 1 fi +mkdir -p /etc/systemd/system/kubelet.service.d + +cat << EOF > /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf +[Service] 
+Environment='KUBELET_ARGS=$KUBELET_ARGS' +EOF + +if [[ -n "$KUBELET_EXTRA_ARGS" ]]; then + cat << EOF > /etc/systemd/system/kubelet.service.d/30-kubelet-extra-args.conf +[Service] +Environment='KUBELET_EXTRA_ARGS=$KUBELET_EXTRA_ARGS' +EOF +fi + +systemctl daemon-reload systemctl enable kubelet systemctl start kubelet From 8d7078a24e2804984d79ee2b5cd79acb0effec88 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 5 Apr 2023 15:14:18 -0700 Subject: [PATCH 443/621] Install latest containerd 1.6.x (#1247) --- eks-worker-al2-variables.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 456bcabf9..30d769e03 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -13,7 +13,7 @@ "binary_bucket_region": "us-west-2", "cache_container_images": "false", "cni_plugin_version": "v0.8.6", - "containerd_version": "1.6.6-1.amzn2.0.2", + "containerd_version": "1.6.*", "creator": "{{env `USER`}}", "docker_version": "20.10.17-1.amzn2.0.1", "encrypted": "false", From e02ca984d5c14fb3126ec59808ed9aa3c9a224fe Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 7 Apr 2023 14:32:46 -0700 Subject: [PATCH 444/621] Remove stale issue workflow (#1256) --- .github/workflows/stale-issues.yaml | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 .github/workflows/stale-issues.yaml diff --git a/.github/workflows/stale-issues.yaml b/.github/workflows/stale-issues.yaml deleted file mode 100644 index a56181160..000000000 --- a/.github/workflows/stale-issues.yaml +++ /dev/null @@ -1,21 +0,0 @@ -name: 'Close stale issues' -on: - schedule: - # once a day at noon - - cron: '0 12 * * *' -permissions: - issues: write -jobs: - stale: - runs-on: ubuntu-latest - steps: - - uses: actions/stale@v6 - with: - days-before-stale: 90 - days-before-close: 14 - stale-issue-message: 'Please update this issue if it applies to the latest AMI release; otherwise it will be closed soon.' 
- stale-issue-label: 'stale' - exempt-issue-labels: 'never-stale' - # empty message will prevent PR's from being staled - stale-pr-message: '' - debug-only: true \ No newline at end of file From e56aa6502d8faf37340bbef038d9e7dafff7838f Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 7 Apr 2023 16:44:31 -0700 Subject: [PATCH 445/621] Parameterize Packer template and default variable file (#1252) --- Makefile | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index eb1621870..271faa8fb 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,13 @@ +MAKEFILE_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +PACKER_DEFAULT_VARIABLE_FILE ?= $(MAKEFILE_DIR)/eks-worker-al2-variables.json +PACKER_TEMPLATE_FILE ?= $(MAKEFILE_DIR)/eks-worker-al2.json PACKER_BINARY ?= packer -AVAILABLE_PACKER_VARIABLES := $(shell $(PACKER_BINARY) inspect -machine-readable eks-worker-al2.json | grep 'template-variable' | awk -F ',' '{print $$4}') +AVAILABLE_PACKER_VARIABLES := $(shell $(PACKER_BINARY) inspect -machine-readable $(PACKER_TEMPLATE_FILE) | grep 'template-variable' | awk -F ',' '{print $$4}') K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) -MAKEFILE_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) - # expands to 'true' if PACKER_VARIABLE_FILE is non-empty # and the file contains the string passed as the first argument # otherwise, expands to 'false' @@ -98,18 +100,18 @@ test: ## run the test-harness # include only variables which have a defined value PACKER_VARIABLES := $(foreach packerVar,$(AVAILABLE_PACKER_VARIABLES),$(if $($(packerVar)),$(packerVar))) -PACKER_VAR_FLAGS := -var-file eks-worker-al2-variables.json \ -$(if $(PACKER_VARIABLE_FILE),--var-file=$(PACKER_VARIABLE_FILE),) \ +PACKER_VAR_FLAGS := -var-file $(PACKER_DEFAULT_VARIABLE_FILE) \ +$(if $(PACKER_VARIABLE_FILE),-var-file=$(PACKER_VARIABLE_FILE),) 
\ $(foreach packerVar,$(PACKER_VARIABLES),-var $(packerVar)='$($(packerVar))') .PHONY: validate validate: ## Validate packer config - $(PACKER_BINARY) validate $(PACKER_VAR_FLAGS) eks-worker-al2.json + $(PACKER_BINARY) validate $(PACKER_VAR_FLAGS) $(PACKER_TEMPLATE_FILE) .PHONY: k8s k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" - $(PACKER_BINARY) build -timestamp-ui -color=false $(PACKER_VAR_FLAGS) eks-worker-al2.json + $(PACKER_BINARY) build -timestamp-ui -color=false $(PACKER_VAR_FLAGS) $(PACKER_TEMPLATE_FILE) # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html From 1efc3d6630bac4f82c05f15faf698a481ff7129d Mon Sep 17 00:00:00 2001 From: camrakin <113552683+camrakin@users.noreply.github.com> Date: Fri, 7 Apr 2023 17:49:53 -0700 Subject: [PATCH 446/621] Updating CHANGELOG.MD for 1.26 Release (#1260) * Updating CHANGELOG.MD for 1.26 Release --- CHANGELOG.md | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44e28e823..6859fe2db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,62 @@ # Changelog +### AMI Release v20230406 +* amazon-eks-gpu-node-1.26-v20230406 +* amazon-eks-gpu-node-1.25-v20230406 +* amazon-eks-gpu-node-1.24-v20230406 +* amazon-eks-gpu-node-1.23-v20230406 +* amazon-eks-gpu-node-1.22-v20230406 +* amazon-eks-arm64-node-1.26-v20230406 +* amazon-eks-arm64-node-1.25-v20230406 +* amazon-eks-arm64-node-1.24-v20230406 +* amazon-eks-arm64-node-1.23-v20230406 +* amazon-eks-arm64-node-1.22-v20230406 +* amazon-eks-node-1.26-v20230406 +* amazon-eks-node-1.25-v20230406 +* amazon-eks-node-1.24-v20230406 +* amazon-eks-node-1.23-v20230406 +* amazon-eks-node-1.22-v20230406 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these 
AMIs: +* `1.26.2-20230406` +* `1.25.7-20230406` +* `1.24.11-20230406` +* `1.23.17-20230406` +* `1.22.17-20230406` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.26.2/2023-03-17/ +* s3://amazon-eks/1.25.7/2023-03-17/ +* s3://amazon-eks/1.24.11/2023-03-17/ +* s3://amazon-eks/1.23.17/2023-03-17/ +* s3://amazon-eks/1.22.17/2023-03-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.238-148.346.amzn2 + * Kubernetes 1.24 and above: 5.10.173-154.642.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that Docker is not installed on AMIs with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0 + +Notable changes: +- Add support for Kubernetes 1.26 ([#1246](https://github.com/awslabs/amazon-eks-ami/pull/1246)) +- Add support for `inf2`, `trn1n` instance types ([#1251](https://github.com/awslabs/amazon-eks-ami/pull/1251)) +- Updated `containerd` to address: + - [ALASDOCKER-2023-023](https://alas.aws.amazon.com/AL2/ALASDOCKER-2023-023.html) +- Fixed `ecr-credential-provider` flags not being passed correctly to `kubelet` ([#1240](https://github.com/awslabs/amazon-eks-ami/pull/1240)) + - Added `--image-credential-provider-config` and `--image-credential-provider-bin-dir` flags to the `systemd` units. + - Set `KubeletCredentialProviders` feature flag to `true` in the `kubelet` JSON config.
+ +Other changes: +- Use `gp3 volume_type` for 1.27+ ([#1197](https://github.com/awslabs/amazon-eks-ami/pull/1197)) +- Use default kubelet API QPS for 1.27+ ([#1241](https://github.com/awslabs/amazon-eks-ami/pull/1241)) +- Remove `--container-runtime` kubelet flag for 1.27+ ([#1250](https://github.com/awslabs/amazon-eks-ami/pull/1250)) + ### AMI Release v20230322 * amazon-eks-gpu-node-1.25-v20230322 * amazon-eks-gpu-node-1.24-v20230322 From 9456c6a51a5d78291a21ba0d0c22fd2f6bb27d33 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Mon, 10 Apr 2023 14:04:06 -0500 Subject: [PATCH 447/621] Add ethtool (#1261) --- log-collector-script/linux/eks-log-collector.sh | 10 +++++++++- scripts/install-worker.sh | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 351c3f103..dee5654a6 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -20,7 +20,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.7.4" +readonly PROGRAM_VERSION="0.7.5" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -526,6 +526,14 @@ get_networking_info() { fi cp /etc/resolv.conf "${COLLECT_DIR}"/networking/resolv.conf + + # collect ethtool -S for all interfaces + INTERFACES=$(ip -o a | awk '{print $2}' | sort -n | uniq) + for ifc in ${INTERFACES}; do + echo "Interface ${ifc}" >> "${COLLECT_DIR}"/networking/ethtool.txt + ethtool -S ${ifc} >> "${COLLECT_DIR}"/networking/ethtool.txt 2>&1 + echo -e "\n" >> "${COLLECT_DIR}"/networking/ethtool.txt + done ok } diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index e52fe2c0b..a32fda814 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -62,6 +62,7 @@ sudo yum install -y \ 
conntrack \ curl \ ec2-instance-connect \ + ethtool \ ipvsadm \ jq \ nfs-utils \ From db6ba5584bb3c3d434138d3e3f8ceaad0934e22e Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 19 Apr 2023 08:51:34 -0700 Subject: [PATCH 448/621] Override hostname to match EC2's PrivateDnsName (#1264) --- files/bootstrap.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 539752d8a..539628c1f 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -481,6 +481,13 @@ if vercmp "$KUBELET_VERSION" lt "1.26.0"; then else KUBELET_CLOUD_PROVIDER="external" echo "$(jq ".providerID=\"$(provider-id)\"" $KUBELET_CONFIG)" > $KUBELET_CONFIG + # When the external cloud provider is used, kubelet will use /etc/hostname as the name of the Node object. + # If the VPC has a custom `domain-name` in its DHCP options set, and the VPC has `enableDnsHostnames` set to `true`, + # then /etc/hostname is not the same as EC2's PrivateDnsName. + # The name of the Node object must be equal to EC2's PrivateDnsName for the aws-iam-authenticator to allow this kubelet to manage it. 
+ INSTANCE_ID=$(imds /latest/meta-data/instance-id) + PRIVATE_DNS_NAME=$(AWS_RETRY_MODE=standard AWS_MAX_ATTEMPTS=10 aws ec2 describe-instances --instance-ids $INSTANCE_ID --query 'Reservations[].Instances[].PrivateDnsName' --output text) + KUBELET_ARGS="$KUBELET_ARGS --hostname-override=$PRIVATE_DNS_NAME" fi KUBELET_ARGS="$KUBELET_ARGS --cloud-provider=$KUBELET_CLOUD_PROVIDER" From 42c3f524cd74d82c7ea4aaecbc0d46c1c39d2095 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 19 Apr 2023 10:11:20 -0700 Subject: [PATCH 449/621] Use credential provider API v1 in 1.27+, v1alpha1 in 1.26- (#1269) --- doc/USER_GUIDE.md | 12 +++++ files/bootstrap.sh | 25 ++++----- files/ecr-credential-provider-config | 14 ------ files/ecr-credential-provider-config.json | 18 +++++++ files/kubelet-containerd.service | 4 +- files/kubelet.service | 4 +- scripts/install-worker.sh | 26 ++++------ test/Dockerfile | 2 +- test/cases/ecr-credential-provider-config.sh | 53 +++++++++++++++----- test/test-harness.sh | 5 +- 10 files changed, 103 insertions(+), 60 deletions(-) delete mode 100644 files/ecr-credential-provider-config create mode 100644 files/ecr-credential-provider-config.json diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index 4e7291138..b37348d60 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -11,6 +11,7 @@ This document includes details about using the AMI template and the resulting AM 1. [AL2 and Linux kernel information](#al2-and-linux-kernel-information) 1. [Updating known instance types](#updating-known-instance-types) 1. [Version-locked packages](#version-locked-packages) +1. [Image credential provider plugins](#image-credential-provider-plugins) --- @@ -309,3 +310,14 @@ sudo yum versionlock delete $PACKAGE_NAME # unlock all packages sudo yum versionlock clear ``` + +--- + +## Image credential provider plugins + +Prior to Kubernetes 1.27, the `kubelet` could obtain credentials for ECR out of the box. 
This legacy credential process has been removed in Kubernetes 1.27, and +ECR credentials should now be obtained via a plugin, the `ecr-credential-provider`. This plugin is installed in the AMI at `/etc/eks/image-credential-provider/ecr-credential-provider`. More information about this plugin is available in the [`cloud-provider-aws` documentation](https://cloud-provider-aws.sigs.k8s.io/credential_provider/). + +Additional image credential provider plugins may be appended to `/etc/eks/image-credential-provider/config.json`. In Kubernetes versions 1.26 and below, all plugins in this file must support `credentialprovider.kubelet.k8s.io/v1alpha1`. In Kubernetes versions 1.27 and above, they must support `credentialprovider.kubelet.k8s.io/v1`. + +For more information about image credential provider plugins, refer to the [Kubernetes documentation](https://kubernetes.io/docs/tasks/administer-cluster/kubelet-credential-provider/). diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 539628c1f..a42da5f2f 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -145,28 +145,29 @@ set -u KUBELET_VERSION=$(kubelet --version | grep -Eo '[0-9]\.[0-9]+\.[0-9]+') echo "Using kubelet version $KUBELET_VERSION" +# ecr-credential-provider only implements credentialprovider.kubelet.k8s.io/v1alpha1 prior to 1.27.1: https://github.com/kubernetes/cloud-provider-aws/pull/597 +# TODO: remove this when 1.26 is EOL +if vercmp "$KUBELET_VERSION" lt "1.27.0"; then + IMAGE_CREDENTIAL_PROVIDER_CONFIG=/etc/eks/image-credential-provider/config.json + echo "$(jq '.apiVersion = "kubelet.config.k8s.io/v1alpha1"' $IMAGE_CREDENTIAL_PROVIDER_CONFIG)" > $IMAGE_CREDENTIAL_PROVIDER_CONFIG + echo "$(jq '.providers[].apiVersion = "credentialprovider.kubelet.k8s.io/v1alpha1"' $IMAGE_CREDENTIAL_PROVIDER_CONFIG)" > $IMAGE_CREDENTIAL_PROVIDER_CONFIG +fi + +# Set container runtime related variables +DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" +ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" + # As 
of Kubernetes version 1.24, we will start defaulting the container runtime to containerd # and no longer support docker as a container runtime. -IS_124_OR_GREATER=false DEFAULT_CONTAINER_RUNTIME=dockerd if vercmp "$KUBELET_VERSION" gteq "1.24.0"; then - IS_124_OR_GREATER=true DEFAULT_CONTAINER_RUNTIME=containerd -elif vercmp "$KUBELET_VERSION" gteq "1.22.0"; then - # These APIs are only available in alpha pre-1.24. - # This can be removed when version 1.23 is no longer supported. - sed -i s,kubelet.config.k8s.io/v1beta1,kubelet.config.k8s.io/v1alpha1,g /etc/eks/ecr-credential-provider/ecr-credential-provider-config - sed -i s,credentialprovider.kubelet.k8s.io/v1beta1,credentialprovider.kubelet.k8s.io/v1alpha1,g /etc/eks/ecr-credential-provider/ecr-credential-provider-config fi - -# Set container runtime related variables -DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" -ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-$DEFAULT_CONTAINER_RUNTIME}" echo "Using $CONTAINER_RUNTIME as the container runtime" -if $IS_124_OR_GREATER && [ $CONTAINER_RUNTIME != "containerd" ]; then +if vercmp "$KUBELET_VERSION" gteq "1.24.0" && [ $CONTAINER_RUNTIME != "containerd" ]; then echo "ERROR: containerd is the only supported container runtime as of Kubernetes version 1.24" exit 1 fi diff --git a/files/ecr-credential-provider-config b/files/ecr-credential-provider-config deleted file mode 100644 index ae1f9d7a5..000000000 --- a/files/ecr-credential-provider-config +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: kubelet.config.k8s.io/v1beta1 -kind: CredentialProviderConfig -providers: - - name: ecr-credential-provider - matchImages: - - "*.dkr.ecr.*.amazonaws.com" - - "*.dkr.ecr.*.amazonaws.cn" - - "*.dkr.ecr-fips.*.amazonaws.com" - - "*.dkr.ecr.us-iso-east-1.c2s.ic.gov" - - "*.dkr.ecr.us-isob-east-1.sc2s.sgov.gov" - defaultCacheDuration: "12h" - apiVersion: credentialprovider.kubelet.k8s.io/v1beta1 - args: - - get-credentials diff --git 
a/files/ecr-credential-provider-config.json b/files/ecr-credential-provider-config.json new file mode 100644 index 000000000..7fe687156 --- /dev/null +++ b/files/ecr-credential-provider-config.json @@ -0,0 +1,18 @@ +{ + "apiVersion": "kubelet.config.k8s.io/v1", + "kind": "CredentialProviderConfig", + "providers": [ + { + "name": "ecr-credential-provider", + "matchImages": [ + "*.dkr.ecr.*.amazonaws.com", + "*.dkr.ecr.*.amazonaws.cn", + "*.dkr.ecr-fips.*.amazonaws.com", + "*.dkr.ecr.us-iso-east-1.c2s.ic.gov", + "*.dkr.ecr.us-isob-east-1.sc2s.sgov.gov" + ], + "defaultCacheDuration": "12h", + "apiVersion": "credentialprovider.kubelet.k8s.io/v1" + } + ] +} diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index bd8ed1cf6..946fb1c28 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -11,8 +11,8 @@ ExecStart=/usr/bin/kubelet \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime-endpoint unix:///run/containerd/containerd.sock \ - --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \ - --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider \ + --image-credential-provider-config /etc/eks/image-credential-provider/config.json \ + --image-credential-provider-bin-dir /etc/eks/image-credential-provider \ $KUBELET_ARGS \ $KUBELET_EXTRA_ARGS diff --git a/files/kubelet.service b/files/kubelet.service index 5002876be..08c746504 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -11,8 +11,8 @@ ExecStart=/usr/bin/kubelet \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ --network-plugin cni \ - --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \ - --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider \ + --image-credential-provider-config /etc/eks/image-credential-provider/config.json \ + 
--image-credential-provider-bin-dir /etc/eks/image-credential-provider \ $KUBELET_ARGS \ $KUBELET_EXTRA_ARGS diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index a32fda814..b40bf4d5a 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -351,22 +351,18 @@ fi ################################################################################ ### ECR CREDENTIAL PROVIDER #################################################### ################################################################################ -if vercmp "$KUBERNETES_VERSION" gteq "1.22.0"; then - ECR_BINARY="ecr-credential-provider" - if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then - echo "AWS cli present - using it to copy ecr-credential-provider binaries from s3." - aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$ECR_BINARY . - else - echo "AWS cli missing - using wget to fetch ecr-credential-provider binaries from s3. Note: This won't work for private bucket." - sudo wget "$S3_URL_BASE/$ECR_BINARY" - fi - sudo chmod +x $ECR_BINARY - sudo mkdir -p /etc/eks/ecr-credential-provider - sudo mv $ECR_BINARY /etc/eks/ecr-credential-provider - - # copying credential provider config file to eks folder - sudo mv $TEMPLATE_DIR/ecr-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config +ECR_CREDENTIAL_PROVIDER_BINARY="ecr-credential-provider" +if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then + echo "AWS cli present - using it to copy ${ECR_CREDENTIAL_PROVIDER_BINARY} from s3." + aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$ECR_CREDENTIAL_PROVIDER_BINARY . +else + echo "AWS cli missing - using wget to fetch ${ECR_CREDENTIAL_PROVIDER_BINARY} from s3. Note: This won't work for private bucket." 
+ sudo wget "$S3_URL_BASE/$ECR_CREDENTIAL_PROVIDER_BINARY" fi +sudo chmod +x $ECR_CREDENTIAL_PROVIDER_BINARY +sudo mkdir -p /etc/eks/image-credential-provider +sudo mv $ECR_CREDENTIAL_PROVIDER_BINARY /etc/eks/image-credential-provider/ +sudo mv $TEMPLATE_DIR/ecr-credential-provider-config.json /etc/eks/image-credential-provider/config.json ################################################################################ ### Cache Images ############################################################### diff --git a/test/Dockerfile b/test/Dockerfile index bab93ee84..5470ad965 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -12,7 +12,7 @@ COPY files/ /etc/eks/ COPY files/containerd-config.toml files/kubelet-containerd.service files/pull-sandbox-image.sh files/sandbox-image.service /etc/eks/containerd/ COPY files/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json COPY files/kubelet-kubeconfig /var/lib/kubelet/kubeconfig -COPY files/ecr-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config +COPY files/ecr-credential-provider-config.json /etc/eks/image-credential-provider/config.json COPY test/entrypoint.sh /entrypoint.sh COPY files/bin/* /usr/bin/ COPY test/mocks/ /sbin/ diff --git a/test/cases/ecr-credential-provider-config.sh b/test/cases/ecr-credential-provider-config.sh index 5d4856ed1..4eb74a761 100755 --- a/test/cases/ecr-credential-provider-config.sh +++ b/test/cases/ecr-credential-provider-config.sh @@ -4,7 +4,7 @@ set -euo pipefail exit_code=0 TEMP_DIR=$(mktemp -d) -export CRED_PROVIDER_FILE="/etc/eks/ecr-credential-provider/ecr-credential-provider-config" +export CRED_PROVIDER_FILE="/etc/eks/image-credential-provider/config.json" export CRED_PROVIDER_RESET_FILE="./cred-provider-config" # Store the original version of the config @@ -15,7 +15,7 @@ function reset_scenario { cp $CRED_PROVIDER_RESET_FILE $CRED_PROVIDER_FILE } -echo "--> Should default to credentialprovider.kubelet.k8s.io/v1alpha1 and 
kubelet.config.k8s.io/v1alpha1 when below k8s version 1.24" +echo "--> Should default to credentialprovider.kubelet.k8s.io/v1alpha1 and kubelet.config.k8s.io/v1alpha1 when below k8s version 1.27" reset_scenario # This variable is used to override the default value in the kubelet mock @@ -31,23 +31,24 @@ if [[ ${exit_code} -ne 0 ]]; then fi expected_cred_provider_api="credentialprovider.kubelet.k8s.io/v1alpha1" -actual=$(yq e '.providers[0].apiVersion' $CRED_PROVIDER_FILE) +actual=$(jq -r '.providers[0].apiVersion' $CRED_PROVIDER_FILE) if [[ "$expected_cred_provider_api" != "$actual" ]]; then echo "❌ Test Failed: expected 1.22 credential provider file to contain $expected_cred_provider_api" exit 1 fi expected_kubelet_config_api="kubelet.config.k8s.io/v1alpha1" -actual=$(yq e '.apiVersion' $CRED_PROVIDER_FILE) +actual=$(jq -r '.apiVersion' $CRED_PROVIDER_FILE) if [[ "$expected_kubelet_config_api" != "$actual" ]]; then echo "❌ Test Failed: expected 1.22 credential provider file to contain $expected_kubelet_config_api" exit 1 fi -echo "--> Should default to credentialprovider.kubelet.k8s.io/v1beta1 and kubelet.config.k8s.io/v1beta1 when at or above k8s version 1.24" +echo "--> Should default to credentialprovider.kubelet.k8s.io/v1alpha1 and kubelet.config.k8s.io/v1alpha1 when below k8s version 1.27" reset_scenario -export KUBELET_VERSION=v1.24.15-eks-ba74326 +# This variable is used to override the default value in the kubelet mock +export KUBELET_VERSION=v1.26.0-eks-ba74326 /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -58,18 +59,44 @@ if [[ ${exit_code} -ne 0 ]]; then exit 1 fi -expected_cred_provider_api="credentialprovider.kubelet.k8s.io/v1beta1" -actual=$(yq e '.providers[0].apiVersion' $CRED_PROVIDER_FILE) +expected_cred_provider_api="credentialprovider.kubelet.k8s.io/v1alpha1" +actual=$(jq -r '.providers[0].apiVersion' $CRED_PROVIDER_FILE) if [[ "$expected_cred_provider_api" != "$actual" ]]; then - echo "❌ 
Test Failed: expected 1.24 credential provider file to contain $expected_cred_provider_api" + echo "❌ Test Failed: expected 1.26 credential provider file to contain $expected_cred_provider_api" exit 1 fi -expected_kubelet_config_api="kubelet.config.k8s.io/v1beta1" -actual=$(yq e '.apiVersion' $CRED_PROVIDER_FILE) +expected_kubelet_config_api="kubelet.config.k8s.io/v1alpha1" +actual=$(jq -r '.apiVersion' $CRED_PROVIDER_FILE) if [[ "$expected_kubelet_config_api" != "$actual" ]]; then - echo "❌ Test Failed: expected 1.24 credential provider file to contain $expected_kubelet_config_api" + echo "❌ Test Failed: expected 1.26 credential provider file to contain $expected_kubelet_config_api" exit 1 fi -exit_code=0 +echo "--> Should default to credentialprovider.kubelet.k8s.io/v1 and kubelet.config.k8s.io/v1 when at or above k8s version 1.27" +reset_scenario + +export KUBELET_VERSION=v1.27.1-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_cred_provider_api="credentialprovider.kubelet.k8s.io/v1" +actual=$(jq -r '.providers[0].apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_cred_provider_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.27 credential provider file to contain $expected_cred_provider_api" + exit 1 +fi + +expected_kubelet_config_api="kubelet.config.k8s.io/v1" +actual=$(jq -r '.apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_kubelet_config_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.27 credential provider file to contain $expected_kubelet_config_api" + exit 1 +fi diff --git a/test/test-harness.sh b/test/test-harness.sh index b0cc2180f..c253f562e 100755 --- a/test/test-harness.sh +++ b/test/test-harness.sh @@ -37,12 +37,14 @@ done docker build -t eks-optimized-ami -f "${SCRIPTPATH}/Dockerfile" "${SCRIPTPATH}/../" overall_status=0 +test_run_log_file=$(mktemp) + function run() { docker run -v "$(realpath $1):/test.sh" \ --attach STDOUT \ --attach STDERR \ --rm \ - eks-optimized-ami + eks-optimized-ami > $test_run_log_file 2>&1 } if [[ ! -z ${TEST_CASE_SCRIPT} ]]; then @@ -59,6 +61,7 @@ for case in "${test_cases[@]}"; do if [[ ${status} -eq 0 ]]; then echo "✅ ✅ $(basename ${case}) Tests Passed! ✅ ✅" else + cat $test_run_log_file echo "❌ ❌ $(basename ${case}) Tests Failed! 
❌ ❌" overall_status=1 fi From 76c1b1b8eaa9bd6912c504b00fc2c067cb5b21b8 Mon Sep 17 00:00:00 2001 From: Zaid Ahmed Farooq <38226823+zaf6862@users.noreply.github.com> Date: Wed, 19 Apr 2023 17:24:35 -0700 Subject: [PATCH 450/621] AMI Release v20230411 (#1270) --- CHANGELOG.md | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6859fe2db..acafb611e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,51 @@ # Changelog +### AMI Release v20230411 +* amazon-eks-gpu-node-1.26-v20230411 +* amazon-eks-gpu-node-1.25-v20230411 +* amazon-eks-gpu-node-1.24-v20230411 +* amazon-eks-gpu-node-1.23-v20230411 +* amazon-eks-gpu-node-1.22-v20230411 +* amazon-eks-arm64-node-1.26-v20230411 +* amazon-eks-arm64-node-1.25-v20230411 +* amazon-eks-arm64-node-1.24-v20230411 +* amazon-eks-arm64-node-1.23-v20230411 +* amazon-eks-arm64-node-1.22-v20230411 +* amazon-eks-node-1.26-v20230411 +* amazon-eks-node-1.25-v20230411 +* amazon-eks-node-1.24-v20230411 +* amazon-eks-node-1.23-v20230411 +* amazon-eks-node-1.22-v20230411 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.26.2-20230411` +* `1.25.7-20230411` +* `1.24.11-20230411` +* `1.23.17-20230411` +* `1.22.17-20230411` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.26.2/2023-03-17/ +* s3://amazon-eks/1.25.7/2023-03-17/ +* s3://amazon-eks/1.24.11/2023-03-17/ +* s3://amazon-eks/1.23.17/2023-03-17/ +* s3://amazon-eks/1.22.17/2023-03-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.238-148.347.amzn2 + * Kubernetes 1.24 and above: 5.10.176-157.645.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
+* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0 + +Notable changes: +- The AMI changes include update for 5.4 kernel version from `5.4.238-148.346.amzn2` to `kernel-5.4.238-148.347.amzn2`. `kernel-5.4.238-148.346` had a fatal issue affecting SMB mounts in which a null pointer dereference caused a panic. As a result, this package was removed from the Amazon Linux 2 repositories. + ### AMI Release v20230406 * amazon-eks-gpu-node-1.26-v20230406 * amazon-eks-gpu-node-1.25-v20230406 From 0711325f3517b2db1a6cd0cf9eb42cd79ed1358f Mon Sep 17 00:00:00 2001 From: guessi Date: Thu, 20 Apr 2023 08:29:48 +0800 Subject: [PATCH 451/621] Fix mount-bpf-fs test cases (#1271) --- test/cases/mount-bpf-fs.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cases/mount-bpf-fs.sh b/test/cases/mount-bpf-fs.sh index e8ef5da99..61b2f3844 100755 --- a/test/cases/mount-bpf-fs.sh +++ b/test/cases/mount-bpf-fs.sh @@ -61,7 +61,7 @@ EXIT_CODE=0 /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ - test || exit_code=$? + test || EXIT_CODE=$? if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 @@ -84,7 +84,7 @@ EXIT_CODE=0 /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ - test || exit_code=$? + test || EXIT_CODE=$? 
if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 From e39d71f6832221409cd9990ad85e870f6d621698 Mon Sep 17 00:00:00 2001 From: guessi Date: Thu, 20 Apr 2023 10:07:30 +0800 Subject: [PATCH 452/621] Keep actions up-to-date (#1243) --- .github/workflows/ci.yaml | 4 ++-- .github/workflows/sync-eni-max-pods.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index dfc7f6804..7f780e683 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,12 +13,12 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - run: echo "$(go env GOPATH)/bin" >> $GITHUB_PATH - run: go install mvdan.cc/sh/v3/cmd/shfmt@latest - run: make lint test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - run: make test diff --git a/.github/workflows/sync-eni-max-pods.yaml b/.github/workflows/sync-eni-max-pods.yaml index 76f02addf..9bb3275bc 100644 --- a/.github/workflows/sync-eni-max-pods.yaml +++ b/.github/workflows/sync-eni-max-pods.yaml @@ -14,7 +14,7 @@ jobs: if: github.repository == 'awslabs/amazon-eks-ami' runs-on: ubuntu-latest steps: - - uses: aws-actions/configure-aws-credentials@v1 + - uses: aws-actions/configure-aws-credentials@v2 with: aws-region: ${{ secrets.AWS_REGION }} role-to-assume: ${{ secrets.AWS_ROLE_ARN }} From 406954de1f7a4cdfabb9d9cc2b7ddfd1e2e08b23 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 21 Apr 2023 15:40:49 -0700 Subject: [PATCH 453/621] Make imds-token directory world-writeable (#1267) --- files/bin/imds | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/bin/imds b/files/bin/imds index 7619ee3fb..2d23801ba 100755 --- a/files/bin/imds +++ b/files/bin/imds @@ -50,7 +50,7 @@ function imdscurl() { function get-token() { local TOKEN_DIR=/tmp/imds-tokens - mkdir -p $TOKEN_DIR + mkdir -p -m a+wrx 
$TOKEN_DIR # cleanup expired tokens local DELETED_TOKENS=0 From 1ec4d5cfb209992901253bf07b3c0dba34cdf70d Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 24 Apr 2023 16:45:41 -0700 Subject: [PATCH 454/621] Improve bootstrap logging (#1276) --- files/bin/vercmp | 5 +++- files/bootstrap.sh | 67 +++++++++++++++++++++++++++++++++------------- 2 files changed, 53 insertions(+), 19 deletions(-) diff --git a/files/bin/vercmp b/files/bin/vercmp index 8edf7b920..5bb467854 100755 --- a/files/bin/vercmp +++ b/files/bin/vercmp @@ -81,7 +81,10 @@ case $OPERATOR in ;; esac -echo "$OUTCOME" +VERCMP_QUIET="${VERCMP_QUIET:-false}" +if [ ! "$VERCMP_QUIET" = "true" ]; then + echo "$OUTCOME" +fi if [ "$OUTCOME" = "true" ]; then exit 0 diff --git a/files/bootstrap.sh b/files/bootstrap.sh index a42da5f2f..205cb5458 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -11,6 +11,9 @@ trap 'err_report $LINENO' ERR IFS=$'\n\t' +# mute stdout from vercmp +export VERCMP_QUIET=true + function print_help { echo "usage: $0 [options] " echo "Bootstraps an instance into an EKS cluster" @@ -36,6 +39,12 @@ function print_help { echo "--use-max-pods Sets --max-pods for the kubelet when true. (default: true)" } +function log { + echo >&2 "$(date '+%Y-%m-%dT%H:%M:%S%z')" "[eks-bootstrap]" "$@" +} + +log "INFO: starting..." 
+ POSITIONAL=() while [[ $# -gt 0 ]]; do @@ -47,86 +56,103 @@ while [[ $# -gt 0 ]]; do ;; --use-max-pods) USE_MAX_PODS="$2" + log "INFO: --use-max-pods='${USE_MAX_PODS}'" shift shift ;; --b64-cluster-ca) B64_CLUSTER_CA=$2 + log "INFO: --b64-cluster-ca='${B64_CLUSTER_CA}'" shift shift ;; --apiserver-endpoint) APISERVER_ENDPOINT=$2 + log "INFO: --apiserver-endpoint='${APISERVER_ENDPOINT}'" shift shift ;; --kubelet-extra-args) KUBELET_EXTRA_ARGS=$2 + log "INFO: --kubelet-extra-args='${KUBELET_EXTRA_ARGS}'" shift shift ;; --enable-docker-bridge) ENABLE_DOCKER_BRIDGE=$2 + log "INFO: --enable-docker-bridge='${ENABLE_DOCKER_BRIDGE}'" shift shift ;; --aws-api-retry-attempts) API_RETRY_ATTEMPTS=$2 + log "INFO: --aws-api-retry-attempts='${API_RETRY_ATTEMPTS}'" shift shift ;; --docker-config-json) DOCKER_CONFIG_JSON=$2 + log "INFO: --docker-config-json='${DOCKER_CONFIG_JSON}'" shift shift ;; --containerd-config-file) CONTAINERD_CONFIG_FILE=$2 + log "INFO: --containerd-config-file='${CONTAINERD_CONFIG_FILE}'" shift shift ;; --pause-container-account) PAUSE_CONTAINER_ACCOUNT=$2 + log "INFO: --pause-container-accounte='${PAUSE_CONTAINER_ACCOUNT}'" shift shift ;; --pause-container-version) PAUSE_CONTAINER_VERSION=$2 + log "INFO: --pause-container-version='${PAUSE_CONTAINER_VERSION}'" shift shift ;; --dns-cluster-ip) DNS_CLUSTER_IP=$2 + log "INFO: --dns-cluster-ip='${DNS_CLUSTER_IP}'" shift shift ;; --container-runtime) CONTAINER_RUNTIME=$2 + log "INFO: --container-runtime='${CONTAINER_RUNTIME}'" shift shift ;; --ip-family) IP_FAMILY=$2 + log "INFO: --ip-family='${IP_FAMILY}'" shift shift ;; --service-ipv6-cidr) SERVICE_IPV6_CIDR=$2 + log "INFO: --service-ipv6-cidr='${SERVICE_IPV6_CIDR}'" shift shift ;; --enable-local-outpost) ENABLE_LOCAL_OUTPOST=$2 + log "INFO: --enable-local-outpost='${ENABLE_LOCAL_OUTPOST}'" shift shift ;; --cluster-id) CLUSTER_ID=$2 + log "INFO: --cluster-id='${CLUSTER_ID}'" shift shift ;; --mount-bpf-fs) MOUNT_BPF_FS=$2 + log "INFO: 
--mount-bpf-fs='${MOUNT_BPF_FS}'" shift shift ;; @@ -143,7 +169,7 @@ CLUSTER_NAME="$1" set -u KUBELET_VERSION=$(kubelet --version | grep -Eo '[0-9]\.[0-9]+\.[0-9]+') -echo "Using kubelet version $KUBELET_VERSION" +log "INFO: Using kubelet version $KUBELET_VERSION" # ecr-credential-provider only implements credentialprovider.kubelet.k8s.io/v1alpha1 prior to 1.27.1: https://github.com/kubernetes/cloud-provider-aws/pull/597 # TODO: remove this when 1.26 is EOL @@ -165,10 +191,10 @@ if vercmp "$KUBELET_VERSION" gteq "1.24.0"; then fi CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-$DEFAULT_CONTAINER_RUNTIME}" -echo "Using $CONTAINER_RUNTIME as the container runtime" +log "INFO: Using $CONTAINER_RUNTIME as the container runtime" if vercmp "$KUBELET_VERSION" gteq "1.24.0" && [ $CONTAINER_RUNTIME != "containerd" ]; then - echo "ERROR: containerd is the only supported container runtime as of Kubernetes version 1.24" + log "ERROR: containerd is the only supported container runtime as of Kubernetes version 1.24" exit 1 fi @@ -254,21 +280,21 @@ get_cpu_millicores_to_reserve() { } if [ -z "$CLUSTER_NAME" ]; then - echo "CLUSTER_NAME is not defined" + log "ERROR: cluster name is not defined!" exit 1 fi if [[ ! -z "${IP_FAMILY}" ]]; then IP_FAMILY="$(tr [A-Z] [a-z] <<< "$IP_FAMILY")" if [[ "${IP_FAMILY}" != "ipv4" ]] && [[ "${IP_FAMILY}" != "ipv6" ]]; then - echo "Invalid IpFamily. Only ipv4 or ipv6 are allowed" + log "ERROR: Invalid --ip-family. Only ipv4 or ipv6 are allowed" exit 1 fi fi if [[ ! 
-z "${SERVICE_IPV6_CIDR}" ]]; then if [[ "${IP_FAMILY}" == "ipv4" ]]; then - echo "ip-family should be ipv6 when service-ipv6-cidr is specified" + log "ERROR: --ip-family should be ipv6 when --service-ipv6-cidr is specified" exit 1 fi IP_FAMILY="ipv6" @@ -279,7 +305,7 @@ AWS_SERVICES_DOMAIN=$(imds 'latest/meta-data/services/domain') MACHINE=$(uname -m) if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then - echo "Unknown machine architecture '$MACHINE'" >&2 + log "ERROR: Unknown machine architecture: '$MACHINE'" exit 1 fi @@ -297,13 +323,14 @@ CA_CERTIFICATE_DIRECTORY=/etc/kubernetes/pki CA_CERTIFICATE_FILE_PATH=$CA_CERTIFICATE_DIRECTORY/ca.crt mkdir -p $CA_CERTIFICATE_DIRECTORY if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then + log "INFO: --cluster-ca or --api-server-endpoint is not defined, describing cluster..." DESCRIBE_CLUSTER_RESULT="/tmp/describe_cluster_result.txt" # Retry the DescribeCluster API for API_RETRY_ATTEMPTS for attempt in $(seq 0 $API_RETRY_ATTEMPTS); do rc=0 if [[ $attempt -gt 0 ]]; then - echo "Attempt $attempt of $API_RETRY_ATTEMPTS" + log "INFO: Attempt $attempt of $API_RETRY_ATTEMPTS" fi aws eks wait cluster-active \ @@ -319,6 +346,7 @@ if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then break fi if [[ $attempt -eq $API_RETRY_ATTEMPTS ]]; then + log "ERROR: Exhausted retries while describing cluster!" exit $rc fi jitter=$((1 + RANDOM % 10)) @@ -355,6 +383,8 @@ if [[ -z "${IP_FAMILY}" ]] || [[ "${IP_FAMILY}" == "None" ]]; then IP_FAMILY="ipv4" fi +log "INFO: Using IP family: ${IP_FAMILY}" + echo $B64_CLUSTER_CA | base64 -d > $CA_CERTIFICATE_FILE_PATH sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig @@ -385,7 +415,7 @@ if [[ "${ENABLE_LOCAL_OUTPOST}" == "true" ]]; then ### - if "aws eks describe-cluster" is bypassed, for local outpost, the value of CLUSTER_NAME parameter will be cluster id. 
### - otherwise, the cluster id will use the id returned by "aws eks describe-cluster". if [[ -z "${CLUSTER_ID}" ]]; then - echo "Cluster ID is required when local outpost support is enabled" + log "ERROR: Cluster ID is required when local outpost support is enabled" exit 1 else sed -i s,CLUSTER_NAME,$CLUSTER_ID,g /var/lib/kubelet/kubeconfig @@ -405,7 +435,7 @@ MAC=$(imds 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$/ if [[ -z "${DNS_CLUSTER_IP}" ]]; then if [[ "${IP_FAMILY}" == "ipv6" ]]; then if [[ -z "${SERVICE_IPV6_CIDR}" ]]; then - echo "One of --service-ipv6-cidr or --dns-cluster-ip must be provided when ip-family is specified as ipv6" + log "ERROR: One of --service-ipv6-cidr or --dns-cluster-ip must be provided when --ip-family is ipv6" exit 1 fi DNS_CLUSTER_IP=$(awk -F/ '{print $1}' <<< $SERVICE_IPV6_CIDR)a @@ -455,7 +485,7 @@ set +o pipefail MAX_PODS=$(cat $MAX_PODS_FILE | awk "/^${INSTANCE_TYPE:-unset}/"' { print $2 }') set -o pipefail if [ -z "$MAX_PODS" ] || [ -z "$INSTANCE_TYPE" ]; then - echo "No entry for type '$INSTANCE_TYPE' in $MAX_PODS_FILE. Will attempt to auto-discover value." + log "INFO: No entry for type '$INSTANCE_TYPE' in $MAX_PODS_FILE. Will attempt to auto-discover value." # When determining the value of maxPods, we're using the legacy calculation by default since it's more restrictive than # the PrefixDelegation based alternative and is likely to be in-use by more customers. # The legacy numbers also maintain backwards compatibility when used to calculate `kubeReserved.memory` @@ -497,11 +527,11 @@ mkdir -p /etc/systemd/system if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then if $ENABLE_DOCKER_BRIDGE; then - echo "WARNING: Flag --enable-docker-bridge was set but will be ignored as it's not relevant to containerd" + log "WARNING: Flag --enable-docker-bridge was set but will be ignored as it's not relevant to containerd" fi if [ ! 
-z "$DOCKER_CONFIG_JSON" ]; then - echo "WARNING: Flag --docker-config-json was set but will be ignored as it's not relevant to containerd" + log "WARNING: Flag --docker-config-json was set but will be ignored as it's not relevant to containerd" fi sudo mkdir -p /etc/containerd @@ -560,7 +590,7 @@ elif [[ "$CONTAINER_RUNTIME" = "dockerd" ]]; then systemctl enable docker systemctl restart docker else - echo "Container runtime ${CONTAINER_RUNTIME} is not supported." + log "ERROR: unsupported container runtime: '${CONTAINER_RUNTIME}'" exit 1 fi @@ -584,7 +614,7 @@ systemctl start kubelet # gpu boost clock if command -v nvidia-smi &> /dev/null; then - echo "nvidia-smi found" + log "INFO: nvidia-smi found" nvidia-smi -q > /tmp/nvidia-smi-check if [[ "$?" == "0" ]]; then @@ -592,7 +622,7 @@ if command -v nvidia-smi &> /dev/null; then sudo nvidia-smi --auto-boost-default=0 GPUNAME=$(nvidia-smi -L | head -n1) - echo $GPUNAME + log "INFO: GPU name: $GPUNAME" # set application clock to maximum if [[ $GPUNAME == *"A100"* ]]; then @@ -609,8 +639,9 @@ if command -v nvidia-smi &> /dev/null; then echo "unsupported gpu" fi else + log "ERROR: nvidia-smi check failed!" cat /tmp/nvidia-smi-check fi -else - echo "nvidia-smi not found" fi + +log "INFO: complete!" From fcfca678d8b7088d94e7bf5f07fb71abfc353b46 Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Tue, 25 Apr 2023 12:11:10 -0500 Subject: [PATCH 455/621] add bootstrap option to create a local NVMe raid0 or individual volume mounts (#1171) --- doc/USER_GUIDE.md | 19 ++++ files/bin/setup-local-disks | 220 ++++++++++++++++++++++++++++++++++++ files/bootstrap.sh | 11 ++ scripts/install-worker.sh | 3 +- 4 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 files/bin/setup-local-disks diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index b37348d60..b2271c9a9 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -12,6 +12,7 @@ This document includes details about using the AMI template and the resulting AM 1. 
[Updating known instance types](#updating-known-instance-types) 1. [Version-locked packages](#version-locked-packages) 1. [Image credential provider plugins](#image-credential-provider-plugins) +1. [Ephemeral Storage](#ephemeral-storage) --- @@ -321,3 +322,21 @@ ECR credentials should now be obtained via a plugin, the `ecr-credential-provide Additional image credential provider plugins may be appended to `/etc/eks/image-credential-provider/config.json`. In Kubernetes versions 1.26 and below, all plugins in this file must support `credentialprovider.kubelet.k8s.io/v1alpha1`. In Kubernetes versions 1.27 and above, they must support `credentialprovider.kubelet.k8s.io/v1`. For more information about image credential provider plugins, refer to the [Kubernetes documentation](https://kubernetes.io/docs/tasks/administer-cluster/kubelet-credential-provider/). + +--- + +## Ephemeral Storage + +Some instance types launch with ephemeral NVMe instance storage (i3, i4i, c5d, c6id, etc). There are two main ways of utilizing this storage within Kubernetes: a single RAID-0 array for use by kubelet and containerd or mounting the individual disks for pod usage. + +The EKS Optimized AMI includes a utility script to configure ephemeral storage. The script can be invoked by passing the `--local-disks ` flag to the `/etc/eks/bootstrap.sh` script or the script can be invoked directly at `/bin/setup-local-disks`. All disks are formatted with an XFS file system. + +Below are details on the two disk setup options: + +### RAID-0 for Kubelet and Containerd (raid0) + +A RAID-0 array is setup that includes all ephemeral NVMe instance storage disks. The containerd and kubelet state directories (`/var/lib/containerd` and `/var/lib/kubelet`) will then use the ephemeral storage for more and faster node ephemeral-storage. The node's ephemeral storage can be shared among pods that request ephemeral storage and container images that are downloaded to the node. 
+ +### Mount for Persistent Volumes (mount) + +Another way of utilizing the ephemeral disks is to format and mount the individual disks. Mounting individual disks allows the [local-static-provisioner](https://github.com/kubernetes-sigs/sig-storage-local-static-provisioner) DaemonSet to create Persistent Volume Claims that pods can utilize. diff --git a/files/bin/setup-local-disks b/files/bin/setup-local-disks new file mode 100644 index 000000000..9cdb18dae --- /dev/null +++ b/files/bin/setup-local-disks @@ -0,0 +1,220 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +err_report() { + echo "Exited with error on line $1" +} +trap 'err_report $LINENO' ERR + +print_help() { + echo "usage: $0 " + echo "Sets up Amazon EC2 Instance Store NVMe disks" + echo "" + echo "-d, --dir directory to mount the filesystem(s) (default: /mnt/k8s-disks/)" + echo "-h, --help print this help" +} + +# Sets up a RAID-0 of NVMe instance storage disks, moves +# the contents of /var/lib/kubelet and /var/lib/containerd +# to the new mounted RAID, and bind mounts the kubelet and +# containerd state directories. +maybe_raid0() { + local md_name="kubernetes" + local md_device="/dev/md/${md_name}" + local md_config="/.aws/mdadm.conf" + local array_mount_point="${MNT_DIR}/0" + mkdir -p "$(dirname "${md_config}")" + + if [[ ! -s "${md_config}" ]]; then + mdadm --create --force --verbose \ + "${md_device}" \ + --level=0 \ + --name="${md_name}" \ + --raid-devices="${#EPHEMERAL_DISKS[@]}" \ + "${EPHEMERAL_DISKS[@]}" + while [ -n "$(mdadm --detail "${md_device}" | grep -ioE 'State :.*resyncing')" ]; do + echo "Raid is resyncing..." + sleep 1 + done + mdadm --detail --scan > "${md_config}" + fi + + ## Check if the device symlink has changed on reboot to include a homehost identifier + local current_md_device=$(find /dev/md/ -type l -regex ".*/${md_name}_?[0-9a-z]*$" | tail -n1) + if [[ ! 
-z ${current_md_device} ]]; then + md_device="${current_md_device}" + fi + + # Format the array if not already formatted. + if [[ -z "$(lsblk "${md_device}" -o fstype --noheadings)" ]]; then + ## By default, mkfs tries to use the stripe unit of the array (512k), + ## for the log stripe unit, but the max log stripe unit is 256k. + ## So instead, we use 32k (8 blocks) to avoid a warning of breaching the max. + ## mkfs.xfs defaults to 32k after logging the warning since the default log buffer size is 32k. + mkfs.xfs -l su=8b "${md_device}" + fi + + ## Create the mount directory + mkdir -p "${array_mount_point}" + + local dev_uuid=$(blkid -s UUID -o value "${md_device}") + local mount_unit_name="$(systemd-escape --path --suffix=mount "${array_mount_point}")" + cat > "/etc/systemd/system/${mount_unit_name}" << EOF + [Unit] + Description=Mount EC2 Instance Store NVMe disk RAID0 + [Mount] + What=UUID=${dev_uuid} + Where=${array_mount_point} + Type=xfs + Options=defaults,noatime + [Install] + WantedBy=multi-user.target +EOF + systemd-analyze verify "${mount_unit_name}" + systemctl enable "${mount_unit_name}" --now + + prev_running="" + needs_linked="" + for unit in "kubelet" "containerd"; do + ## Check if the bind mount from the RAID already exists + if [[ "$(systemctl is-active var-lib-${unit}.mount)" != "active" ]]; then + # Check if components that depend on the RAID are running and, if so, stop them + if systemctl is-active "${unit}" > /dev/null 2>&1; then + prev_running+=" ${unit}" + fi + needs_linked+=" /var/lib/${unit}" + fi + done + + ## Check if /var/log/pods has been bind mounted and make sure kubelet is stopped + if [[ "$(systemctl is-active var-log-pods.mount)" != "active" ]]; then + if systemctl is-active "kubelet" > /dev/null 2>&1; then + prev_running+=" ${unit}" + fi + needs_linked+=" /var/log/pods" + fi + + if [[ ! -z "${prev_running}" ]]; then + systemctl stop ${prev_running} + fi + + # Transfer state directories to the array, if they exist. 
+ for mount_point in ${needs_linked}; do + local unit="$(basename "${mount_point}")" + local array_mount_point_unit="${array_mount_point}/${unit}" + mkdir -p "${mount_point}" + echo "Copying ${mount_point}/ to ${array_mount_point_unit}/" + cp -a "${mount_point}/" "${array_mount_point_unit}/" + local mount_unit_name="$(systemd-escape --path --suffix=mount "${mount_point}")" + cat > "/etc/systemd/system/${mount_unit_name}" << EOF + [Unit] + Description=Mount ${unit} on EC2 Instance Store NVMe RAID0 + [Mount] + What=${array_mount_point_unit} + Where=${mount_point} + Type=none + Options=bind + [Install] + WantedBy=multi-user.target +EOF + systemd-analyze verify "${mount_unit_name}" + systemctl enable "${mount_unit_name}" --now + done + + if [[ ! -z "${prev_running}" ]]; then + systemctl start ${prev_running} + fi +} + +# Mounts and creates xfs file systems on all EC2 instance store NVMe disks +# without existing file systems. Mounts in /mnt/k8s-disks/{1..} by default +maybe_mount() { + idx=1 + for dev in "${EPHEMERAL_DISKS[@]}"; do + if [[ -z "$(lsblk "${dev}" -o fstype --noheadings)" ]]; then + mkfs.xfs -l su=8b "${dev}" + fi + if [[ ! -z "$(lsblk "${dev}" -o MOUNTPOINT --noheadings)" ]]; then + echo "${dev} is already mounted." 
+ continue + fi + local mount_point="${MNT_DIR}/${idx}" + local mount_unit_name="$(systemd-escape --path --suffix=mount "${mount_point}")" + mkdir -p "${mount_point}" + cat > "/etc/systemd/system/${mount_unit_name}" << EOF + [Unit] + Description=Mount EC2 Instance Store NVMe disk ${idx} + [Mount] + What=${dev} + Where=${mount_point} + Type=xfs + Options=defaults,noatime + [Install] + WantedBy=multi-user.target +EOF + systemd-analyze verify "${mount_unit_name}" + systemctl enable "${mount_unit_name}" --now + idx=$((idx + 1)) + done +} + +## Main logic +MNT_DIR="/mnt/k8s-disks" + +while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -h | --help) + print_help + exit 0 + ;; + -d | --dir) + MNT_DIR="$2" + shift + shift + ;; + *) # unknown option + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; + esac +done + +set +u +set -- "${POSITIONAL[@]}" # restore positional parameters +DISK_SETUP="$1" +set -u + +if [[ "${DISK_SETUP}" != "raid0" && "${DISK_SETUP}" != "mount" ]]; then + echo "Valid disk setup options are: raid0 or mount" + exit 1 +fi + +disks=($(find -L /dev/disk/by-id/ -xtype l -name '*NVMe_Instance_Storage_*')) +## Bail early if there are no ephemeral disks to setup +if [[ "${#disks[@]}" -eq 0 ]]; then + echo "no ephemeral disks found, skipping disk setup" + exit 0 +fi + +if [ "$(id --user)" -ne 0 ]; then + echo "Must be run as root" + exit 1 +fi + +## Get devices of NVMe instance storage ephemeral disks +EPHEMERAL_DISKS=($(realpath "${disks[@]}" | sort -u)) + +case "${DISK_SETUP}" in + "raid0") + maybe_raid0 + echo "Successfully setup RAID-0 consisting of ${EPHEMERAL_DISKS[@]}" + ;; + "mount") + maybe_mount + echo "Successfully setup disk mounts consisting of ${EPHEMERAL_DISKS[@]}" + ;; +esac diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 205cb5458..38f1894ea 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -32,6 +32,7 @@ function print_help { echo "--enable-local-outpost Enable support for worker nodes to 
communicate with the local control plane when running on a disconnected Outpost. (true or false)" echo "--ip-family Specify ip family of the cluster" echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints." + echo "--local-disks Setup instance storage NVMe disks in raid0 or mount the individual disks for use by pods [mount | raid0]" echo "--mount-bfs-fs Mount a bpffs at /sys/fs/bpf (default: true, for Kubernetes 1.27+; false otherwise)" echo "--pause-container-account The AWS account (number) to pull the pause container from" echo "--pause-container-version The tag of the pause container" @@ -156,6 +157,11 @@ while [[ $# -gt 0 ]]; do shift shift ;; + --local-disks) + LOCAL_DISKS=$2 + shift + shift + ;; *) # unknown option POSITIONAL+=("$1") # save it in an array for later shift # past argument @@ -211,6 +217,11 @@ IP_FAMILY="${IP_FAMILY:-}" SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" CLUSTER_ID="${CLUSTER_ID:-}" +LOCAL_DISKS="${LOCAL_DISKS:-}" + +if [[ ! -z ${LOCAL_DISKS} ]]; then + setup-local-disks "${LOCAL_DISKS}" +fi DEFAULT_MOUNT_BPF_FS="true" if vercmp "$KUBELET_VERSION" lt "1.27.0"; then diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index b40bf4d5a..11a631b21 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -70,7 +70,8 @@ sudo yum install -y \ unzip \ wget \ yum-utils \ - yum-plugin-versionlock + yum-plugin-versionlock \ + mdadm # Remove any old kernel versions. 
`--count=1` here means "only leave 1 kernel version installed" sudo package-cleanup --oldkernels --count=1 -y From a6f718674cdcfdcdbbca9dee6d7163729402a6d4 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 25 Apr 2023 17:32:01 -0700 Subject: [PATCH 456/621] Define region so CLI uses correct endpoint for localzones (#1284) --- files/bootstrap.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 38f1894ea..c6aedcdd5 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -528,7 +528,10 @@ else # then /etc/hostname is not the same as EC2's PrivateDnsName. # The name of the Node object must be equal to EC2's PrivateDnsName for the aws-iam-authenticator to allow this kubelet to manage it. INSTANCE_ID=$(imds /latest/meta-data/instance-id) - PRIVATE_DNS_NAME=$(AWS_RETRY_MODE=standard AWS_MAX_ATTEMPTS=10 aws ec2 describe-instances --instance-ids $INSTANCE_ID --query 'Reservations[].Instances[].PrivateDnsName' --output text) + # the AWS CLI currently constructs the wrong endpoint URL on localzones (the availability zone group will be used instead of the parent region) + # more info: https://github.com/aws/aws-cli/issues/7043 + REGION=$(imds /latest/meta-data/placement/region) + PRIVATE_DNS_NAME=$(AWS_RETRY_MODE=standard AWS_MAX_ATTEMPTS=10 aws ec2 describe-instances --region $REGION --instance-ids $INSTANCE_ID --query 'Reservations[].Instances[].PrivateDnsName' --output text) KUBELET_ARGS="$KUBELET_ARGS --hostname-override=$PRIVATE_DNS_NAME" fi From 3986eb0653f89428b87020661dd154f174df2a45 Mon Sep 17 00:00:00 2001 From: Steven Davidovitz Date: Wed, 26 Apr 2023 18:58:39 -0700 Subject: [PATCH 457/621] Add pigz (#1283) --- scripts/install-worker.sh | 3 ++- scripts/validate.sh | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 11a631b21..0cdceff0c 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ 
-71,7 +71,8 @@ sudo yum install -y \ wget \ yum-utils \ yum-plugin-versionlock \ - mdadm + mdadm \ + pigz # Remove any old kernel versions. `--count=1` here means "only leave 1 kernel version installed" sudo package-cleanup --oldkernels --count=1 -y diff --git a/scripts/validate.sh b/scripts/validate.sh index ae329005e..0b007e386 100644 --- a/scripts/validate.sh +++ b/scripts/validate.sh @@ -73,3 +73,14 @@ if [ $LOCKED_PACKAGES -ne $UNIQUE_LOCKED_PACKAGES ]; then fi echo "Package versionlocks are correct!" + +REQUIRED_COMMANDS=(unpigz) + +for ENTRY in "${REQUIRED_COMMANDS[@]}"; do + if ! command -v "$ENTRY" > /dev/null; then + echo "Required command does not exist: '$ENTRY'" + exit 1 + fi +done + +echo "Required commands were found: ${REQUIRED_COMMANDS[*]}" From f247c96cf1d9a659d6203db16cdd011862521349 Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Tue, 2 May 2023 14:41:33 -0700 Subject: [PATCH 458/621] Adding inf2 and trn1n instances (#1290) Co-authored-by: ljosyula --- files/get-ecr-uri.sh | 3 +++ scripts/install-worker.sh | 9 ++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/files/get-ecr-uri.sh b/files/get-ecr-uri.sh index 134dc39cc..ba719ac06 100755 --- a/files/get-ecr-uri.sh +++ b/files/get-ecr-uri.sh @@ -27,6 +27,9 @@ else us-gov-east-1) acct="151742754352" ;; + us-iso-west-1) + acct="608367168043" + ;; us-iso-east-1) acct="725322719131" ;; diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 0cdceff0c..e58c3a733 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -122,7 +122,9 @@ sudo mv $TEMPLATE_DIR/iptables-restore.service /etc/eks/iptables-restore.service ### awscli ##################################################### ################################################################################ -if [[ "$BINARY_BUCKET_REGION" != "us-iso-east-1" && "$BINARY_BUCKET_REGION" != "us-isob-east-1" ]]; then +### isolated regions can't 
communicate to awscli.amazonaws.com so installing awscli through yum +ISOLATED_REGIONS=(us-iso-east-1 us-iso-west-1 us-isob-east-1) +if ! [[ " ${ISOLATED_REGIONS[*]} " =~ " ${BINARY_BUCKET_REGION} " ]]; then # https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html echo "Installing awscli v2 bundle" AWSCLI_DIR=$(mktemp -d) @@ -247,7 +249,7 @@ echo "Downloading binaries from: s3://$BINARY_BUCKET_NAME" S3_DOMAIN="amazonaws.com" if [ "$BINARY_BUCKET_REGION" = "cn-north-1" ] || [ "$BINARY_BUCKET_REGION" = "cn-northwest-1" ]; then S3_DOMAIN="amazonaws.com.cn" -elif [ "$BINARY_BUCKET_REGION" = "us-iso-east-1" ]; then +elif [ "$BINARY_BUCKET_REGION" = "us-iso-east-1" ] || [ "$BINARY_BUCKET_REGION" = "us-iso-west-1" ]; then S3_DOMAIN="c2s.ic.gov" elif [ "$BINARY_BUCKET_REGION" = "us-isob-east-1" ]; then S3_DOMAIN="sc2s.sgov.gov" @@ -369,7 +371,8 @@ sudo mv $TEMPLATE_DIR/ecr-credential-provider-config.json /etc/eks/image-credent ################################################################################ ### Cache Images ############################################################### ################################################################################ -if [[ "$CACHE_CONTAINER_IMAGES" == "true" && "$BINARY_BUCKET_REGION" != "us-iso-east-1" && "$BINARY_BUCKET_REGION" != "us-isob-east-1" ]]; then + +if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! 
[[ " ${ISOLATED_REGIONS[*]} " =~ " ${BINARY_BUCKET_REGION} " ]]; then AWS_DOMAIN=$(imds 'latest/meta-data/services/domain') ECR_URI=$(/etc/eks/get-ecr-uri.sh "${BINARY_BUCKET_REGION}" "${AWS_DOMAIN}") From c70686c8e41b27a3e716cf4a4c2f99798b1729c8 Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Wed, 3 May 2023 21:06:51 -0700 Subject: [PATCH 459/621] Update CHANGELOG.md (#1292) Co-authored-by: ljosyula --- CHANGELOG.md | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index acafb611e..6b776796c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,58 @@ # Changelog +### AMI Release v20230501 +* amazon-eks-gpu-node-1.26-v20230501 +* amazon-eks-gpu-node-1.25-v20230501 +* amazon-eks-gpu-node-1.24-v20230501 +* amazon-eks-gpu-node-1.23-v20230501 +* amazon-eks-gpu-node-1.22-v20230501 +* amazon-eks-arm64-node-1.26-v20230501 +* amazon-eks-arm64-node-1.25-v20230501 +* amazon-eks-arm64-node-1.24-v20230501 +* amazon-eks-arm64-node-1.23-v20230501 +* amazon-eks-arm64-node-1.22-v20230501 +* amazon-eks-node-1.26-v20230501 +* amazon-eks-node-1.25-v20230501 +* amazon-eks-node-1.24-v20230501 +* amazon-eks-node-1.23-v20230501 +* amazon-eks-node-1.22-v20230501 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.26.2-20230501` +* `1.25.7-20230501` +* `1.24.11-20230501` +* `1.23.17-20230501` +* `1.22.17-20230501` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.26.2/2023-03-17/ +* s3://amazon-eks/1.25.7/2023-03-17/ +* s3://amazon-eks/1.24.11/2023-03-17/ +* s3://amazon-eks/1.23.17/2023-03-17/ +* s3://amazon-eks/1.22.17/2023-03-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.241-150.347.amzn2 + * Kubernetes 1.24 and above: 5.10.178-162.673.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with 
Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- Add bootstrap option to create a local NVMe raid0 or individual volume mounts ([#1171](https://github.com/awslabs/amazon-eks-ami/pull/1171)) +- Improve bootstrap logging ([#1276](https://github.com/awslabs/amazon-eks-ami/pull/1276)) +- Use credential provider API v1 in 1.27+, v1alpha1 in 1.26- ([#1269](https://github.com/awslabs/amazon-eks-ami/pull/1269)) +- Override hostname to match EC2's PrivateDnsName ([#1264](https://github.com/awslabs/amazon-eks-ami/pull/1264)) +- Add ethtool ([#1261](https://github.com/awslabs/amazon-eks-ami/pull/1261)) +- Update `kernel-5.10` for [ALASKERNEL-5.10-2023-031](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-031.html) +- Kernel version upgrade to `5.10.178-162.673.amzn2` fixes the [Containers failing to create and probe exec errors related to seccomp on recent kernel-5.10 versions](https://github.com/awslabs/amazon-eks-ami/issues/1219) issue + + ### AMI Release v20230411 * amazon-eks-gpu-node-1.26-v20230411 * amazon-eks-gpu-node-1.25-v20230411 From 3e40a8bb13173b781fde4171e2de7e2e5031b411 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 5 May 2023 12:31:07 -0700 Subject: [PATCH 460/621] Upgrades docker to 20.10.23-1.amzn2.0.1 (#1293) --- eks-worker-al2-variables.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 30d769e03..68cbad7eb 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -15,7 +15,7 @@ "cni_plugin_version": "v0.8.6", "containerd_version": "1.6.*", "creator": "{{env `USER`}}", - "docker_version": "20.10.17-1.amzn2.0.1", + "docker_version": "20.10.23-1.amzn2.0.1", "encrypted": "false", "kernel_version": "", "kms_key_id": "", From e4dd2abb52a43477f9aa32134fa6683dcc8de048 Mon Sep 17 00:00:00 
2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Mon, 8 May 2023 16:43:49 -0700 Subject: [PATCH 461/621] Updating instance type for AMI build and making changes for us-iso-west-1 support (#1294) Co-authored-by: ljosyula --- Makefile | 2 +- scripts/install-worker.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 271faa8fb..6ff6ba3ec 100644 --- a/Makefile +++ b/Makefile @@ -46,7 +46,7 @@ ifeq ($(arch), arm64) instance_type ?= m6g.large ami_name ?= amazon-eks-arm64-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') else - instance_type ?= m4.large + instance_type ?= m5.large ami_name ?= amazon-eks-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') endif diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index e58c3a733..d8bdafdcd 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -123,8 +123,8 @@ sudo mv $TEMPLATE_DIR/iptables-restore.service /etc/eks/iptables-restore.service ################################################################################ ### isolated regions can't communicate to awscli.amazonaws.com so installing awscli through yum -ISOLATED_REGIONS=(us-iso-east-1 us-iso-west-1 us-isob-east-1) -if ! [[ " ${ISOLATED_REGIONS[*]} " =~ " ${BINARY_BUCKET_REGION} " ]]; then +ISOLATED_REGIONS="${ISOLATED_REGIONS:-us-iso-east-1 us-iso-west-1 us-isob-east-1}" +if ! [[ ${ISOLATED_REGIONS} =~ $BINARY_BUCKET_REGION ]]; then # https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html echo "Installing awscli v2 bundle" AWSCLI_DIR=$(mktemp -d) @@ -372,7 +372,7 @@ sudo mv $TEMPLATE_DIR/ecr-credential-provider-config.json /etc/eks/image-credent ### Cache Images ############################################################### ################################################################################ -if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! 
[[ " ${ISOLATED_REGIONS[*]} " =~ " ${BINARY_BUCKET_REGION} " ]]; then +if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! [[ ${ISOLATED_REGIONS} =~ $BINARY_BUCKET_REGION ]]; then AWS_DOMAIN=$(imds 'latest/meta-data/services/domain') ECR_URI=$(/etc/eks/get-ecr-uri.sh "${BINARY_BUCKET_REGION}" "${AWS_DOMAIN}") From 59212d4a8ec42929f19b1c47e8f9a61168e48faf Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 9 May 2023 18:15:17 +0300 Subject: [PATCH 462/621] Fix ECR pattern for aws-cn (#1280) --- files/ecr-credential-provider-config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/ecr-credential-provider-config.json b/files/ecr-credential-provider-config.json index 7fe687156..21581c4e9 100644 --- a/files/ecr-credential-provider-config.json +++ b/files/ecr-credential-provider-config.json @@ -6,7 +6,7 @@ "name": "ecr-credential-provider", "matchImages": [ "*.dkr.ecr.*.amazonaws.com", - "*.dkr.ecr.*.amazonaws.cn", + "*.dkr.ecr.*.amazonaws.com.cn", "*.dkr.ecr-fips.*.amazonaws.com", "*.dkr.ecr.us-iso-east-1.c2s.ic.gov", "*.dkr.ecr.us-isob-east-1.sc2s.sgov.gov" From f68e0e6afa2cc61c481aa6a8ce6c9e4ab692491d Mon Sep 17 00:00:00 2001 From: willgleich <22464726+willgleich@users.noreply.github.com> Date: Tue, 9 May 2023 11:51:44 -0600 Subject: [PATCH 463/621] Fix imds setting for multiple enis on ipv6 (#1275) --- files/bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index c6aedcdd5..d200eb431 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -441,7 +441,7 @@ fi ### kubelet.service configuration -MAC=$(imds 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$//') +MAC=$(imds 'latest/meta-data/mac') if [[ -z "${DNS_CLUSTER_IP}" ]]; then if [[ "${IP_FAMILY}" == "ipv6" ]]; then From 2bf329314672f7b82e3efc9de8a02d45d3d01bf2 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 11 May 2023 19:31:43 +0300 Subject: [PATCH 464/621] Revert gp3 volume_type due to lack of Local 
Zone support (#1295) --- Makefile | 15 --------------- eks-worker-al2-variables.json | 6 +++--- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index 6ff6ba3ec..c6b13f43e 100644 --- a/Makefile +++ b/Makefile @@ -17,21 +17,6 @@ packer_variable_file_contains = $(if $(PACKER_VARIABLE_FILE),$(shell grep -Fq $1 # otherwise expands to 'false' vercmp = $(shell $(MAKEFILE_DIR)/files/bin/vercmp "$1" "$2" "$3") -# expands to 'true' if the 'aws_region' contains 'us-iso' (an isolated region) -# otherwise, expands to 'false' -in_iso_region = $(if $(findstring us-iso,$(aws_region)),true,false) - -# gp3 volumes are used by default for 1.27+ -# TODO: remove when 1.26 reaches EOL -# TODO: remove when gp3 is supported in isolated regions -ifneq ($(call packer_variable_file_contains,volume_type), true) - ifeq ($(call in_iso_region), true) - volume_type ?= gp2 - else ifeq ($(call vercmp,$(kubernetes_version),lt,1.27.0), true) - volume_type ?= gp2 - endif -endif - # Docker is not present on 1.25+ AMI's # TODO: remove this when 1.24 reaches EOL ifeq ($(call vercmp,$(kubernetes_version),gteq,1.25.0), true) diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 68cbad7eb..075702c65 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -11,7 +11,7 @@ "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}", "binary_bucket_name": "amazon-eks", "binary_bucket_region": "us-west-2", - "cache_container_images": "false", + "cache_container_images": "false", "cni_plugin_version": "v0.8.6", "containerd_version": "1.6.*", "creator": "{{env `USER`}}", @@ -20,7 +20,7 @@ "kernel_version": "", "kms_key_id": "", "launch_block_device_mappings_volume_size": "4", - "pause_container_version": "3.5", + "pause_container_version": "3.5", "pull_cni_from_github": "true", "remote_folder": "", "runc_version": "1.1.4-1.amzn2", @@ -33,5 +33,5 @@ "ssh_username": "ec2-user", "subnet_id": "", "temporary_security_group_source_cidrs": "", 
- "volume_type": "gp3" + "volume_type": "gp2" } From 07194608a40a341bb37271804d1b30bb66aebd6d Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Thu, 11 May 2023 19:17:42 -0500 Subject: [PATCH 465/621] fix: add local-disk info log and fix typo (#1298) --- files/bootstrap.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index d200eb431..8178fcb06 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -105,7 +105,7 @@ while [[ $# -gt 0 ]]; do ;; --pause-container-account) PAUSE_CONTAINER_ACCOUNT=$2 - log "INFO: --pause-container-accounte='${PAUSE_CONTAINER_ACCOUNT}'" + log "INFO: --pause-container-account='${PAUSE_CONTAINER_ACCOUNT}'" shift shift ;; @@ -159,6 +159,7 @@ while [[ $# -gt 0 ]]; do ;; --local-disks) LOCAL_DISKS=$2 + log "INFO: --local-disks='${LOCAL_DISKS}'" shift shift ;; From 0de475c5f802acd470d9a2f1fdd521b7949a25ec Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 12 May 2023 01:36:28 -0700 Subject: [PATCH 466/621] Update eni-max-pods.txt (#1296) Co-authored-by: cartermckinnon --- files/eni-max-pods.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 2c0253e90..d24d8d28f 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -253,6 +253,12 @@ i3en.6xlarge 234 i3en.large 29 i3en.metal 737 i3en.xlarge 58 +i4g.16xlarge 737 +i4g.2xlarge 58 +i4g.4xlarge 234 +i4g.8xlarge 234 +i4g.large 29 +i4g.xlarge 58 i4i.16xlarge 737 i4i.2xlarge 58 i4i.32xlarge 737 From e2618f9504e85d84153e7ee9a147415a032584ac Mon Sep 17 00:00:00 2001 From: camrakin <113552683+camrakin@users.noreply.github.com> Date: Fri, 12 May 2023 14:12:02 -0700 Subject: [PATCH 467/621] Update Makefile for 2023-05-11 Binaries (#1300) --- Makefile | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index c6b13f43e..d5e58398f 100644 --- 
a/Makefile +++ b/Makefile @@ -49,7 +49,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: latest -latest: 1.26 ## Build EKS Optimized AL2 AMI with the latest supported version of Kubernetes +latest: 1.27 ## Build EKS Optimized AL2 AMI with the latest supported version of Kubernetes # ensure that these flags are equivalent to the rules in the .editorconfig SHFMT_FLAGS := --list \ @@ -102,24 +102,28 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.22 1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 - $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-03-17 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-05-11 pull_cni_from_github=true .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-03-17 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-05-11 pull_cni_from_github=true .PHONY: 1.24 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 - $(MAKE) k8s kubernetes_version=1.24.11 kubernetes_build_date=2023-03-17 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.24.13 kubernetes_build_date=2023-05-11 pull_cni_from_github=true .PHONY: 1.25 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 - $(MAKE) k8s kubernetes_version=1.25.7 kubernetes_build_date=2023-03-17 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.25.9 kubernetes_build_date=2023-05-11 pull_cni_from_github=true .PHONY: 1.26 1.26: ## Build EKS Optimized AL2 AMI - K8s 1.26 - $(MAKE) k8s kubernetes_version=1.26.2 kubernetes_build_date=2023-03-17 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.26.4 kubernetes_build_date=2023-05-11 pull_cni_from_github=true +.PHONY: 1.27 +1.27: ## Build EKS Optimized AL2 AMI - K8s 1.27 + $(MAKE) k8s kubernetes_version=1.27.1 kubernetes_build_date=2023-04-19 pull_cni_from_github=true + .PHONY: clean clean: rm *-manifest.json From 
7465fc8287626e4d261a28d20d7a8ead7afb75d1 Mon Sep 17 00:00:00 2001 From: Zaid Ahmed Farooq <38226823+zaf6862@users.noreply.github.com> Date: Fri, 12 May 2023 18:18:03 -0700 Subject: [PATCH 468/621] AMI Release v20230509 (#1299) * AMI Release v20230509 --------- Co-authored-by: Zaid Farooq --- CHANGELOG.md | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b776796c..f5b7a5459 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,54 @@ # Changelog +### AMI Release v20230509 +* amazon-eks-gpu-node-1.26-v20230509 +* amazon-eks-gpu-node-1.25-v20230509 +* amazon-eks-gpu-node-1.24-v20230509 +* amazon-eks-gpu-node-1.23-v20230509 +* amazon-eks-gpu-node-1.22-v20230509 +* amazon-eks-arm64-node-1.26-v20230509 +* amazon-eks-arm64-node-1.25-v20230509 +* amazon-eks-arm64-node-1.24-v20230509 +* amazon-eks-arm64-node-1.23-v20230509 +* amazon-eks-arm64-node-1.22-v20230509 +* amazon-eks-node-1.26-v20230509 +* amazon-eks-node-1.25-v20230509 +* amazon-eks-node-1.24-v20230509 +* amazon-eks-node-1.23-v20230509 +* amazon-eks-node-1.22-v20230509 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.26.2-20230509` +* `1.25.7-20230509` +* `1.24.11-20230509` +* `1.23.17-20230509` +* `1.22.17-20230509` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.26.2/2023-03-17/ +* s3://amazon-eks/1.25.7/2023-03-17/ +* s3://amazon-eks/1.24.11/2023-03-17/ +* s3://amazon-eks/1.23.17/2023-03-17/ +* s3://amazon-eks/1.22.17/2023-03-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.241-150.347.amzn2 + * Kubernetes 1.24 and above: 5.10.178-162.673.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
+* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- The new AMIs have updated docker version 20.10.23-1.amzn2.0.1 that addresses two docker CVEs: [CVE-2022-36109 - docker](https://alas.aws.amazon.com/cve/html/CVE-2022-36109.html) and [CVE-2022-37708 - docker](https://alas.aws.amazon.com/cve/html/CVE-2022-37708.html). +- For the GPU Variants of these AMIs, the Nvidia Fabric Manager version is upgraded from 470.161.03-1 to 470.182.03-1. +- Fix ECR pattern for aws-cn ([#1280](https://github.com/awslabs/amazon-eks-ami/pull/1280)) +- Fix imds setting for multiple enis on ipv6 ([#1275](https://github.com/awslabs/amazon-eks-ami/pull/1275)) + ### AMI Release v20230501 * amazon-eks-gpu-node-1.26-v20230501 * amazon-eks-gpu-node-1.25-v20230501 From 3e27dbc3b24c87fdf53fbe6bef610d23732f03a6 Mon Sep 17 00:00:00 2001 From: camrakin <113552683+camrakin@users.noreply.github.com> Date: Thu, 18 May 2023 16:30:34 -0700 Subject: [PATCH 469/621] Update CHANGELOG for AMI Release v20230513 (#1305) Update the CHANGELOG.md for the v20230513 AMI Release --- CHANGELOG.md | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5b7a5459..3200e89eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,59 @@ # Changelog +### AMI Release v20230513 +* amazon-eks-gpu-node-1.27-v20230513 +* amazon-eks-gpu-node-1.26-v20230513 +* amazon-eks-gpu-node-1.25-v20230513 +* amazon-eks-gpu-node-1.24-v20230513 +* amazon-eks-gpu-node-1.23-v20230513 +* amazon-eks-gpu-node-1.22-v20230513 +* amazon-eks-arm64-node-1.27-v20230513 +* amazon-eks-arm64-node-1.26-v20230513 +* amazon-eks-arm64-node-1.25-v20230513 +* amazon-eks-arm64-node-1.24-v20230513 +* amazon-eks-arm64-node-1.23-v20230513 +* amazon-eks-arm64-node-1.22-v20230513 +* amazon-eks-node-1.27-v20230513 +* amazon-eks-node-1.26-v20230513
+* amazon-eks-node-1.25-v20230513 +* amazon-eks-node-1.24-v20230513 +* amazon-eks-node-1.23-v20230513 +* amazon-eks-node-1.22-v20230513 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.1-20230513` +* `1.26.4-20230513` +* `1.25.9-20230513` +* `1.24.13-20230513` +* `1.23.17-20230513` +* `1.22.17-20230513` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.1/2023-04-19/ +* s3://amazon-eks/1.26.4/2023-05-11/ +* s3://amazon-eks/1.25.9/2023-05-11/ +* s3://amazon-eks/1.24.13/2023-05-11/ +* s3://amazon-eks/1.23.17/2023-05-11/ +* s3://amazon-eks/1.22.17/2023-05-11/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.241-150.347.amzn2 + * Kubernetes 1.24 and above: 5.10.178-162.673.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: + - Add support for Kubernetes 1.27 ([#1300](https://github.com/awslabs/amazon-eks-ami/pull/1300)) + +Other changes: + - Updated max pods for i4g instance types ([#1296](https://github.com/awslabs/amazon-eks-ami/commit/0de475c5f802acd470d9a2f1fdd521b7949a25ec)) + ### AMI Release v20230509 * amazon-eks-gpu-node-1.26-v20230509 * amazon-eks-gpu-node-1.25-v20230509 From 63334e86e02ade2140c740689ead89dacf055d89 Mon Sep 17 00:00:00 2001 From: Kulwant Singh Date: Fri, 19 May 2023 19:18:31 +0000 Subject: [PATCH 470/621] Collect vpc-bridge CNI conf on Windows (#1306) --- log-collector-script/windows/eks-log-collector.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/log-collector-script/windows/eks-log-collector.ps1 b/log-collector-script/windows/eks-log-collector.ps1 index f96916e7e..31fa84ba2 100644 --- a/log-collector-script/windows/eks-log-collector.ps1 +++ 
b/log-collector-script/windows/eks-log-collector.ps1 @@ -289,7 +289,7 @@ Function get_k8s_info{ Write-Host "Collecting kubelet information" copy C:\ProgramData\kubernetes\kubeconfig $info_system\kubelet\ copy C:\ProgramData\kubernetes\kubelet-config.json $info_system\kubelet\ - copy C:\ProgramData\Amazon\EKS\cni\config\vpc-shared-eni.conf $info_system\cni\ + copy C:\ProgramData\Amazon\EKS\cni\config\* $info_system\cni\ Write-Host "OK" -foregroundcolor "green" } catch { From 43cc4599720511700fe732b67938a0146ce7119c Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 24 May 2023 09:18:53 -0700 Subject: [PATCH 471/621] Collect Karpenter user-data.log (#1310) --- log-collector-script/linux/eks-log-collector.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index dee5654a6..75eada625 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -71,6 +71,7 @@ COMMON_LOGS=( pods # eks cloud-init.log cloud-init-output.log + user-data.log kube-proxy.log ) From 8369d06e5ae2a9c4c8cb99531ce525a767080019 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 26 May 2023 15:43:07 -0700 Subject: [PATCH 472/621] Update managed policy name for SSM (#1311) --- log-collector-script/linux/README.md | 2 +- log-collector-script/windows/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/README.md b/log-collector-script/linux/README.md index 69bc088b3..4119e4410 100644 --- a/log-collector-script/linux/README.md +++ b/log-collector-script/linux/README.md @@ -91,7 +91,7 @@ Trying to archive gathered information... * SSM agent should be installed and running on Worker Node(s). [How to Install SSM Agent link](https://docs.aws.amazon.com/systems-manager/latest/userguide/sysman-manual-agent-install.html) -* Worker Node(s) should have required permissions to communicate with SSM service. 
IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. +* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonSSMManagedInstanceCore` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonSSMManagedInstanceCore` has `S3:PutObject` permission to all S3 resources. *Note:* For more granular control of the IAM permission check [Actions defined by AWS Systems Manager](https://docs.aws.amazon.com/IAM/latest/UserGuide/list_awssystemsmanager.html%23awssystemsmanager-actions-as-permissions) diff --git a/log-collector-script/windows/README.md b/log-collector-script/windows/README.md index 374a4053b..945211c14 100644 --- a/log-collector-script/windows/README.md +++ b/log-collector-script/windows/README.md @@ -84,7 +84,7 @@ Done... your bundled logs are located in C:\log-collector\eks_i-0b318f704c74b6a * SSM agent should be installed and running on Worker Node(s). [How to Install SSM Agent link](https://docs.aws.amazon.com/systems-manager/latest/userguide/sysman-manual-agent-install.html) -* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. +* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonSSMManagedInstanceCore` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonSSMManagedInstanceCore` has `S3:PutObject` permission to all S3 resources. 
*Note:* For more granular control of the IAM permission check [Actions defined by AWS Systems Manager](https://docs.aws.amazon.com/IAM/latest/UserGuide/list_awssystemsmanager.html%23awssystemsmanager-actions-as-permissions) From cee3aadb6dad8e5c4a89b690600ac846a77c136c Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 1 Jun 2023 08:38:30 -0700 Subject: [PATCH 473/621] Update CHANGELOG.md `v20230526` release (#1314) Update changelog for `v20230526` release --- CHANGELOG.md | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3200e89eb..419f1240f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,58 @@ # Changelog +### AMI Release v20230526 +* amazon-eks-gpu-node-1.27-v20230526 +* amazon-eks-gpu-node-1.26-v20230526 +* amazon-eks-gpu-node-1.25-v20230526 +* amazon-eks-gpu-node-1.24-v20230526 +* amazon-eks-gpu-node-1.23-v20230526 +* amazon-eks-gpu-node-1.22-v20230526 +* amazon-eks-arm64-node-1.27-v20230526 +* amazon-eks-arm64-node-1.26-v20230526 +* amazon-eks-arm64-node-1.25-v20230526 +* amazon-eks-arm64-node-1.24-v20230526 +* amazon-eks-arm64-node-1.23-v20230526 +* amazon-eks-arm64-node-1.22-v20230526 +* amazon-eks-node-1.27-v20230526 +* amazon-eks-node-1.26-v20230526 +* amazon-eks-node-1.25-v20230526 +* amazon-eks-node-1.24-v20230526 +* amazon-eks-node-1.23-v20230526 +* amazon-eks-node-1.22-v20230526 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.1-20230526` +* `1.26.4-20230526` +* `1.25.9-20230526` +* `1.24.13-20230526` +* `1.23.17-20230526` +* `1.22.17-20230526` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.1/2023-04-19/ +* s3://amazon-eks/1.26.4/2023-05-11/ +* s3://amazon-eks/1.25.9/2023-05-11/ +* s3://amazon-eks/1.24.13/2023-05-11/ +* s3://amazon-eks/1.23.17/2023-05-11/ +* s3://amazon-eks/1.22.17/2023-05-11/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 
5.4.242-155.348.amzn2 + * Kubernetes 1.24 and above: 5.10.179-166.674.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +* `5.4` kernel update to `5.4.242-155.348.amzn2` addresses CVE [ALAS2KERNEL-5.4-2023-045](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-045.html) +* `5.10` kernel update to `5.10.179-166.674.amzn2` addresses [ALAS2KERNEL-5.10-2023-032](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-032.html) +* `Glib` update to `glib2-2.56.1-9.amzn2` addresses [ALAS-2023-2049](https://alas.aws.amazon.com/AL2/ALAS-2023-2049.html) + ### AMI Release v20230513 * amazon-eks-gpu-node-1.27-v20230513 * amazon-eks-gpu-node-1.26-v20230513 From 487830668b1f556e530fd1d6310763b337cfb74e Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 6 Jun 2023 18:42:07 -0700 Subject: [PATCH 474/621] Updates runc to 1.1.5 (#1319) --- eks-worker-al2-variables.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 075702c65..f5b3e616b 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -23,7 +23,7 @@ "pause_container_version": "3.5", "pull_cni_from_github": "true", "remote_folder": "", - "runc_version": "1.1.4-1.amzn2", + "runc_version": "1.1.5-1.amzn2", "security_group_id": "", "sonobuoy_e2e_registry": "", "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", From 3cd35c7833c2e79a310470fa18b7ae3bcffc28aa Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Sat, 10 Jun 2023 00:30:23 -0700 Subject: [PATCH 475/621] Updating changelog for release (#1323) Co-authored-by: Ravi Sinha --- CHANGELOG.md | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 419f1240f..9532c7419 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,57 @@ # Changelog +### AMI Release v20230607 +* amazon-eks-gpu-node-1.27-v20230607 +* amazon-eks-gpu-node-1.26-v20230607 +* amazon-eks-gpu-node-1.25-v20230607 +* amazon-eks-gpu-node-1.24-v20230607 +* amazon-eks-gpu-node-1.23-v20230607 +* amazon-eks-gpu-node-1.22-v20230607 +* amazon-eks-arm64-node-1.27-v20230607 +* amazon-eks-arm64-node-1.26-v20230607 +* amazon-eks-arm64-node-1.25-v20230607 +* amazon-eks-arm64-node-1.24-v20230607 +* amazon-eks-arm64-node-1.23-v20230607 +* amazon-eks-arm64-node-1.22-v20230607 +* amazon-eks-node-1.27-v20230607 +* amazon-eks-node-1.26-v20230607 +* amazon-eks-node-1.25-v20230607 +* amazon-eks-node-1.24-v20230607 +* amazon-eks-node-1.23-v20230607 +* amazon-eks-node-1.22-v20230607 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.1-20230607` +* `1.26.4-20230607` +* `1.25.9-20230607` +* `1.24.13-20230607` +* `1.23.17-20230607` +* `1.22.17-20230607` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.1/2023-04-19/ +* s3://amazon-eks/1.26.4/2023-05-11/ +* s3://amazon-eks/1.25.9/2023-05-11/ +* s3://amazon-eks/1.24.13/2023-05-11/ +* s3://amazon-eks/1.23.17/2023-05-11/ +* s3://amazon-eks/1.22.17/2023-05-11/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.242-156.349.amzn2 + * Kubernetes 1.24 and above: 5.10.179-168.710.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
+* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.5-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +* `5.4` kernel update to `5.4.242-156.349.amzn2` and `5.10` kernel update to `5.10.179-168.710.amzn2` address [CVE-2023-32233](https://alas.aws.amazon.com/cve/html/CVE-2023-32233.html) +* Updating `runc` version to `1.1.5-1.amzn2` which contains fixes for [CVE-2023-28642](https://explore.alas.aws.amazon.com/CVE-2023-28642.html) and [CVE-2023-27561](https://explore.alas.aws.amazon.com/CVE-2023-27561.html). + ### AMI Release v20230526 * amazon-eks-gpu-node-1.27-v20230526 * amazon-eks-gpu-node-1.26-v20230526 From 05f1146017927a043e12bbc369eeab28f96bcdde Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 14 Jun 2023 10:26:07 -0700 Subject: [PATCH 476/621] Mount bpffs by default on 1.25+ (#1320) --- files/bootstrap.sh | 6 +++--- test/cases/mount-bpf-fs.sh | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 8178fcb06..3729dad2c 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -33,7 +33,7 @@ function print_help { echo "--ip-family Specify ip family of the cluster" echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints." echo "--local-disks Setup instance storage NVMe disks in raid0 or mount the individual disks for use by pods [mount | raid0]" - echo "--mount-bfs-fs Mount a bpffs at /sys/fs/bpf (default: true, for Kubernetes 1.27+; false otherwise)" + echo "--mount-bpf-fs Mount a bpffs at /sys/fs/bpf (default: true, for Kubernetes 1.25+; false otherwise)" echo "--pause-container-account The AWS account (number) to pull the pause container from" echo "--pause-container-version The tag of the pause container" echo "--service-ipv6-cidr ipv6 cidr range of the cluster" @@ -225,7 +225,7 @@ if [[ ! 
-z ${LOCAL_DISKS} ]]; then fi DEFAULT_MOUNT_BPF_FS="true" -if vercmp "$KUBELET_VERSION" lt "1.27.0"; then +if vercmp "$KUBELET_VERSION" lt "1.25.0"; then DEFAULT_MOUNT_BPF_FS="false" fi MOUNT_BPF_FS="${MOUNT_BPF_FS:-$DEFAULT_MOUNT_BPF_FS}" @@ -322,7 +322,7 @@ if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then fi if [ "$MOUNT_BPF_FS" = "true" ]; then - sudo mount-bpf-fs + mount-bpf-fs fi ECR_URI=$(/etc/eks/get-ecr-uri.sh "${AWS_DEFAULT_REGION}" "${AWS_SERVICES_DOMAIN}" "${PAUSE_CONTAINER_ACCOUNT:-}") diff --git a/test/cases/mount-bpf-fs.sh b/test/cases/mount-bpf-fs.sh index 61b2f3844..c5281d4e2 100755 --- a/test/cases/mount-bpf-fs.sh +++ b/test/cases/mount-bpf-fs.sh @@ -66,14 +66,14 @@ if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 fi -if [ "$(cat $MOUNT_BPF_FS_MOCK)" = "called" ]; then +if [ ! "$(cat $MOUNT_BPF_FS_MOCK)" = "called" ]; then echo "❌ Test Failed: expected mount-bpf-fs to be called once but it was not!" 
exit 1 fi export -nf mount-bpf-fs -echo "--> Should default to false on 1.26-" -export KUBELET_VERSION=v1.26.0-eks-ba74326 +echo "--> Should default to false on 1.24-" +export KUBELET_VERSION=v1.24.0-eks-ba74326 MOUNT_BPF_FS_MOCK=$(mktemp) function mount-bpf-fs() { echo "called" >> $MOUNT_BPF_FS_MOCK From 6412beeede80cdd09c9a0437a7a8856e062777f7 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 16 Jun 2023 10:26:05 -0700 Subject: [PATCH 477/621] Add configurable working directory (#1231) --- doc/USER_GUIDE.md | 53 +++++++++++++++++++++- eks-worker-al2-variables.json | 5 ++- eks-worker-al2.json | 36 +++++++++------ hack/generate-template-variable-doc.py | 62 ++++++++++++++++++++++++++ scripts/cleanup.sh | 3 -- scripts/install-worker.sh | 54 +++++++++++----------- 6 files changed, 165 insertions(+), 48 deletions(-) create mode 100755 hack/generate-template-variable-doc.py diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index b2271c9a9..c8f79a5bf 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -23,9 +23,58 @@ Default values for most variables are defined in [a default variable file](eks-w Users have the following options for specifying their own values: 1. Provide a variable file with the `PACKER_VARIABLE_FILE` argument to `make`. Values in this file will override values in the default variable file. Your variable file does not need to include all possible variables, as it will be merged with the default variable file. -2. Pass a key-value pair for any template variable to `make`. These values will override any values that were specified with the first method. +2. Pass a key-value pair for any template variable to `make`. These values will override any values that were specified with the first method. In the table below, these variables have a default value of "None". -**Note** that some variables (such as `arch` and `kubernetes_version`) do not have a sensible, static default, and are satisfied by the Makefile. 
Such variables do not appear in the default variable file, and must be overridden (if necessary) by the second method described above. +> **Note** +> Some variables (such as `arch` and `kubernetes_version`) do not have a sensible, static default, and are satisfied by the Makefile. +> Such variables do not appear in the default variable file, and must be overridden (if necessary) by the second method described above. + + + +| Variable | Default value | Description | +| - | - | - | +| `additional_yum_repos` | `""` | | +| `ami_component_description` | ```{{user `remote_folder`}}/worker``` | | +| `ami_description` | ```{{user `remote_folder`}}/worker``` | | +| `ami_name` | None | | +| `ami_regions` | `""` | | +| `ami_users` | `""` | | +| `arch` | None | | +| `associate_public_ip_address` | `""` | | +| `aws_access_key_id` | ```{{user `remote_folder`}}/worker``` | | +| `aws_region` | ```{{user `remote_folder`}}/worker``` | | +| `aws_secret_access_key` | ```{{user `remote_folder`}}/worker``` | | +| `aws_session_token` | ```{{user `remote_folder`}}/worker``` | | +| `binary_bucket_name` | ```{{user `remote_folder`}}/worker``` | | +| `binary_bucket_region` | ```{{user `remote_folder`}}/worker``` | | +| `cache_container_images` | ```{{user `remote_folder`}}/worker``` | | +| `cni_plugin_version` | ```{{user `remote_folder`}}/worker``` | | +| `containerd_version` | ```{{user `remote_folder`}}/worker``` | | +| `creator` | ```{{user `remote_folder`}}/worker``` | | +| `docker_version` | ```{{user `remote_folder`}}/worker``` | | +| `encrypted` | ```{{user `remote_folder`}}/worker``` | | +| `instance_type` | None | | +| `kernel_version` | `""` | | +| `kms_key_id` | `""` | | +| `kubernetes_build_date` | None | | +| `kubernetes_version` | None | | +| `launch_block_device_mappings_volume_size` | ```{{user `remote_folder`}}/worker``` | | +| `pause_container_version` | ```{{user `remote_folder`}}/worker``` | | +| `pull_cni_from_github` | ```{{user `remote_folder`}}/worker``` | | +| 
`remote_folder` | ```{{user `remote_folder`}}/worker``` | Directory path for shell provisioner scripts on the builder instance | +| `runc_version` | ```{{user `remote_folder`}}/worker``` | | +| `security_group_id` | `""` | | +| `sonobuoy_e2e_registry` | `""` | | +| `source_ami_filter_name` | ```{{user `remote_folder`}}/worker``` | | +| `source_ami_id` | `""` | | +| `source_ami_owners` | ```{{user `remote_folder`}}/worker``` | | +| `ssh_interface` | `""` | | +| `ssh_username` | ```{{user `remote_folder`}}/worker``` | | +| `subnet_id` | `""` | | +| `temporary_security_group_source_cidrs` | `""` | | +| `volume_type` | ```{{user `remote_folder`}}/worker``` | | +| `working_dir` | ```{{user `remote_folder`}}/worker``` | Directory path for ephemeral resources on the builder instance | + --- diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index f5b3e616b..1f30250c4 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -22,7 +22,7 @@ "launch_block_device_mappings_volume_size": "4", "pause_container_version": "3.5", "pull_cni_from_github": "true", - "remote_folder": "", + "remote_folder": "/tmp", "runc_version": "1.1.5-1.amzn2", "security_group_id": "", "sonobuoy_e2e_registry": "", @@ -33,5 +33,6 @@ "ssh_username": "ec2-user", "subnet_id": "", "temporary_security_group_source_cidrs": "", - "volume_type": "gp2" + "volume_type": "gp2", + "working_dir": "{{user `remote_folder`}}/worker" } diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 1f805c85b..c9385ddc6 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -40,7 +40,8 @@ "ssh_username": null, "subnet_id": null, "temporary_security_group_source_cidrs": null, - "volume_type": null + "volume_type": null, + "working_dir": null }, "builders": [ { @@ -114,34 +115,34 @@ "provisioners": [ { "type": "shell", - "remote_folder": "{{ user `remote_folder`}}", - "script": "{{template_dir}}/scripts/install_additional_repos.sh", - "environment_vars": [ - 
"ADDITIONAL_YUM_REPOS={{user `additional_yum_repos`}}" + "inline": [ + "mkdir -p {{user `working_dir`}}", + "mkdir -p {{user `working_dir`}}/log-collector-script" ] }, { "type": "shell", "remote_folder": "{{ user `remote_folder`}}", - "inline": [ - "mkdir -p /tmp/worker/log-collector-script/" + "script": "{{template_dir}}/scripts/install_additional_repos.sh", + "environment_vars": [ + "ADDITIONAL_YUM_REPOS={{user `additional_yum_repos`}}" ] }, { "type": "file", "source": "{{template_dir}}/files/", - "destination": "/tmp/worker/" + "destination": "{{user `working_dir`}}" }, { "type": "file", "source": "{{template_dir}}/log-collector-script/linux/", - "destination": "/tmp/worker/log-collector-script/" + "destination": "{{user `working_dir`}}/log-collector-script/" }, { "type": "shell", "inline": [ - "sudo chmod -R a+x /tmp/worker/bin/", - "sudo mv /tmp/worker/bin/* /usr/bin/" + "sudo chmod -R a+x {{user `working_dir`}}/bin/", + "sudo mv {{user `working_dir`}}/bin/* /usr/bin/" ] }, { @@ -174,7 +175,8 @@ "AWS_SESSION_TOKEN={{user `aws_session_token`}}", "SONOBUOY_E2E_REGISTRY={{user `sonobuoy_e2e_registry`}}", "PAUSE_CONTAINER_VERSION={{user `pause_container_version`}}", - "CACHE_CONTAINER_IMAGES={{user `cache_container_images`}}" + "CACHE_CONTAINER_IMAGES={{user `cache_container_images`}}", + "WORKING_DIR={{user `working_dir`}}" ] }, { @@ -202,13 +204,19 @@ "type": "shell", "remote_folder": "{{ user `remote_folder`}}", "script": "{{template_dir}}/scripts/generate-version-info.sh", - "execute_command": "chmod +x {{ .Path }}; {{ .Path }} /tmp/version-info.json" + "execute_command": "chmod +x {{ .Path }}; {{ .Path }} {{user `working_dir`}}/version-info.json" }, { "type": "file", "direction": "download", - "source": "/tmp/version-info.json", + "source": "{{user `working_dir`}}/version-info.json", "destination": "{{ user `ami_name` }}-version-info.json" + }, + { + "type": "shell", + "inline": [ + "rm -rf {{user `working_dir`}}" + ] } ], "post-processors": [ diff --git 
a/hack/generate-template-variable-doc.py b/hack/generate-template-variable-doc.py new file mode 100755 index 000000000..35cdde476 --- /dev/null +++ b/hack/generate-template-variable-doc.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 + +import json +import os +import re + +whereami = os.path.abspath(__file__) +os.chdir(os.path.dirname(whereami)) + +template = {} +with open('../eks-worker-al2.json') as template_file: + template = json.load(template_file) + +default_vars = {} +with open('../eks-worker-al2-variables.json') as default_var_file: + default_vars = json.load(default_var_file) + +all_vars = {} + +for var in template['variables']: + all_vars[var] = None +for var, default_val in default_vars.items(): + all_vars[var] = default_val + +doc_file_name = '../doc/USER_GUIDE.md' +doc = None +with open(doc_file_name) as doc_file: + doc = doc_file.read() + +table_boundary = '' +existing_table_pattern = f"{table_boundary}([\S\s]*){table_boundary}" +existing_table_matches = re.search(existing_table_pattern, doc) +existing_table_lines = existing_table_matches.group(1).splitlines() + +new_table = f"{table_boundary}\n" +new_table += f"{existing_table_lines[1]}\n" +new_table += f"{existing_table_lines[2]}\n" + +existing_descriptions = {} +for line in existing_table_lines[3:]: + columns = line.split('|') + var = columns[1].strip(" `") + existing_descriptions[var] = columns[3].strip(" `") + +for var, val in all_vars.items(): + if val is not None: + if val == "": + val = f"`\"\"`" + else: + val = f"```{val}```" + description = "" + if var in existing_descriptions: + description = existing_descriptions[var] + new_table += f"| `{var}` | {val} | {description} |\n" + +new_table += table_boundary + +replace_doc_pattern = f"{table_boundary}[\S\s]*{table_boundary}" +new_doc = re.sub(replace_doc_pattern, new_table, doc) + +with open(doc_file_name, 'w') as doc_file: + doc_file.write(new_doc) diff --git a/scripts/cleanup.sh b/scripts/cleanup.sh index 24861c3e9..f99893412 100644 ---
a/scripts/cleanup.sh +++ b/scripts/cleanup.sh @@ -4,9 +4,6 @@ sudo yum clean all sudo rm -rf /var/cache/yum -# Clean up build artifacts -sudo rm -rf /tmp/worker - # Clean up files to reduce confusion during debug sudo rm -rf \ /etc/hostname \ diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index d8bdafdcd..6255069ef 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -6,8 +6,6 @@ set -o errexit IFS=$'\n\t' export AWS_DEFAULT_OUTPUT="json" -TEMPLATE_DIR=${TEMPLATE_DIR:-/tmp/worker} - ################################################################################ ### Validate Required Arguments ################################################ ################################################################################ @@ -33,6 +31,7 @@ validate_env_set KUBERNETES_BUILD_DATE validate_env_set PULL_CNI_FROM_GITHUB validate_env_set PAUSE_CONTAINER_VERSION validate_env_set CACHE_CONTAINER_IMAGES +validate_env_set WORKING_DIR ################################################################################ ### Machine Architecture ####################################################### @@ -116,7 +115,7 @@ sudo systemctl restart sshd.service ### iptables ################################################################### ################################################################################ sudo mkdir -p /etc/eks -sudo mv $TEMPLATE_DIR/iptables-restore.service /etc/eks/iptables-restore.service +sudo mv $WORKING_DIR/iptables-restore.service /etc/eks/iptables-restore.service ################################################################################ ### awscli ##################################################### @@ -127,7 +126,8 @@ ISOLATED_REGIONS="${ISOLATED_REGIONS:-us-iso-east-1 us-iso-west-1 us-isob-east-1 if ! 
[[ ${ISOLATED_REGIONS} =~ $BINARY_BUCKET_REGION ]]; then # https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html echo "Installing awscli v2 bundle" - AWSCLI_DIR=$(mktemp -d) + AWSCLI_DIR="${WORKING_DIR}/awscli-install" + mkdir "${AWSCLI_DIR}" curl \ --silent \ --show-error \ @@ -145,7 +145,7 @@ fi ### systemd #################################################################### ################################################################################ -sudo mv "${TEMPLATE_DIR}/runtime.slice" /etc/systemd/system/runtime.slice +sudo mv "${WORKING_DIR}/runtime.slice" /etc/systemd/system/runtime.slice ############################################################################### ### Containerd setup ########################################################## @@ -164,13 +164,13 @@ if [ -f "/etc/eks/containerd/containerd-config.toml" ]; then ## this means we are building a gpu ami and have already placed a containerd configuration file in /etc/eks echo "containerd config is already present" else - sudo mv $TEMPLATE_DIR/containerd-config.toml /etc/eks/containerd/containerd-config.toml + sudo mv $WORKING_DIR/containerd-config.toml /etc/eks/containerd/containerd-config.toml fi -sudo mv $TEMPLATE_DIR/kubelet-containerd.service /etc/eks/containerd/kubelet-containerd.service -sudo mv $TEMPLATE_DIR/sandbox-image.service /etc/eks/containerd/sandbox-image.service -sudo mv $TEMPLATE_DIR/pull-sandbox-image.sh /etc/eks/containerd/pull-sandbox-image.sh -sudo mv $TEMPLATE_DIR/pull-image.sh /etc/eks/containerd/pull-image.sh +sudo mv $WORKING_DIR/kubelet-containerd.service /etc/eks/containerd/kubelet-containerd.service +sudo mv $WORKING_DIR/sandbox-image.service /etc/eks/containerd/sandbox-image.service +sudo mv $WORKING_DIR/pull-sandbox-image.sh /etc/eks/containerd/pull-sandbox-image.sh +sudo mv $WORKING_DIR/pull-image.sh /etc/eks/containerd/pull-image.sh sudo chmod +x /etc/eks/containerd/pull-sandbox-image.sh sudo chmod +x /etc/eks/containerd/pull-image.sh 
@@ -217,7 +217,7 @@ if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo sed -i '/OPTIONS/d' /etc/sysconfig/docker sudo mkdir -p /etc/docker - sudo mv $TEMPLATE_DIR/docker-daemon.json /etc/docker/daemon.json + sudo mv $WORKING_DIR/docker-daemon.json /etc/docker/daemon.json sudo chown root:root /etc/docker/daemon.json # Enable docker daemon to start on boot. @@ -230,8 +230,8 @@ fi # kubelet uses journald which has built-in rotation and capped size. # See man 5 journald.conf -sudo mv $TEMPLATE_DIR/logrotate-kube-proxy /etc/logrotate.d/kube-proxy -sudo mv $TEMPLATE_DIR/logrotate.conf /etc/logrotate.conf +sudo mv $WORKING_DIR/logrotate-kube-proxy /etc/logrotate.d/kube-proxy +sudo mv $WORKING_DIR/logrotate.conf /etc/logrotate.conf sudo chown root:root /etc/logrotate.d/kube-proxy sudo chown root:root /etc/logrotate.conf sudo mkdir -p /var/log/journal @@ -314,19 +314,19 @@ sudo rm ./*.sha256 sudo mkdir -p /etc/kubernetes/kubelet sudo mkdir -p /etc/systemd/system/kubelet.service.d -sudo mv $TEMPLATE_DIR/kubelet-kubeconfig /var/lib/kubelet/kubeconfig +sudo mv $WORKING_DIR/kubelet-kubeconfig /var/lib/kubelet/kubeconfig sudo chown root:root /var/lib/kubelet/kubeconfig # Inject CSIServiceAccountToken feature gate to kubelet config if kubernetes version starts with 1.20. # This is only injected for 1.20 since CSIServiceAccountToken will be moved to beta starting 1.21. 
if [[ $KUBERNETES_VERSION == "1.20"* ]]; then - KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED=$(cat $TEMPLATE_DIR/kubelet-config.json | jq '.featureGates += {CSIServiceAccountToken: true}') - echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $TEMPLATE_DIR/kubelet-config.json + KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED=$(cat $WORKING_DIR/kubelet-config.json | jq '.featureGates += {CSIServiceAccountToken: true}') + echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $WORKING_DIR/kubelet-config.json fi -sudo mv $TEMPLATE_DIR/kubelet.service /etc/systemd/system/kubelet.service +sudo mv $WORKING_DIR/kubelet.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service -sudo mv $TEMPLATE_DIR/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json +sudo mv $WORKING_DIR/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json sudo chown root:root /etc/kubernetes/kubelet/kubelet-config.json sudo systemctl daemon-reload @@ -338,17 +338,17 @@ sudo systemctl disable kubelet ################################################################################ sudo mkdir -p /etc/eks -sudo mv $TEMPLATE_DIR/get-ecr-uri.sh /etc/eks/get-ecr-uri.sh +sudo mv $WORKING_DIR/get-ecr-uri.sh /etc/eks/get-ecr-uri.sh sudo chmod +x /etc/eks/get-ecr-uri.sh -sudo mv $TEMPLATE_DIR/eni-max-pods.txt /etc/eks/eni-max-pods.txt -sudo mv $TEMPLATE_DIR/bootstrap.sh /etc/eks/bootstrap.sh +sudo mv $WORKING_DIR/eni-max-pods.txt /etc/eks/eni-max-pods.txt +sudo mv $WORKING_DIR/bootstrap.sh /etc/eks/bootstrap.sh sudo chmod +x /etc/eks/bootstrap.sh -sudo mv $TEMPLATE_DIR/max-pods-calculator.sh /etc/eks/max-pods-calculator.sh +sudo mv $WORKING_DIR/max-pods-calculator.sh /etc/eks/max-pods-calculator.sh sudo chmod +x /etc/eks/max-pods-calculator.sh SONOBUOY_E2E_REGISTRY="${SONOBUOY_E2E_REGISTRY:-}" if [[ -n "$SONOBUOY_E2E_REGISTRY" ]]; then - sudo mv $TEMPLATE_DIR/sonobuoy-e2e-registry-config 
/etc/eks/sonobuoy-e2e-registry-config + sudo mv $WORKING_DIR/sonobuoy-e2e-registry-config /etc/eks/sonobuoy-e2e-registry-config sudo sed -i s,SONOBUOY_E2E_REGISTRY,$SONOBUOY_E2E_REGISTRY,g /etc/eks/sonobuoy-e2e-registry-config fi @@ -366,7 +366,7 @@ fi sudo chmod +x $ECR_CREDENTIAL_PROVIDER_BINARY sudo mkdir -p /etc/eks/image-credential-provider sudo mv $ECR_CREDENTIAL_PROVIDER_BINARY /etc/eks/image-credential-provider/ -sudo mv $TEMPLATE_DIR/ecr-credential-provider-config.json /etc/eks/image-credential-provider/config.json +sudo mv $WORKING_DIR/ecr-credential-provider-config.json /etc/eks/image-credential-provider/config.json ################################################################################ ### Cache Images ############################################################### @@ -489,13 +489,13 @@ sudo yum install -y amazon-ssm-agent ################################################################################ BASE_AMI_ID=$(imds /latest/meta-data/ami-id) -cat << EOF > /tmp/release +cat << EOF > "${WORKING_DIR}/release" BASE_AMI_ID="$BASE_AMI_ID" BUILD_TIME="$(date)" BUILD_KERNEL="$(uname -r)" ARCH="$(uname -m)" EOF -sudo mv /tmp/release /etc/eks/release +sudo mv "${WORKING_DIR}/release" /etc/eks/release sudo chown -R root:root /etc/eks ################################################################################ @@ -520,7 +520,7 @@ echo vm.max_map_count=524288 | sudo tee -a /etc/sysctl.conf ### adding log-collector-script ################################################ ################################################################################ sudo mkdir -p /etc/eks/log-collector-script/ -sudo cp $TEMPLATE_DIR/log-collector-script/eks-log-collector.sh /etc/eks/log-collector-script/ +sudo cp $WORKING_DIR/log-collector-script/eks-log-collector.sh /etc/eks/log-collector-script/ ################################################################################ ### Remove Yum Update from cloud-init config ################################### From 
7c45ddef58bbb50c869095eeb2185e41a745db6f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 22 Jun 2023 17:06:29 -0700 Subject: [PATCH 478/621] Update eni-max-pods.txt (#1330) --- files/eni-max-pods.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index d24d8d28f..f82b87d9f 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -176,6 +176,14 @@ c7g.large 29 c7g.medium 8 c7g.metal 737 c7g.xlarge 58 +c7gn.12xlarge 234 +c7gn.16xlarge 737 +c7gn.2xlarge 58 +c7gn.4xlarge 234 +c7gn.8xlarge 234 +c7gn.large 29 +c7gn.medium 8 +c7gn.xlarge 58 cr1.8xlarge 234 d2.2xlarge 58 d2.4xlarge 234 @@ -233,6 +241,9 @@ h1.4xlarge 234 h1.8xlarge 234 hpc6a.48xlarge 100 hpc6id.32xlarge 51 +hpc7g.16xlarge 198 +hpc7g.4xlarge 198 +hpc7g.8xlarge 198 hs1.8xlarge 234 i2.2xlarge 58 i2.4xlarge 234 From 056e31f8c7477e893424abce468cb32bbcd1f079 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 29 Jun 2023 09:15:21 -0700 Subject: [PATCH 479/621] Use recommended clocksources (#1328) --- files/bin/configure-clocksource | 46 +++++++++++++++++++++++++++++ files/bootstrap.sh | 5 ++++ files/configure-clocksource.service | 8 +++++ scripts/install-worker.sh | 22 +++----------- scripts/upgrade_kernel.sh | 6 ++++ test/Dockerfile | 1 + 6 files changed, 70 insertions(+), 18 deletions(-) create mode 100755 files/bin/configure-clocksource create mode 100644 files/configure-clocksource.service diff --git a/files/bin/configure-clocksource b/files/bin/configure-clocksource new file mode 100755 index 000000000..9815401f8 --- /dev/null +++ b/files/bin/configure-clocksource @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +CLOCK_PATH="/sys/devices/system/clocksource/clocksource0" + +function log() { + echo >&2 "$@" +} + +function current-clocksource() { + cat "${CLOCK_PATH}/current_clocksource" +} + +function 
check-available-clocksource() { + grep --quiet "${1}" "${CLOCK_PATH}/available_clocksource" +} + +function try-set-clocksource() { + if check-available-clocksource "${1}"; then + echo "${1}" > "${CLOCK_PATH}/current_clocksource" + log "configured clocksource: ${1}" + else + log "clocksource not available: ${1}" + fi +} + +case "$(imds /latest/meta-data/system)" in + nitro) + CLOCKSOURCE="kvm-clock" + ;; + + **) + CLOCKSOURCE="tsc" + ;; +esac + +log "desired clocksource: ${CLOCKSOURCE}" + +if [ ! "$(current-clocksource)" = "${CLOCKSOURCE}" ]; then + try-set-clocksource "${CLOCKSOURCE}" +fi + +log "final clocksource: $(current-clocksource)" diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 3729dad2c..8937784bb 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -325,6 +325,11 @@ if [ "$MOUNT_BPF_FS" = "true" ]; then mount-bpf-fs fi +cp -v /etc/eks/configure-clocksource.service /etc/systemd/system/configure-clocksource.service +chown root:root /etc/systemd/system/configure-clocksource.service +systemctl daemon-reload +systemctl enable --now configure-clocksource + ECR_URI=$(/etc/eks/get-ecr-uri.sh "${AWS_DEFAULT_REGION}" "${AWS_SERVICES_DOMAIN}" "${PAUSE_CONTAINER_ACCOUNT:-}") PAUSE_CONTAINER_IMAGE=${PAUSE_CONTAINER_IMAGE:-$ECR_URI/eks/pause} PAUSE_CONTAINER="$PAUSE_CONTAINER_IMAGE:$PAUSE_CONTAINER_VERSION" diff --git a/files/configure-clocksource.service b/files/configure-clocksource.service new file mode 100644 index 000000000..5274ca041 --- /dev/null +++ b/files/configure-clocksource.service @@ -0,0 +1,8 @@ +[Unit] +Description=Configure kernel clocksource + +[Service] +ExecStart=/usr/bin/configure-clocksource + +[Install] +WantedBy=multi-user.target diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 6255069ef..b62f81394 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -81,27 +81,13 @@ sudo yum versionlock kernel-$(uname -r) # Remove the ec2-net-utils package, if it's installed. 
This package interferes with the route setup on the instance. if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi +sudo mkdir -p /etc/eks/ + ################################################################################ ### Time ####################################################################### ################################################################################ -# Make sure Amazon Time Sync Service starts on boot. -sudo chkconfig chronyd on - -# Make sure that chronyd syncs RTC clock to the kernel. -cat << EOF | sudo tee -a /etc/chrony.conf -# This directive enables kernel synchronisation (every 11 minutes) of the -# real-time clock. Note that it can’t be used along with the 'rtcfile' directive. -rtcsync -EOF - -# If current clocksource is xen, switch to tsc -if grep --quiet xen /sys/devices/system/clocksource/clocksource0/current_clocksource \ - && grep --quiet tsc /sys/devices/system/clocksource/clocksource0/available_clocksource; then - echo "tsc" | sudo tee /sys/devices/system/clocksource/clocksource0/current_clocksource -else - echo "tsc as a clock source is not applicable, skipping." 
-fi +sudo mv $WORKING_DIR/configure-clocksource.service /etc/eks/configure-clocksource.service ################################################################################ ### SSH ######################################################################## @@ -114,7 +100,7 @@ sudo systemctl restart sshd.service ################################################################################ ### iptables ################################################################### ################################################################################ -sudo mkdir -p /etc/eks + sudo mv $WORKING_DIR/iptables-restore.service /etc/eks/iptables-restore.service ################################################################################ diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 67e509caa..52d696056 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -24,4 +24,10 @@ sudo grubby \ --update-kernel=ALL \ --args="psi=1" +# use the tsc clocksource by default +# https://repost.aws/knowledge-center/manage-ec2-linux-clock-source +sudo grubby \ + --update-kernel=ALL \ + --args="clocksource=tsc tsc=reliable" + sudo reboot diff --git a/test/Dockerfile b/test/Dockerfile index 5470ad965..d00837c3e 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -7,6 +7,7 @@ RUN amazon-linux-extras enable docker && \ ENV IMDS_ENDPOINT=127.0.0.1:1338 COPY --from=aemm /ec2-metadata-mock /sbin/ec2-metadata-mock +RUN mkdir -p /etc/systemd/system RUN mkdir -p /etc/eks/containerd COPY files/ /etc/eks/ COPY files/containerd-config.toml files/kubelet-containerd.service files/pull-sandbox-image.sh files/sandbox-image.service /etc/eks/containerd/ From fac5ae3204c22f33afa1744c0d8795a980f21984 Mon Sep 17 00:00:00 2001 From: Xavier Ryan <108886506+xr1776@users.noreply.github.com> Date: Wed, 5 Jul 2023 20:56:32 -0400 Subject: [PATCH 480/621] Update CHANGELOG.md for v20230703 AMI release (#1337) * Update CHANGELOG.md for v20230703 AMI release * Update 
CHANGELOG.md Co-authored-by: Carter * Update CHANGELOG.md --------- Co-authored-by: Carter --- CHANGELOG.md | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9532c7419..f7ec24852 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,62 @@ # Changelog +### AMI Release v20230703 +* amazon-eks-gpu-node-1.27-v20230703 +* amazon-eks-gpu-node-1.26-v20230703 +* amazon-eks-gpu-node-1.25-v20230703 +* amazon-eks-gpu-node-1.24-v20230703 +* amazon-eks-gpu-node-1.23-v20230703 +* amazon-eks-gpu-node-1.22-v20230703 +* amazon-eks-arm64-node-1.27-v20230703 +* amazon-eks-arm64-node-1.26-v20230703 +* amazon-eks-arm64-node-1.25-v20230703 +* amazon-eks-arm64-node-1.24-v20230703 +* amazon-eks-arm64-node-1.23-v20230703 +* amazon-eks-arm64-node-1.22-v20230703 +* amazon-eks-node-1.27-v20230703 +* amazon-eks-node-1.26-v20230703 +* amazon-eks-node-1.25-v20230703 +* amazon-eks-node-1.24-v20230703 +* amazon-eks-node-1.23-v20230703 +* amazon-eks-node-1.22-v20230703 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.1-20230703` +* `1.26.4-20230703` +* `1.25.9-20230703` +* `1.24.13-20230703` +* `1.23.17-20230703` +* `1.22.17-20230703` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.1/2023-04-19/ +* s3://amazon-eks/1.26.4/2023-05-11/ +* s3://amazon-eks/1.25.9/2023-05-11/ +* s3://amazon-eks/1.24.13/2023-05-11/ +* s3://amazon-eks/1.23.17/2023-05-11/ +* s3://amazon-eks/1.22.17/2023-05-11/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.247-162.350.amzn2 + * Kubernetes 1.24 and above: 5.10.184-175.731.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
+* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.5-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +Notable changes: +- Update Kernel to 5.4.247-162.350.amzn2 to address [ALASKERNEL-5.4-2023-048](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-048.html), [CVE-2023-1206](https://alas.aws.amazon.com/cve/html/CVE-2023-1206.html) +- Update Kernel to 5.10.184-175.731.amzn2 to address [ALASKERNEL-5.10-2023-035](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-035.html), [CVE-2023-1206](https://alas.aws.amazon.com/cve/html/CVE-2023-1206.html) +- Use recommended clocksources ([#1328](https://github.com/awslabs/amazon-eks-ami/pull/1328)) +- Add configurable working directory ([#1231](https://github.com/awslabs/amazon-eks-ami/pull/1231)) +- Update eni-max-pods.txt ([#1330](https://github.com/awslabs/amazon-eks-ami/pull/1330)) +- Mount bpffs by default on 1.25+ ([#1320](https://github.com/awslabs/amazon-eks-ami/pull/1320)) + ### AMI Release v20230607 * amazon-eks-gpu-node-1.27-v20230607 * amazon-eks-gpu-node-1.26-v20230607 From 2ae9f65073ad47e5c75307d4b3b4c8498757c290 Mon Sep 17 00:00:00 2001 From: Xavier Ryan <108886506+xr1776@users.noreply.github.com> Date: Wed, 5 Jul 2023 21:13:18 -0400 Subject: [PATCH 481/621] Update CHANGELOG.md (#1338) --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7ec24852..3d6d91f0c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,7 +48,6 @@ AMI details: * `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 * `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 -Notable changes: Notable changes: - Update Kernel to 5.4.247-162.350.amzn2 to address [ALASKERNEL-5.4-2023-048](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-048.html), [CVE-2023-1206](https://alas.aws.amazon.com/cve/html/CVE-2023-1206.html) - Update Kernel to 5.10.184-175.731.amzn2 to address 
[ALASKERNEL-5.10-2023-035](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-035.html), [CVE-2023-1206](https://alas.aws.amazon.com/cve/html/CVE-2023-1206.html) From fdee26ac401ab75ab6b12a2c6f59d4a319e39cbc Mon Sep 17 00:00:00 2001 From: jacobwolfaws <113703057+jacobwolfaws@users.noreply.github.com> Date: Thu, 6 Jul 2023 21:59:38 -0400 Subject: [PATCH 482/621] Add logging for aws managed csi drivers (#1336) --- log-collector-script/linux/eks-log-collector.sh | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 75eada625..c8e2048ae 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -20,7 +20,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.7.5" +readonly PROGRAM_VERSION="0.7.6" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -50,6 +50,7 @@ REQUIRED_UTILS=( COMMON_DIRECTORIES=( kernel + modinfo system docker containerd @@ -263,6 +264,7 @@ collect() { get_region get_common_logs get_kernel_info + get_modinfo get_mounts_info get_selinux_info get_iptables_info @@ -354,6 +356,7 @@ get_common_logs() { cp --force --dereference --recursive /var/log/containers/ebs-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null cp --force --dereference --recursive /var/log/containers/efs-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null cp --force --dereference --recursive /var/log/containers/fsx-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/fsx-openzfs-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null cp --force --dereference --recursive /var/log/containers/file-cache-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null continue fi @@ -364,6 +367,9 @@ get_common_logs() { cp --force 
--dereference --recursive /var/log/pods/kube-system_kube-proxy* "${COLLECT_DIR}"/var_log/ 2> /dev/null cp --force --dereference --recursive /var/log/pods/kube-system_ebs-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null cp --force --dereference --recursive /var/log/pods/kube-system_efs-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_fsx-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_fsx-openzfs-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_file-cache-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null continue fi cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/var_log/ 2> /dev/null @@ -386,6 +392,12 @@ get_kernel_info() { ok } +# collect modinfo on specific modules for debugging purposes +get_modinfo() { + try "collect modinfo" + modinfo lustre > "${COLLECT_DIR}/modinfo/lustre" +} + get_docker_logs() { try "collect Docker daemon logs" From b21ce6e61f465e66e79428948c3d881506cf69bc Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Fri, 7 Jul 2023 12:01:39 -0700 Subject: [PATCH 483/621] Update CHANGELOG.md latest AMI release notes to highlight this was last 1.22 AMI (#1342) --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d6d91f0c..c316c3b97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ AMI details: * `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 Notable changes: +- This is the last AMI release for Kubernetes 1.22 - Update Kernel to 5.4.247-162.350.amzn2 to address [ALASKERNEL-5.4-2023-048](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-048.html), [CVE-2023-1206](https://alas.aws.amazon.com/cve/html/CVE-2023-1206.html) - Update Kernel to 5.10.184-175.731.amzn2 to address [ALASKERNEL-5.10-2023-035](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-035.html), 
[CVE-2023-1206](https://alas.aws.amazon.com/cve/html/CVE-2023-1206.html) - Use recommended clocksources ([#1328](https://github.com/awslabs/amazon-eks-ami/pull/1328)) From 453da19bbb5cd743416cfb181f50bd61adef3949 Mon Sep 17 00:00:00 2001 From: camrakin <113552683+camrakin@users.noreply.github.com> Date: Fri, 7 Jul 2023 12:02:09 -0700 Subject: [PATCH 484/621] Removing 1.22 from Makefile (#1343) --- Makefile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Makefile b/Makefile index d5e58398f..1bac82ae2 100644 --- a/Makefile +++ b/Makefile @@ -100,10 +100,6 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html -.PHONY: 1.22 -1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 - $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-05-11 pull_cni_from_github=true - .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-05-11 pull_cni_from_github=true From 6ab0808f4d3f37ac9c55027cc8db3dd84da22a47 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 7 Jul 2023 15:17:22 -0700 Subject: [PATCH 485/621] Generate version info for cached images only when is active (#1341) --- eks-worker-al2.json | 5 ++++- scripts/generate-version-info.sh | 7 ++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index c9385ddc6..75ea08830 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -204,7 +204,10 @@ "type": "shell", "remote_folder": "{{ user `remote_folder`}}", "script": "{{template_dir}}/scripts/generate-version-info.sh", - "execute_command": "chmod +x {{ .Path }}; {{ .Path }} {{user `working_dir`}}/version-info.json" + "execute_command": "chmod +x {{ .Path }}; {{ .Path }} {{user `working_dir`}}/version-info.json", + "environment_vars": [ + "CACHE_CONTAINER_IMAGES={{user `cache_container_images`}}" + ] }, { "type": 
"file", diff --git a/scripts/generate-version-info.sh b/scripts/generate-version-info.sh index 9a52f42ce..3f75cc01d 100644 --- a/scripts/generate-version-info.sh +++ b/scripts/generate-version-info.sh @@ -20,4 +20,9 @@ echo $(jq ".binaries.kubelet = \"$(kubelet --version | awk '{print $2}')\"" $OUT echo $(jq ".binaries.awscli = \"$(aws --version | awk '{print $1}' | cut -d '/' -f 2)\"" $OUTPUT_FILE) > $OUTPUT_FILE # cached images -echo $(jq ".images = [ $(sudo ctr -n k8s.io image ls -q | cut -d'/' -f2- | sort | uniq | grep -v 'sha256' | xargs -r printf "\"%s\"," | sed 's/,$//') ]" $OUTPUT_FILE) > $OUTPUT_FILE +if systemctl is-active --quiet containerd; then + echo $(jq ".images = [ $(sudo ctr -n k8s.io image ls -q | cut -d'/' -f2- | sort | uniq | grep -v 'sha256' | xargs -r printf "\"%s\"," | sed 's/,$//') ]" $OUTPUT_FILE) > $OUTPUT_FILE +elif [ "${CACHE_CONTAINER_IMAGES}" = "true" ]; then + echo "containerd must be active to generate version info for cached images" + exit 1 +fi From 2e92ea09e60f742738927fece3002c6e4d44e9d4 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Mon, 10 Jul 2023 09:48:30 -0700 Subject: [PATCH 486/621] Remove region names from us-iso/us-isob credential provider config (#1344) --- files/ecr-credential-provider-config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/files/ecr-credential-provider-config.json b/files/ecr-credential-provider-config.json index 21581c4e9..6b251d69c 100644 --- a/files/ecr-credential-provider-config.json +++ b/files/ecr-credential-provider-config.json @@ -8,8 +8,8 @@ "*.dkr.ecr.*.amazonaws.com", "*.dkr.ecr.*.amazonaws.com.cn", "*.dkr.ecr-fips.*.amazonaws.com", - "*.dkr.ecr.us-iso-east-1.c2s.ic.gov", - "*.dkr.ecr.us-isob-east-1.sc2s.sgov.gov" + "*.dkr.ecr.*.c2s.ic.gov", + "*.dkr.ecr.*.sc2s.sgov.gov" ], "defaultCacheDuration": "12h", "apiVersion": "credentialprovider.kubelet.k8s.io/v1" From 070ddc31ca14f4d06c4ad5c9b0a3a45627070476 Mon Sep 17 00:00:00 2001 From: Davanum Srinivas Date: Mon, 
10 Jul 2023 13:42:03 -0400 Subject: [PATCH 487/621] Amazon Linux 2023 proof-of-concept (#1340) --- Makefile | 15 +++++++++++++-- hack/transform-al2-to-al2023.sh | 32 +++++++++++++++++++++++++++++++ scripts/install-worker.sh | 17 ++++++++++++++--- scripts/validate.sh | 34 +++++++++++++++++++-------------- 4 files changed, 79 insertions(+), 19 deletions(-) create mode 100755 hack/transform-al2-to-al2023.sh diff --git a/Makefile b/Makefile index 1bac82ae2..b070995ad 100644 --- a/Makefile +++ b/Makefile @@ -26,13 +26,18 @@ ifeq ($(call vercmp,$(kubernetes_version),gteq,1.25.0), true) ami_component_description ?= (k8s: {{ user `kubernetes_version` }}, containerd: {{ user `containerd_version` }}) endif +OS= +ifneq (,$(findstring al2023, $(PACKER_TEMPLATE_FILE))) + OS=-al2023 +endif + arch ?= x86_64 ifeq ($(arch), arm64) instance_type ?= m6g.large - ami_name ?= amazon-eks-arm64-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') + ami_name ?= amazon-eks-arm64-node$(OS)-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') else instance_type ?= m5.large - ami_name ?= amazon-eks-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') + ami_name ?= amazon-eks-node$(OS)-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') endif ifeq ($(aws_region), cn-northwest-1) @@ -74,6 +79,12 @@ ifeq (, $(SHELLCHECK_COMMAND)) endif SHELL_FILES := $(shell find $(MAKEFILE_DIR) -type f -name '*.sh') +.PHONY: transform-al2-to-al2023 +transform-al2-to-al2023: + PACKER_TEMPLATE_FILE=$(PACKER_TEMPLATE_FILE) \ + PACKER_DEFAULT_VARIABLE_FILE=$(PACKER_DEFAULT_VARIABLE_FILE) \ + hack/transform-al2-to-al2023.sh + .PHONY: lint lint: ## Check the source files for syntax and format issues $(SHFMT_COMMAND) $(SHFMT_FLAGS) --diff $(MAKEFILE_DIR) diff --git a/hack/transform-al2-to-al2023.sh b/hack/transform-al2-to-al2023.sh new file mode 100755 index 000000000..d7ebd29b3 --- /dev/null +++ b/hack/transform-al2-to-al2023.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +set -o pipefail +set -o nounset +set -o errexit + +if [[ -z 
"${PACKER_TEMPLATE_FILE:-}" ]]; then + echo "PACKER_TEMPLATE_FILE must be set." >&2 + exit 1 +fi +if [[ -z "${PACKER_DEFAULT_VARIABLE_FILE:-}" ]]; then + echo "PACKER_DEFAULT_VARIABLE_FILE must be set." >&2 + exit 1 +fi + +# rsa keys are not supported in al2023, switch to ed25519 +# delete the upgrade kernel provisioner as we don't need it for al2023 +cat "${PACKER_TEMPLATE_FILE}" \ + | jq '._comment = "All template variables are enumerated here; and most variables have a default value defined in eks-worker-al2023-variables.json"' \ + | jq '.variables.temporary_key_pair_type = "ed25519"' \ + | jq '.provisioners |= map(select(.script//empty|endswith("upgrade_kernel.sh")|not))' \ + > "${PACKER_TEMPLATE_FILE/al2/al2023}" + +# use newer versions of containerd and runc, do not install docker +# use al2023 6.1 minimal image +cat "${PACKER_DEFAULT_VARIABLE_FILE}" \ + | jq '.ami_component_description = "(k8s: {{ user `kubernetes_version` }}, containerd: {{ user `containerd_version` }})"' \ + | jq '.ami_description = "EKS-optimized Kubernetes node based on Amazon Linux 2023"' \ + | jq '.containerd_version = "*" | .runc_version = "*" | .docker_version = "" ' \ + | jq '.source_ami_filter_name = "al2023-ami-minimal-2023.*-kernel-6.1-x86_64"' \ + | jq '.volume_type = "gp3"' \ + > "${PACKER_DEFAULT_VARIABLE_FILE/al2/al2023}" diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index b62f81394..394f4c605 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -59,7 +59,6 @@ sudo yum install -y \ aws-cfn-bootstrap \ chrony \ conntrack \ - curl \ ec2-instance-connect \ ethtool \ ipvsadm \ @@ -73,8 +72,20 @@ sudo yum install -y \ mdadm \ pigz -# Remove any old kernel versions. `--count=1` here means "only leave 1 kernel version installed" -sudo package-cleanup --oldkernels --count=1 -y +# skip kernel version cleanup on al2023 +if ! cat /etc/*release | grep "al2023" > /dev/null 2>&1; then + # Remove any old kernel versions. 
`--count=1` here means "only leave 1 kernel version installed" + sudo package-cleanup --oldkernels --count=1 -y +fi + +# packages that need special handling +if cat /etc/*release | grep "al2023" > /dev/null 2>&1; then + # exists in al2023 only (needed by kubelet) + sudo yum install -y iptables-legacy +else + # curl-minimal already exists in al2023 so install curl only on al2 + sudo yum install -y curl +fi sudo yum versionlock kernel-$(uname -r) diff --git a/scripts/validate.sh b/scripts/validate.sh index 0b007e386..da6a31627 100644 --- a/scripts/validate.sh +++ b/scripts/validate.sh @@ -45,8 +45,6 @@ else exit 1 fi -echo "Verifying that the package versionlocks are correct..." - function versionlock-entries() { # the format of this output is EPOCH:NAME-VERSION-RELEASE.ARCH # more info in yum-versionlock(1) @@ -58,21 +56,29 @@ function versionlock-packages() { versionlock-entries | xargs -I '{}' rpm --query '{}' --queryformat '%{NAME}\n' } -for ENTRY in $(versionlock-entries); do - if ! rpm --query "$ENTRY" &> /dev/null; then - echo "There is no package matching the versionlock entry: '$ENTRY'" - exit 1 +function verify-versionlocks() { + for ENTRY in $(versionlock-entries); do + if ! rpm --query "$ENTRY" &> /dev/null; then + echo "There is no package matching the versionlock entry: '$ENTRY'" + exit 1 + fi + done + + LOCKED_PACKAGES=$(versionlock-packages | wc -l) + UNIQUE_LOCKED_PACKAGES=$(versionlock-packages | sort -u | wc -l) + if [ $LOCKED_PACKAGES -ne $UNIQUE_LOCKED_PACKAGES ]; then + echo "Package(s) have multiple version locks!" + versionlock-entries fi -done -LOCKED_PACKAGES=$(versionlock-packages | wc -l) -UNIQUE_LOCKED_PACKAGES=$(versionlock-packages | sort -u | wc -l) -if [ $LOCKED_PACKAGES -ne $UNIQUE_LOCKED_PACKAGES ]; then - echo "Package(s) have multiple version locks!" - versionlock-entries -fi + echo "Package versionlocks are correct!" +} -echo "Package versionlocks are correct!" 
+# run verify-versionlocks on al2 only, as it is not needed on al2023 +if ! cat /etc/*release | grep "al2023" > /dev/null 2>&1; then + echo "Verifying that the package versionlocks are correct..." + verify-versionlocks +fi REQUIRED_COMMANDS=(unpigz) From f9da7e766a5d1662c7c4d7778ce5d28617718fe5 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 11 Jul 2023 13:10:37 -0700 Subject: [PATCH 488/621] Remove hardcoded pull_cni_from_github var (#1346) --- Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index b070995ad..e4449c4c7 100644 --- a/Makefile +++ b/Makefile @@ -113,23 +113,23 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-05-11 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-05-11 .PHONY: 1.24 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 - $(MAKE) k8s kubernetes_version=1.24.13 kubernetes_build_date=2023-05-11 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.24.13 kubernetes_build_date=2023-05-11 .PHONY: 1.25 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 - $(MAKE) k8s kubernetes_version=1.25.9 kubernetes_build_date=2023-05-11 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.25.9 kubernetes_build_date=2023-05-11 .PHONY: 1.26 1.26: ## Build EKS Optimized AL2 AMI - K8s 1.26 - $(MAKE) k8s kubernetes_version=1.26.4 kubernetes_build_date=2023-05-11 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.26.4 kubernetes_build_date=2023-05-11 .PHONY: 1.27 1.27: ## Build EKS Optimized AL2 AMI - K8s 1.27 - $(MAKE) k8s kubernetes_version=1.27.1 kubernetes_build_date=2023-04-19 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.27.1 kubernetes_build_date=2023-04-19 .PHONY: clean clean: From d31fc7a415b70d9a91e45e846e297bde6d26d5b9 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 11 
Jul 2023 13:11:07 -0700 Subject: [PATCH 489/621] Remove sonobuoy_e2e_registry (#1249) --- eks-worker-al2-variables.json | 1 - eks-worker-al2.json | 2 -- files/sonobuoy-e2e-registry-config | 5 ----- scripts/install-worker.sh | 6 ------ 4 files changed, 14 deletions(-) delete mode 100644 files/sonobuoy-e2e-registry-config diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 1f30250c4..6faf232e6 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -25,7 +25,6 @@ "remote_folder": "/tmp", "runc_version": "1.1.5-1.amzn2", "security_group_id": "", - "sonobuoy_e2e_registry": "", "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", "source_ami_id": "", "source_ami_owners": "137112412989", diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 75ea08830..51d20fbf9 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -32,7 +32,6 @@ "remote_folder": null, "runc_version": null, "security_group_id": null, - "sonobuoy_e2e_registry": null, "source_ami_filter_name": null, "source_ami_id": null, "source_ami_owners": null, @@ -173,7 +172,6 @@ "AWS_ACCESS_KEY_ID={{user `aws_access_key_id`}}", "AWS_SECRET_ACCESS_KEY={{user `aws_secret_access_key`}}", "AWS_SESSION_TOKEN={{user `aws_session_token`}}", - "SONOBUOY_E2E_REGISTRY={{user `sonobuoy_e2e_registry`}}", "PAUSE_CONTAINER_VERSION={{user `pause_container_version`}}", "CACHE_CONTAINER_IMAGES={{user `cache_container_images`}}", "WORKING_DIR={{user `working_dir`}}" diff --git a/files/sonobuoy-e2e-registry-config b/files/sonobuoy-e2e-registry-config deleted file mode 100644 index be3813d86..000000000 --- a/files/sonobuoy-e2e-registry-config +++ /dev/null @@ -1,5 +0,0 @@ -dockerLibraryRegistry: SONOBUOY_E2E_REGISTRY/library -e2eRegistry: SONOBUOY_E2E_REGISTRY/kubernetes-e2e-test-images -gcRegistry: SONOBUOY_E2E_REGISTRY -googleContainerRegistry: SONOBUOY_E2E_REGISTRY/google-containers -sampleRegistry: SONOBUOY_E2E_REGISTRY/google-samples \ No newline at end of file 
diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 394f4c605..398858df9 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -343,12 +343,6 @@ sudo chmod +x /etc/eks/bootstrap.sh sudo mv $WORKING_DIR/max-pods-calculator.sh /etc/eks/max-pods-calculator.sh sudo chmod +x /etc/eks/max-pods-calculator.sh -SONOBUOY_E2E_REGISTRY="${SONOBUOY_E2E_REGISTRY:-}" -if [[ -n "$SONOBUOY_E2E_REGISTRY" ]]; then - sudo mv $WORKING_DIR/sonobuoy-e2e-registry-config /etc/eks/sonobuoy-e2e-registry-config - sudo sed -i s,SONOBUOY_E2E_REGISTRY,$SONOBUOY_E2E_REGISTRY,g /etc/eks/sonobuoy-e2e-registry-config -fi - ################################################################################ ### ECR CREDENTIAL PROVIDER #################################################### ################################################################################ From 44d652f5769edc67656c410cdd50d6049f81d777 Mon Sep 17 00:00:00 2001 From: Davanum Srinivas Date: Wed, 12 Jul 2023 13:33:18 -0400 Subject: [PATCH 490/621] Revert "avoid hard coding provisioner index array" (#1347) This reverts commit 6c167655de0e40bce46bc786c6e2ab2ae795e25a. 
Signed-off-by: Davanum Srinivas --- hack/transform-al2-to-al2023.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/transform-al2-to-al2023.sh b/hack/transform-al2-to-al2023.sh index d7ebd29b3..e15be44d9 100755 --- a/hack/transform-al2-to-al2023.sh +++ b/hack/transform-al2-to-al2023.sh @@ -18,7 +18,7 @@ fi cat "${PACKER_TEMPLATE_FILE}" \ | jq '._comment = "All template variables are enumerated here; and most variables have a default value defined in eks-worker-al2023-variables.json"' \ | jq '.variables.temporary_key_pair_type = "ed25519"' \ - | jq '.provisioners |= map(select(.script//empty|endswith("upgrade_kernel.sh")|not))' \ + | jq 'del(.provisioners[5])' \ > "${PACKER_TEMPLATE_FILE/al2/al2023}" # use newer versions of containerd and runc, do not install docker From b40829183a6eec75947f5e8dea62b00340c63e32 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 13 Jul 2023 12:01:03 -0700 Subject: [PATCH 491/621] Update sync-eni-max-pods.yaml role ARN (#1350) --- .github/workflows/sync-eni-max-pods.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sync-eni-max-pods.yaml b/.github/workflows/sync-eni-max-pods.yaml index 9bb3275bc..779d13ea5 100644 --- a/.github/workflows/sync-eni-max-pods.yaml +++ b/.github/workflows/sync-eni-max-pods.yaml @@ -17,7 +17,7 @@ jobs: - uses: aws-actions/configure-aws-credentials@v2 with: aws-region: ${{ secrets.AWS_REGION }} - role-to-assume: ${{ secrets.AWS_ROLE_ARN }} + role-to-assume: ${{ secrets.AWS_ROLE_ARN_SYNC_ENI_MAX_PODS }} - uses: actions/checkout@v3 with: repository: awslabs/amazon-eks-ami From 6ff42cfc651709d6b9197dba6f29f00b40e6676f Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 13 Jul 2023 15:10:38 -0700 Subject: [PATCH 492/621] Add CodeCommit sync action (#1351) --- .github/workflows/sync-to-codecommit.yaml | 29 +++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/workflows/sync-to-codecommit.yaml diff --git 
a/.github/workflows/sync-to-codecommit.yaml b/.github/workflows/sync-to-codecommit.yaml new file mode 100644 index 000000000..a5d03d0e4 --- /dev/null +++ b/.github/workflows/sync-to-codecommit.yaml @@ -0,0 +1,29 @@ +name: '[Sync] Push to CodeCommit' + +on: + schedule: + # twice an hour, at :00 and :30 + - cron: '0,30 * * * *' + +jobs: + mirror: + if: github.repository == 'awslabs/amazon-eks-ami' + runs-on: ubuntu-latest + # These permissions are needed to interact with GitHub's OIDC Token endpoint. + permissions: + id-token: write + contents: read + steps: + - uses: actions/checkout@v2 + with: + # fetch complete history + fetch-depth: 0 + - uses: aws-actions/configure-aws-credentials@v1 + with: + aws-region: ${{ secrets.AWS_REGION }} + role-to-assume: ${{ secrets.AWS_ROLE_ARN_SYNC_TO_CODECOMMIT }} + - run: git config credential.helper '!aws codecommit credential-helper $@' + - run: git config credential.UseHttpPath true + - run: git remote add codecommit ${{ secrets.AWS_CODECOMMIT_REPO_URL }} + - run: git checkout master + - run: git push codecommit master From a8198757ec1ef14f863e17ab040fcef9e385d4a0 Mon Sep 17 00:00:00 2001 From: Jeffrey Nelson Date: Fri, 14 Jul 2023 12:31:27 -0500 Subject: [PATCH 493/621] update core CNI plugins version (#1308) --- eks-worker-al2-variables.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 6faf232e6..a699f03ae 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -12,7 +12,7 @@ "binary_bucket_name": "amazon-eks", "binary_bucket_region": "us-west-2", "cache_container_images": "false", - "cni_plugin_version": "v0.8.6", + "cni_plugin_version": "v1.2.0", "containerd_version": "1.6.*", "creator": "{{env `USER`}}", "docker_version": "20.10.23-1.amzn2.0.1", From 91c6002ff1b3b11e59941aad7417dc91dcf665ef Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 14 Jul 2023 15:15:56 -0700 Subject: [PATCH 494/621] Update internal 
build config (#1353) --- Config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Config b/Config index 97041313e..51a60f0b5 100644 --- a/Config +++ b/Config @@ -3,7 +3,7 @@ # Copyright 2019 Amazon.com, Inc. or its affiliates. # SPDX-License-Identifier: Apache-2.0 -package.Amazon-eks-ami = { +package.Amazon-eks-ami-mirror = { interfaces = (1.0); deploy = { From 40c96b7d9a1ecde917b8c22fef7a4e2cf568bd27 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Mon, 17 Jul 2023 14:14:17 -0700 Subject: [PATCH 495/621] Update binary references (#1355) --- Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index e4449c4c7..7abefe8e7 100644 --- a/Makefile +++ b/Makefile @@ -113,23 +113,23 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-05-11 + $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-06-30 .PHONY: 1.24 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 - $(MAKE) k8s kubernetes_version=1.24.13 kubernetes_build_date=2023-05-11 + $(MAKE) k8s kubernetes_version=1.24.15 kubernetes_build_date=2023-06-30 .PHONY: 1.25 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 - $(MAKE) k8s kubernetes_version=1.25.9 kubernetes_build_date=2023-05-11 + $(MAKE) k8s kubernetes_version=1.25.11 kubernetes_build_date=2023-06-30 .PHONY: 1.26 1.26: ## Build EKS Optimized AL2 AMI - K8s 1.26 - $(MAKE) k8s kubernetes_version=1.26.4 kubernetes_build_date=2023-05-11 + $(MAKE) k8s kubernetes_version=1.26.6 kubernetes_build_date=2023-06-30 .PHONY: 1.27 1.27: ## Build EKS Optimized AL2 AMI - K8s 1.27 - $(MAKE) k8s kubernetes_version=1.27.1 kubernetes_build_date=2023-04-19 + $(MAKE) k8s kubernetes_version=1.27.3 kubernetes_build_date=2023-06-30 .PHONY: clean clean: From b1b8bd67e019238b57a70ca6f06b960a07043869 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Tue, 18 Jul 
2023 13:22:19 -0700 Subject: [PATCH 496/621] Update CHANGELOG.md for 20230711 AMI release (#1357) --- CHANGELOG.md | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c316c3b97..20f5dfa5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,52 @@ # Changelog +### AMI Release v20230711 +* amazon-eks-gpu-node-1.27-v20230711 +* amazon-eks-gpu-node-1.26-v20230711 +* amazon-eks-gpu-node-1.25-v20230711 +* amazon-eks-gpu-node-1.24-v20230711 +* amazon-eks-gpu-node-1.23-v20230711 +* amazon-eks-arm64-node-1.27-v20230711 +* amazon-eks-arm64-node-1.26-v20230711 +* amazon-eks-arm64-node-1.25-v20230711 +* amazon-eks-arm64-node-1.24-v20230711 +* amazon-eks-arm64-node-1.23-v20230711 +* amazon-eks-node-1.27-v20230711 +* amazon-eks-node-1.26-v20230711 +* amazon-eks-node-1.25-v20230711 +* amazon-eks-node-1.24-v20230711 +* amazon-eks-node-1.23-v20230711 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.3-20230711` +* `1.26.6-20230711` +* `1.25.11-20230711` +* `1.24.15-20230711` +* `1.23.17-20230711` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.3/2023-06-30/ +* s3://amazon-eks/1.26.6/2023-06-30/ +* s3://amazon-eks/1.25.11/2023-06-30/ +* s3://amazon-eks/1.24.15/2023-06-30/ +* s3://amazon-eks/1.23.17/2023-06-30/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.247-162.350.amzn2 + * Kubernetes 1.24 and above: 5.10.184-175.731.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
+* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.5-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- Kubelet versions bumped up for k8s version 1.23-1.27 to address [bug](https://github.com/kubernetes/kubernetes/issues/116847#issuecomment-1552938714) +- Source VPC CNI plugin version bumped from 0.8.0 to 1.2.0 + ### AMI Release v20230703 * amazon-eks-gpu-node-1.27-v20230703 * amazon-eks-gpu-node-1.26-v20230703 From 80dd9e5616905891d7899903ba4350b11f263a69 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 20 Jul 2023 07:54:40 -0700 Subject: [PATCH 497/621] Enable discard_unpacked_layers by default (#1360) --- files/containerd-config.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/files/containerd-config.toml b/files/containerd-config.toml index 1cddeb2f6..42458568f 100644 --- a/files/containerd-config.toml +++ b/files/containerd-config.toml @@ -7,6 +7,7 @@ address = "/run/containerd/containerd.sock" [plugins."io.containerd.grpc.v1.cri".containerd] default_runtime_name = "runc" +discard_unpacked_layers = true [plugins."io.containerd.grpc.v1.cri"] sandbox_image = "SANDBOX_IMAGE" From 9307b072594d03bfba901520d69c76b0172c030f Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 20 Jul 2023 08:38:59 -0700 Subject: [PATCH 498/621] Mount bpffs on all supported Kubernetes versions (#1349) --- files/bootstrap.sh | 8 ++------ test/cases/mount-bpf-fs.sh | 7 ++++--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 8937784bb..a073ce646 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -33,7 +33,7 @@ function print_help { echo "--ip-family Specify ip family of the cluster" echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints." 
echo "--local-disks Setup instance storage NVMe disks in raid0 or mount the individual disks for use by pods [mount | raid0]" - echo "--mount-bpf-fs Mount a bpffs at /sys/fs/bpf (default: true, for Kubernetes 1.25+; false otherwise)" + echo "--mount-bpf-fs Mount a bpffs at /sys/fs/bpf (default: true)" echo "--pause-container-account The AWS account (number) to pull the pause container from" echo "--pause-container-version The tag of the pause container" echo "--service-ipv6-cidr ipv6 cidr range of the cluster" @@ -224,11 +224,7 @@ if [[ ! -z ${LOCAL_DISKS} ]]; then setup-local-disks "${LOCAL_DISKS}" fi -DEFAULT_MOUNT_BPF_FS="true" -if vercmp "$KUBELET_VERSION" lt "1.25.0"; then - DEFAULT_MOUNT_BPF_FS="false" -fi -MOUNT_BPF_FS="${MOUNT_BPF_FS:-$DEFAULT_MOUNT_BPF_FS}" +MOUNT_BPF_FS="${MOUNT_BPF_FS:-true}" # Helper function which calculates the amount of the given resource (either CPU or memory) # to reserve in a given resource range, specified by a start and end of the range and a percentage diff --git a/test/cases/mount-bpf-fs.sh b/test/cases/mount-bpf-fs.sh index c5281d4e2..fe6e45907 100755 --- a/test/cases/mount-bpf-fs.sh +++ b/test/cases/mount-bpf-fs.sh @@ -49,7 +49,7 @@ fi export -nf mount rm $SYSTEMD_UNIT -echo "--> Should default to true on 1.27+" +echo "--> Should default to true" export KUBELET_VERSION=v1.27.0-eks-ba74326 MOUNT_BPF_FS_MOCK=$(mktemp) function mount-bpf-fs() { @@ -72,8 +72,8 @@ if [ ! "$(cat $MOUNT_BPF_FS_MOCK)" = "called" ]; then fi export -nf mount-bpf-fs -echo "--> Should default to false on 1.24-" -export KUBELET_VERSION=v1.24.0-eks-ba74326 +echo "--> Should be disabled by flag" +export KUBELET_VERSION=v1.27.0-eks-ba74326 MOUNT_BPF_FS_MOCK=$(mktemp) function mount-bpf-fs() { echo "called" >> $MOUNT_BPF_FS_MOCK @@ -84,6 +84,7 @@ EXIT_CODE=0 /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ + --mount-bpf-fs false \ test || EXIT_CODE=$? 
if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" From 8d68370eff9a30573d8e8da91903c7ca5376fa01 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 21 Jul 2023 13:57:28 -0700 Subject: [PATCH 499/621] Cleanup /var/log/audit (#1363) --- scripts/cleanup.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/cleanup.sh b/scripts/cleanup.sh index f99893412..a1b223d62 100644 --- a/scripts/cleanup.sh +++ b/scripts/cleanup.sh @@ -24,6 +24,7 @@ sudo rm -rf \ /var/log/secure \ /var/log/wtmp \ /var/log/messages \ + /var/log/audit/* \ /tmp/imds-tokens sudo touch /etc/machine-id From 55cbc55f69a029dc821d16081a7c75c206a5d1cd Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 27 Jul 2023 09:57:18 -0700 Subject: [PATCH 500/621] Use GitHub bot user as committer/author (#1366) --- .github/workflows/sync-eni-max-pods.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/sync-eni-max-pods.yaml b/.github/workflows/sync-eni-max-pods.yaml index 779d13ea5..929ceb2a5 100644 --- a/.github/workflows/sync-eni-max-pods.yaml +++ b/.github/workflows/sync-eni-max-pods.yaml @@ -40,6 +40,8 @@ jobs: path: amazon-eks-ami/ add-paths: files/eni-max-pods.txt commit-message: "Update eni-max-pods.txt" + committer: "GitHub " + author: "GitHub " title: "Update eni-max-pods.txt" body: | Generated by [aws/amazon-vpc-cni-k8s](https://github.com/aws/amazon-vpc-cni-k8s): From e91cf9f65f29df2e7389846774798c0146e86cb6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 1 Aug 2023 09:46:51 -0700 Subject: [PATCH 501/621] Update eni-max-pods.txt (#1365) --- files/eni-max-pods.txt | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index f82b87d9f..09d7786ec 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -176,6 +176,14 @@ c7g.large 29 c7g.medium 8 c7g.metal 737 c7g.xlarge 58 
+c7gd.12xlarge 234 +c7gd.16xlarge 737 +c7gd.2xlarge 58 +c7gd.4xlarge 234 +c7gd.8xlarge 234 +c7gd.large 29 +c7gd.medium 8 +c7gd.xlarge 58 c7gn.12xlarge 234 c7gn.16xlarge 737 c7gn.2xlarge 58 @@ -452,6 +460,14 @@ m7g.large 29 m7g.medium 8 m7g.metal 737 m7g.xlarge 58 +m7gd.12xlarge 234 +m7gd.16xlarge 737 +m7gd.2xlarge 58 +m7gd.4xlarge 234 +m7gd.8xlarge 234 +m7gd.large 29 +m7gd.medium 8 +m7gd.xlarge 58 mac1.metal 234 mac2.metal 234 p2.16xlarge 234 @@ -463,6 +479,7 @@ p3.8xlarge 234 p3dn.24xlarge 737 p4d.24xlarge 737 p4de.24xlarge 737 +p5.48xlarge 100 r3.2xlarge 58 r3.4xlarge 234 r3.8xlarge 234 @@ -613,6 +630,14 @@ r7g.large 29 r7g.medium 8 r7g.metal 737 r7g.xlarge 58 +r7gd.12xlarge 234 +r7gd.16xlarge 737 +r7gd.2xlarge 58 +r7gd.4xlarge 234 +r7gd.8xlarge 234 +r7gd.large 29 +r7gd.medium 8 +r7gd.xlarge 58 t1.micro 4 t2.2xlarge 44 t2.large 35 From d0db8b2dbe81023053460d22c651fcff8045c174 Mon Sep 17 00:00:00 2001 From: Sichaow Date: Tue, 1 Aug 2023 16:14:15 -0700 Subject: [PATCH 502/621] Update CHANGELOG.md for 20230728 AMI release (#1371) --- CHANGELOG.md | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 20f5dfa5f..9059de2a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,53 @@ # Changelog +### AMI Release v20230728 +* amazon-eks-gpu-node-1.27-v20230728 +* amazon-eks-gpu-node-1.26-v20230728 +* amazon-eks-gpu-node-1.25-v20230728 +* amazon-eks-gpu-node-1.24-v20230728 +* amazon-eks-gpu-node-1.23-v20230728 +* amazon-eks-arm64-node-1.27-v20230728 +* amazon-eks-arm64-node-1.26-v20230728 +* amazon-eks-arm64-node-1.25-v20230728 +* amazon-eks-arm64-node-1.24-v20230728 +* amazon-eks-arm64-node-1.23-v20230728 +* amazon-eks-node-1.27-v20230728 +* amazon-eks-node-1.26-v20230728 +* amazon-eks-node-1.25-v20230728 +* amazon-eks-node-1.24-v20230728 +* amazon-eks-node-1.23-v20230728 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* 
`1.27.3-20230728` +* `1.26.6-20230728` +* `1.25.11-20230728` +* `1.24.15-20230728` +* `1.23.17-20230728` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.3/2023-06-30/ +* s3://amazon-eks/1.26.6/2023-06-30/ +* s3://amazon-eks/1.25.11/2023-06-30/ +* s3://amazon-eks/1.24.15/2023-06-30/ +* s3://amazon-eks/1.23.17/2023-06-30/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.249-163.359.amzn2 + * Kubernetes 1.24 and above: 5.10.184-175.749.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.5-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- Kernel fix for `CVE-2023-3117` and `CVE-2023-35001` with new versions: [5.10 kernel](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-037.html) and [5.4 kernel](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-049.html) +- Mount bpffs on all supported Kubernetes versions. 
([#1349](https://github.com/awslabs/amazon-eks-ami/pull/1349)) +- Enable discard_unpacked_layers by default to clean up compressed image layers in containerd's content store.([#1360](https://github.com/awslabs/amazon-eks-ami/pull/1360)) + ### AMI Release v20230711 * amazon-eks-gpu-node-1.27-v20230711 * amazon-eks-gpu-node-1.26-v20230711 From b15ebe1e539a7cc767d1d552d4afaaa20a35df22 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 2 Aug 2023 19:25:41 -0700 Subject: [PATCH 503/621] Update eni-max-pods.txt (#1373) Co-authored-by: GitHub --- files/eni-max-pods.txt | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 09d7786ec..648423dc4 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -468,6 +468,20 @@ m7gd.8xlarge 234 m7gd.large 29 m7gd.medium 8 m7gd.xlarge 58 +m7i-flex.2xlarge 58 +m7i-flex.4xlarge 234 +m7i-flex.8xlarge 234 +m7i-flex.large 29 +m7i-flex.xlarge 58 +m7i.12xlarge 234 +m7i.16xlarge 737 +m7i.24xlarge 737 +m7i.2xlarge 58 +m7i.48xlarge 737 +m7i.4xlarge 234 +m7i.8xlarge 234 +m7i.large 29 +m7i.xlarge 58 mac1.metal 234 mac2.metal 234 p2.16xlarge 234 From 4f891791e0d6ac484cb8bc70fd13f86b09df8e75 Mon Sep 17 00:00:00 2001 From: Vincent Marguerie <24724195+vincentmrg@users.noreply.github.com> Date: Fri, 4 Aug 2023 21:58:39 +0200 Subject: [PATCH 504/621] Install latest amazon-ssm-agent from S3 (#1370) --- doc/USER_GUIDE.md | 2 +- eks-worker-al2-variables.json | 1 + eks-worker-al2.json | 7 +++++-- scripts/install-worker.sh | 8 +++++++- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index c8f79a5bf..687bfe507 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -64,12 +64,12 @@ Users have the following options for specifying their own values: | `remote_folder` | ```{{user `remote_folder`}}/worker``` | Directory path for shell provisioner scripts on the 
builder instance | | `runc_version` | ```{{user `remote_folder`}}/worker``` | | | `security_group_id` | `""` | | -| `sonobuoy_e2e_registry` | `""` | | | `source_ami_filter_name` | ```{{user `remote_folder`}}/worker``` | | | `source_ami_id` | `""` | | | `source_ami_owners` | ```{{user `remote_folder`}}/worker``` | | | `ssh_interface` | `""` | | | `ssh_username` | ```{{user `remote_folder`}}/worker``` | | +| `ssm_agent_version` | ```{{user `remote_folder`}}/worker``` | | | `subnet_id` | `""` | | | `temporary_security_group_source_cidrs` | `""` | | | `volume_type` | ```{{user `remote_folder`}}/worker``` | | diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index a699f03ae..a1be0fe9b 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -30,6 +30,7 @@ "source_ami_owners": "137112412989", "ssh_interface": "", "ssh_username": "ec2-user", + "ssm_agent_version": "latest", "subnet_id": "", "temporary_security_group_source_cidrs": "", "volume_type": "gp2", diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 51d20fbf9..fe2d3df24 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -37,6 +37,7 @@ "source_ami_owners": null, "ssh_interface": null, "ssh_username": null, + "ssm_agent_version": null, "subnet_id": null, "temporary_security_group_source_cidrs": null, "volume_type": null, @@ -105,7 +106,8 @@ "docker_version": "{{ user `docker_version`}}", "containerd_version": "{{ user `containerd_version`}}", "kubernetes": "{{ user `kubernetes_version`}}/{{ user `kubernetes_build_date` }}/bin/linux/{{ user `arch` }}", - "cni_plugin_version": "{{ user `cni_plugin_version`}}" + "cni_plugin_version": "{{ user `cni_plugin_version`}}", + "ssm_agent_version": "{{ user `ssm_agent_version`}}" }, "ami_name": "{{user `ami_name`}}", "ami_description": "{{ user `ami_description` }}, {{ user `ami_component_description` }}" @@ -174,7 +176,8 @@ "AWS_SESSION_TOKEN={{user `aws_session_token`}}", "PAUSE_CONTAINER_VERSION={{user 
`pause_container_version`}}", "CACHE_CONTAINER_IMAGES={{user `cache_container_images`}}", - "WORKING_DIR={{user `working_dir`}}" + "WORKING_DIR={{user `working_dir`}}", + "SSM_AGENT_VERSION={{user `ssm_agent_version`}}" ] }, { diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 398858df9..347e11938 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -32,6 +32,7 @@ validate_env_set PULL_CNI_FROM_GITHUB validate_env_set PAUSE_CONTAINER_VERSION validate_env_set CACHE_CONTAINER_IMAGES validate_env_set WORKING_DIR +validate_env_set SSM_AGENT_VERSION ################################################################################ ### Machine Architecture ####################################################### @@ -473,7 +474,12 @@ fi ### SSM Agent ################################################################## ################################################################################ -sudo yum install -y amazon-ssm-agent +echo "Installing amazon-ssm-agent" +if ! 
[[ ${ISOLATED_REGIONS} =~ $BINARY_BUCKET_REGION ]]; then + sudo yum install -y https://s3.${BINARY_BUCKET_REGION}.${S3_DOMAIN}/amazon-ssm-${BINARY_BUCKET_REGION}/${SSM_AGENT_VERSION}/linux_${ARCH}/amazon-ssm-agent.rpm +else + sudo yum install -y amazon-ssm-agent +fi ################################################################################ ### AMI Metadata ############################################################### From 30ccd211b671362b7d0fbaf271b4fee30d9a4601 Mon Sep 17 00:00:00 2001 From: camrakin <113552683+camrakin@users.noreply.github.com> Date: Fri, 4 Aug 2023 13:51:25 -0700 Subject: [PATCH 505/621] Do not set KubeletCredentialProviders feature flag for 1.28+ (#1375) --- files/kubelet-config.json | 3 +-- scripts/install-worker.sh | 7 +++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/files/kubelet-config.json b/files/kubelet-config.json index 666350e2b..b78510c6a 100644 --- a/files/kubelet-config.json +++ b/files/kubelet-config.json @@ -27,8 +27,7 @@ "cgroupDriver": "cgroupfs", "cgroupRoot": "/", "featureGates": { - "RotateKubeletServerCertificate": true, - "KubeletCredentialProviders": true + "RotateKubeletServerCertificate": true }, "protectKernelDefaults": true, "serializeImagePulls": false, diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 347e11938..bc5ac8e9d 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -322,6 +322,13 @@ if [[ $KUBERNETES_VERSION == "1.20"* ]]; then echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $WORKING_DIR/kubelet-config.json fi +# Enable Feature Gate for KubeletCredentialProviders in versions less than 1.28 since this feature flag was removed in 1.28. 
+# TODO: Remove this during 1.27 EOL +if vercmp $KUBERNETES_VERSION lt "1.28"; then + KUBELET_CONFIG_WITH_KUBELET_CREDENTIAL_PROVIDER_FEATURE_GATE_ENABLED=$(cat $WORKING_DIR/kubelet-config.json | jq '.featureGates += {KubeletCredentialProviders: true}') + echo $KUBELET_CONFIG_WITH_KUBELET_CREDENTIAL_PROVIDER_FEATURE_GATE_ENABLED > $WORKING_DIR/kubelet-config.json +fi + sudo mv $WORKING_DIR/kubelet.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service sudo mv $WORKING_DIR/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json From bf391c8dcbda82e1997b306c972058094178b412 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 7 Aug 2023 09:17:20 -0700 Subject: [PATCH 506/621] Fix bug in var doc gen (#1378) --- doc/USER_GUIDE.md | 60 +++++++++++++------------- hack/generate-template-variable-doc.py | 4 +- 2 files changed, 33 insertions(+), 31 deletions(-) diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index 687bfe507..b07ae7264 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -23,7 +23,7 @@ Default values for most variables are defined in [a default variable file](eks-w Users have the following options for specifying their own values: 1. Provide a variable file with the `PACKER_VARIABLE_FILE` argument to `make`. Values in this file will override values in the default variable file. Your variable file does not need to include all possible variables, as it will be merged with the default variable file. -2. Pass a key-value pair for any template variable to `make`. These values will override any values that were specified with the first method. In the table below, these variables have a default value of "None". +2. Pass a key-value pair for any template variable to `make`. These values will override any values that were specified with the first method. In the table below, these variables have a default value of *None*. 
> **Note** > Some variables (such as `arch` and `kubernetes_version`) do not have a sensible, static default, and are satisfied by the Makefile. @@ -34,45 +34,45 @@ Users have the following options for specifying their own values: | Variable | Default value | Description | | - | - | - | | `additional_yum_repos` | `""` | | -| `ami_component_description` | ```{{user `remote_folder`}}/worker``` | | -| `ami_description` | ```{{user `remote_folder`}}/worker``` | | -| `ami_name` | None | | +| `ami_component_description` | ```(k8s: {{ user `kubernetes_version` }}, docker: {{ user `docker_version` }}, containerd: {{ user `containerd_version` }})``` | | +| `ami_description` | ```EKS Kubernetes Worker AMI with AmazonLinux2 image``` | | +| `ami_name` | *None* | | | `ami_regions` | `""` | | | `ami_users` | `""` | | -| `arch` | None | | +| `arch` | *None* | | | `associate_public_ip_address` | `""` | | -| `aws_access_key_id` | ```{{user `remote_folder`}}/worker``` | | -| `aws_region` | ```{{user `remote_folder`}}/worker``` | | -| `aws_secret_access_key` | ```{{user `remote_folder`}}/worker``` | | -| `aws_session_token` | ```{{user `remote_folder`}}/worker``` | | -| `binary_bucket_name` | ```{{user `remote_folder`}}/worker``` | | -| `binary_bucket_region` | ```{{user `remote_folder`}}/worker``` | | -| `cache_container_images` | ```{{user `remote_folder`}}/worker``` | | -| `cni_plugin_version` | ```{{user `remote_folder`}}/worker``` | | -| `containerd_version` | ```{{user `remote_folder`}}/worker``` | | -| `creator` | ```{{user `remote_folder`}}/worker``` | | -| `docker_version` | ```{{user `remote_folder`}}/worker``` | | -| `encrypted` | ```{{user `remote_folder`}}/worker``` | | -| `instance_type` | None | | +| `aws_access_key_id` | ```{{env `AWS_ACCESS_KEY_ID`}}``` | | +| `aws_region` | ```us-west-2``` | | +| `aws_secret_access_key` | ```{{env `AWS_SECRET_ACCESS_KEY`}}``` | | +| `aws_session_token` | ```{{env `AWS_SESSION_TOKEN`}}``` | | +| `binary_bucket_name` | 
```amazon-eks``` | | +| `binary_bucket_region` | ```us-west-2``` | | +| `cache_container_images` | ```false``` | | +| `cni_plugin_version` | ```v1.2.0``` | | +| `containerd_version` | ```1.6.*``` | | +| `creator` | ```{{env `USER`}}``` | | +| `docker_version` | ```20.10.23-1.amzn2.0.1``` | | +| `encrypted` | ```false``` | | +| `instance_type` | *None* | | | `kernel_version` | `""` | | | `kms_key_id` | `""` | | -| `kubernetes_build_date` | None | | -| `kubernetes_version` | None | | -| `launch_block_device_mappings_volume_size` | ```{{user `remote_folder`}}/worker``` | | -| `pause_container_version` | ```{{user `remote_folder`}}/worker``` | | -| `pull_cni_from_github` | ```{{user `remote_folder`}}/worker``` | | -| `remote_folder` | ```{{user `remote_folder`}}/worker``` | Directory path for shell provisioner scripts on the builder instance | -| `runc_version` | ```{{user `remote_folder`}}/worker``` | | +| `kubernetes_build_date` | *None* | | +| `kubernetes_version` | *None* | | +| `launch_block_device_mappings_volume_size` | ```4``` | | +| `pause_container_version` | ```3.5``` | | +| `pull_cni_from_github` | ```true``` | | +| `remote_folder` | ```/tmp``` | Directory path for shell provisioner scripts on the builder instance | +| `runc_version` | ```1.1.5-1.amzn2``` | | | `security_group_id` | `""` | | -| `source_ami_filter_name` | ```{{user `remote_folder`}}/worker``` | | +| `source_ami_filter_name` | ```amzn2-ami-minimal-hvm-*``` | | | `source_ami_id` | `""` | | -| `source_ami_owners` | ```{{user `remote_folder`}}/worker``` | | +| `source_ami_owners` | ```137112412989``` | | | `ssh_interface` | `""` | | -| `ssh_username` | ```{{user `remote_folder`}}/worker``` | | -| `ssm_agent_version` | ```{{user `remote_folder`}}/worker``` | | +| `ssh_username` | ```ec2-user``` | | +| `ssm_agent_version` | ```latest``` | | | `subnet_id` | `""` | | | `temporary_security_group_source_cidrs` | `""` | | -| `volume_type` | ```{{user `remote_folder`}}/worker``` | | +| `volume_type` | 
```gp2``` | | | `working_dir` | ```{{user `remote_folder`}}/worker``` | Directory path for ephemeral resources on the builder instance | diff --git a/hack/generate-template-variable-doc.py b/hack/generate-template-variable-doc.py index 35cdde476..3f08fcb7a 100755 --- a/hack/generate-template-variable-doc.py +++ b/hack/generate-template-variable-doc.py @@ -47,7 +47,9 @@ if val == "": val = f"`\"\"`" else: - val = f"```{default_val}```" + val = f"```{val}```" + else: + val = "*None*" description = "" if var in existing_descriptions: description = existing_descriptions[var] From 3ef7e96e9987d5e77d36688fbb7082a799b74a16 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 7 Aug 2023 09:21:49 -0700 Subject: [PATCH 507/621] Generate docs for GitHub Pages (#1379) --- .github/workflows/deploy-docs.yaml | 13 +++++++++++++ .gitignore | 1 + Makefile | 8 ++++++-- README.md | 2 +- doc/CHANGELOG.md | 1 + doc/CONTRIBUTING.md | 2 +- doc/README.md | 1 + doc/USER_GUIDE.md | 16 ++-------------- hack/lint-docs.sh | 10 ++++++++++ hack/mkdocs.Dockerfile | 4 ++++ hack/mkdocs.sh | 14 ++++++++++++++ mkdocs.yaml | 19 +++++++++++++++++++ 12 files changed, 73 insertions(+), 18 deletions(-) create mode 100644 .github/workflows/deploy-docs.yaml create mode 120000 doc/CHANGELOG.md create mode 120000 doc/README.md create mode 100755 hack/lint-docs.sh create mode 100644 hack/mkdocs.Dockerfile create mode 100755 hack/mkdocs.sh create mode 100644 mkdocs.yaml diff --git a/.github/workflows/deploy-docs.yaml b/.github/workflows/deploy-docs.yaml new file mode 100644 index 000000000..8754a6790 --- /dev/null +++ b/.github/workflows/deploy-docs.yaml @@ -0,0 +1,13 @@ +name: Deploy documentation +on: + workflow_dispatch: + push: + branches: + - 'master' +jobs: + mkdocs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - run: pip install mkdocs mkdocs-material + - run: mkdocs gh-deploy \ No newline at end of file diff --git a/.gitignore b/.gitignore index 2d9cb419a..1be3dc826 100644 --- 
a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ .idea *version-info.json .DS_Store +site/ diff --git a/Makefile b/Makefile index 7abefe8e7..66c675525 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,7 @@ transform-al2-to-al2023: hack/transform-al2-to-al2023.sh .PHONY: lint -lint: ## Check the source files for syntax and format issues +lint: lint-docs ## Check the source files for syntax and format issues $(SHFMT_COMMAND) $(SHFMT_FLAGS) --diff $(MAKEFILE_DIR) $(SHELLCHECK_COMMAND) --format gcc --severity error $(SHELL_FILES) @@ -130,7 +130,11 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.27 1.27: ## Build EKS Optimized AL2 AMI - K8s 1.27 $(MAKE) k8s kubernetes_version=1.27.3 kubernetes_build_date=2023-06-30 - + +.PHONY: lint-docs +lint-docs: ## Lint the docs + hack/lint-docs.sh + .PHONY: clean clean: rm *-manifest.json diff --git a/README.md b/README.md index 758fb9868..49eb62c26 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ The Makefile chooses a particular kubelet binary to use per Kubernetes version w ## 👩‍💻 Using the AMI -The [AMI user guide](doc/USER_GUIDE.md) has details about the AMI's internals, and the [EKS user guide](https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html#launch-template-custom-ami) explains how to use a custom AMI in a managed node group. +The [AMI user guide](https://awslabs.github.io/amazon-eks-ami/USER_GUIDE/) has details about the AMI's internals, and the [EKS user guide](https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html#launch-template-custom-ami) explains how to use a custom AMI in a managed node group. 
## 🔒 Security diff --git a/doc/CHANGELOG.md b/doc/CHANGELOG.md new file mode 120000 index 000000000..04c99a55c --- /dev/null +++ b/doc/CHANGELOG.md @@ -0,0 +1 @@ +../CHANGELOG.md \ No newline at end of file diff --git a/doc/CONTRIBUTING.md b/doc/CONTRIBUTING.md index 2d6946816..5f030d149 100644 --- a/doc/CONTRIBUTING.md +++ b/doc/CONTRIBUTING.md @@ -46,7 +46,7 @@ When submitting PRs, we want to verify that there are no regressions in the AMI **Test #1: Verify that the unit tests pass** -Please add a test case for your changes, if possible. See the [unit test README](test/README.md) for more information. These tests will be run automatically for every pull request. +Please add a test case for your changes, if possible. See the [unit test README](https://github.com/awslabs/amazon-eks-ami/tree/master/test#readme) for more information. These tests will be run automatically for every pull request. ``` make test diff --git a/doc/README.md b/doc/README.md new file mode 120000 index 000000000..32d46ee88 --- /dev/null +++ b/doc/README.md @@ -0,0 +1 @@ +../README.md \ No newline at end of file diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index b07ae7264..485b7ebc4 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -2,23 +2,11 @@ This document includes details about using the AMI template and the resulting AMIs. -1. [AMI template variables](#ami-template-variables) -1. [Building against other versions of Kubernetes binaries](#building-against-other-versions-of-kubernetes-binaries) -1. [Providing your own Kubernetes binaries](#providing-your-own-kubernetes-binaries) -1. [Container image caching](#container-image-caching) -1. [IAM permissions](#iam-permissions) -1. [Customizing kubelet config](#customizing-kubelet-config) -1. [AL2 and Linux kernel information](#al2-and-linux-kernel-information) -1. [Updating known instance types](#updating-known-instance-types) -1. [Version-locked packages](#version-locked-packages) -1. 
[Image credential provider plugins](#image-credential-provider-plugins) -1. [Ephemeral Storage](#ephemeral-storage) - --- ## AMI template variables -Default values for most variables are defined in [a default variable file](eks-worker-al2-variables.json). +Default values for most variables are defined in [a default variable file](https://github.com/awslabs/amazon-eks-ami/blob/master/eks-worker-al2-variables.json). Users have the following options for specifying their own values: @@ -310,7 +298,7 @@ If `kernel_version` is not set: - For Kubernetes 1.23 and below, `5.4` is used. - For Kubernetes 1.24 and above, `5.10` is used. -The [upgrade_kernel.sh script](../scripts/upgrade_kernel.sh) contains the logic for updating and upgrading the kernel. +The [upgrade_kernel.sh script](https://github.com/awslabs/amazon-eks-ami/blob/master/scripts/upgrade_kernel.sh) contains the logic for updating and upgrading the kernel. --- diff --git a/hack/lint-docs.sh b/hack/lint-docs.sh new file mode 100755 index 000000000..24ef64720 --- /dev/null +++ b/hack/lint-docs.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +set -o errexit +cd $(dirname $0) +./generate-template-variable-doc.py +if ! git diff --exit-code ../doc/USER_GUIDE.md; then + echo "ERROR: doc/USER_GUIDE.md is out of date. Please run hack/generate-template-variable-doc.py and commit the changes." + exit 1 +fi +./mkdocs.sh build --strict diff --git a/hack/mkdocs.Dockerfile b/hack/mkdocs.Dockerfile new file mode 100644 index 000000000..0f02dedce --- /dev/null +++ b/hack/mkdocs.Dockerfile @@ -0,0 +1,4 @@ +FROM python:3.9 +RUN pip install mkdocs mkdocs-material +WORKDIR /workdir +ENTRYPOINT ["mkdocs"] \ No newline at end of file diff --git a/hack/mkdocs.sh b/hack/mkdocs.sh new file mode 100755 index 000000000..4f7c93b95 --- /dev/null +++ b/hack/mkdocs.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +set -o errexit + +cd $(dirname $0) + +IMAGE_ID=$(docker build --file mkdocs.Dockerfile --quiet .) +cd .. 
+ +if [[ "$*" =~ "serve" ]]; then + EXTRA_ARGS="${EXTRA_ARGS} -a 0.0.0.0:8000" +fi + +docker run --rm -v ${PWD}:/workdir -p 8000:8000 ${IMAGE_ID} "${@}" ${EXTRA_ARGS} diff --git a/mkdocs.yaml b/mkdocs.yaml new file mode 100644 index 000000000..56ec4c37e --- /dev/null +++ b/mkdocs.yaml @@ -0,0 +1,19 @@ +site_name: Amazon EKS AMI +docs_dir: doc/ +site_description: Build template and runtime resources for the Amazon EKS AMI +repo_name: awslabs/amazon-eks-ami +repo_url: https://github.com/awslabs/amazon-eks-ami +nav: + - 'Overview': README.md + - 'User Guide': USER_GUIDE.md + - 'Changelog': CHANGELOG.md + - 'Community': + - 'Contribution guidelines': CONTRIBUTING.md + - 'Code of Conduct': CODE_OF_CONDUCT.md + +theme: + name: material + palette: + primary: black + features: + - navigation.sections \ No newline at end of file From dc68dc4c9fffba991232214da9d952e616eea060 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 7 Aug 2023 09:32:47 -0700 Subject: [PATCH 508/621] Add write permissions to deploy-docs workflow (#1381) --- .github/workflows/deploy-docs.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy-docs.yaml b/.github/workflows/deploy-docs.yaml index 8754a6790..da191c8e5 100644 --- a/.github/workflows/deploy-docs.yaml +++ b/.github/workflows/deploy-docs.yaml @@ -6,8 +6,10 @@ on: - 'master' jobs: mkdocs: + permissions: + contents: write runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - run: pip install mkdocs mkdocs-material - - run: mkdocs gh-deploy \ No newline at end of file + - run: mkdocs gh-deploy From c2cf65d5e7be0ab20de6a66a7f9ae9f65665916e Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 7 Aug 2023 09:46:31 -0700 Subject: [PATCH 509/621] Force-push docs to gh-pages (#1382) --- .github/workflows/deploy-docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-docs.yaml b/.github/workflows/deploy-docs.yaml index da191c8e5..30328b76a 100644 --- 
a/.github/workflows/deploy-docs.yaml +++ b/.github/workflows/deploy-docs.yaml @@ -12,4 +12,4 @@ jobs: steps: - uses: actions/checkout@v3 - run: pip install mkdocs mkdocs-material - - run: mkdocs gh-deploy + - run: mkdocs gh-deploy --strict --no-history --force From 95f1141e791b82047336db4d98e1bbba86fe524a Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 14 Aug 2023 20:46:50 -0700 Subject: [PATCH 510/621] Cache IMDS tokens per-user (#1386) --- files/bin/imds | 7 +++---- test/cases/imds-token-refresh.sh | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/files/bin/imds b/files/bin/imds index 2d23801ba..061c8b070 100755 --- a/files/bin/imds +++ b/files/bin/imds @@ -49,8 +49,8 @@ function imdscurl() { } function get-token() { - local TOKEN_DIR=/tmp/imds-tokens - mkdir -p -m a+wrx $TOKEN_DIR + local TOKEN_DIR="/tmp/imds-tokens/$(whoami)" + mkdir -p -m 0600 $TOKEN_DIR # cleanup expired tokens local DELETED_TOKENS=0 @@ -71,8 +71,7 @@ function get-token() { -X PUT \ "http://$IMDS_ENDPOINT/latest/api/token") echo "$TOKEN" > "$TOKEN_DIR/$TOKEN_FILE" - # make sure any user can utilize (and clean up) these tokens - chmod a+rwx $TOKEN_DIR/$TOKEN_FILE + chmod 0600 "$TOKEN_DIR/$TOKEN_FILE" log "🔑 Retrieved a fresh IMDS token that will expire in $IMDS_TOKEN_TTL_SECONDS seconds." else log "ℹ️ Using cached IMDS token that expires in $(($TOKEN_FILE - $CURRENT_TIME)) seconds." 
diff --git a/test/cases/imds-token-refresh.sh b/test/cases/imds-token-refresh.sh index 1f4ca7039..76af2c460 100755 --- a/test/cases/imds-token-refresh.sh +++ b/test/cases/imds-token-refresh.sh @@ -6,7 +6,7 @@ set -o pipefail echo "--> Should refresh IMDS token on configured interval" exit_code=0 -TOKEN_DIR=/tmp/imds-tokens +TOKEN_DIR=/tmp/imds-tokens/$(whoami) TTL=5 export IMDS_TOKEN_TTL_SECONDS=$TTL export IMDS_DEBUG=true From 0ff39d48a91aa84f8fc3c76a5e06cc71b36ca347 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 15 Aug 2023 06:26:08 -0700 Subject: [PATCH 511/621] Install latest runc 1.1.* (#1384) --- doc/USER_GUIDE.md | 2 +- eks-worker-al2-variables.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index 485b7ebc4..9f90bd7ec 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -50,7 +50,7 @@ Users have the following options for specifying their own values: | `pause_container_version` | ```3.5``` | | | `pull_cni_from_github` | ```true``` | | | `remote_folder` | ```/tmp``` | Directory path for shell provisioner scripts on the builder instance | -| `runc_version` | ```1.1.5-1.amzn2``` | | +| `runc_version` | ```1.1.*``` | | | `security_group_id` | `""` | | | `source_ami_filter_name` | ```amzn2-ami-minimal-hvm-*``` | | | `source_ami_id` | `""` | | diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index a1be0fe9b..267fd6c82 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -23,7 +23,7 @@ "pause_container_version": "3.5", "pull_cni_from_github": "true", "remote_folder": "/tmp", - "runc_version": "1.1.5-1.amzn2", + "runc_version": "1.1.*", "security_group_id": "", "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", "source_ami_id": "", From 1f49248191efc13c1bd8b87967d93d11465fcd06 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 15 Aug 2023 22:32:44 -0700 Subject: [PATCH 
512/621] Update eni-max-pods.txt (#1388) --- files/eni-max-pods.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 648423dc4..2367d815b 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -451,6 +451,18 @@ m6in.8xlarge 234 m6in.large 29 m6in.metal 345 m6in.xlarge 58 +m7a.12xlarge 234 +m7a.16xlarge 737 +m7a.24xlarge 737 +m7a.2xlarge 58 +m7a.32xlarge 737 +m7a.48xlarge 737 +m7a.4xlarge 234 +m7a.8xlarge 234 +m7a.large 29 +m7a.medium 8 +m7a.metal-48xl 737 +m7a.xlarge 58 m7g.12xlarge 234 m7g.16xlarge 737 m7g.2xlarge 58 From 865f9f2b6990e8acc407f4d0456411c16cf90964 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 16 Aug 2023 13:40:44 -0700 Subject: [PATCH 513/621] Update binary build dates (#1390) --- Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 66c675525..e401212aa 100644 --- a/Makefile +++ b/Makefile @@ -113,23 +113,23 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-06-30 + $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-08-15 .PHONY: 1.24 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 - $(MAKE) k8s kubernetes_version=1.24.15 kubernetes_build_date=2023-06-30 + $(MAKE) k8s kubernetes_version=1.24.15 kubernetes_build_date=2023-08-14 .PHONY: 1.25 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 - $(MAKE) k8s kubernetes_version=1.25.11 kubernetes_build_date=2023-06-30 + $(MAKE) k8s kubernetes_version=1.25.11 kubernetes_build_date=2023-08-14 .PHONY: 1.26 1.26: ## Build EKS Optimized AL2 AMI - K8s 1.26 - $(MAKE) k8s kubernetes_version=1.26.6 kubernetes_build_date=2023-06-30 + $(MAKE) k8s kubernetes_version=1.26.6 kubernetes_build_date=2023-08-14 .PHONY: 1.27 1.27: ## Build EKS Optimized AL2 AMI - K8s 1.27 - $(MAKE) k8s kubernetes_version=1.27.3 
kubernetes_build_date=2023-06-30 + $(MAKE) k8s kubernetes_version=1.27.3 kubernetes_build_date=2023-08-14 .PHONY: lint-docs lint-docs: ## Lint the docs From bc2c80cf35cd890c94e2f0af965b06e1a9bb2d87 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 17 Aug 2023 12:04:36 -0700 Subject: [PATCH 514/621] Fetch new IMDS token for every request (#1395) --- files/bin/imds | 51 ++++++----------------- files/bootstrap.sh | 2 + scripts/cleanup.sh | 3 +- test/cases/imds-token-refresh.sh | 69 -------------------------------- 4 files changed, 16 insertions(+), 109 deletions(-) delete mode 100755 test/cases/imds-token-refresh.sh diff --git a/files/bin/imds b/files/bin/imds index 061c8b070..2e87c00d8 100755 --- a/files/bin/imds +++ b/files/bin/imds @@ -5,20 +5,13 @@ set -o pipefail set -o nounset if [ "$#" -ne 1 ]; then - echo >&2 "usage: imds API_PATH" + echo >&2 "usage: imds token|API_PATH" exit 1 fi -# leading slashes will be removed -API_PATH="${1#/}" - -CURRENT_TIME=$(date '+%s') - IMDS_DEBUG="${IMDS_DEBUG:-false}" # default ttl is 15 minutes IMDS_TOKEN_TTL_SECONDS=${IMDS_TOKEN_TTL_SECONDS:-900} -# max ttl is 6 hours, see: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html -IMDS_MAX_TOKEN_TTL_SECONDS=${IMDS_MAX_TOKEN_TTL_SECONDS:-21600} IMDS_RETRIES=${IMDS_RETRIES:-10} IMDS_RETRY_DELAY_SECONDS=${IMDS_RETRY_DELAY_SECONDS:-1} IMDS_ENDPOINT=${IMDS_ENDPOINT:-169.254.169.254} @@ -49,43 +42,25 @@ function imdscurl() { } function get-token() { - local TOKEN_DIR="/tmp/imds-tokens/$(whoami)" - mkdir -p -m 0600 $TOKEN_DIR - - # cleanup expired tokens - local DELETED_TOKENS=0 - for TOKEN_FILE in $(ls $TOKEN_DIR | awk '$0 < '$(($CURRENT_TIME - $IMDS_MAX_TOKEN_TTL_SECONDS))); do - rm $TOKEN_DIR/$TOKEN_FILE - DELETED_TOKENS=$(($DELETED_TOKENS + 1)) - done - if [ "$DELETED_TOKENS" -gt 0 ]; then - log "🗑️ Deleted $DELETED_TOKENS expired IMDS token(s)." 
- fi - - local TOKEN_FILE=$(ls $TOKEN_DIR | awk '$0 > '$CURRENT_TIME | sort -n -r | head -n 1) - - if [ "$TOKEN_FILE" = "" ]; then - TOKEN_FILE=$(($CURRENT_TIME + $IMDS_TOKEN_TTL_SECONDS)) - local TOKEN=$(imdscurl \ - -H "X-aws-ec2-metadata-token-ttl-seconds: $IMDS_TOKEN_TTL_SECONDS" \ - -X PUT \ - "http://$IMDS_ENDPOINT/latest/api/token") - echo "$TOKEN" > "$TOKEN_DIR/$TOKEN_FILE" - chmod 0600 "$TOKEN_DIR/$TOKEN_FILE" - log "🔑 Retrieved a fresh IMDS token that will expire in $IMDS_TOKEN_TTL_SECONDS seconds." - else - log "ℹ️ Using cached IMDS token that expires in $(($TOKEN_FILE - $CURRENT_TIME)) seconds." - fi - cat "$TOKEN_DIR/$TOKEN_FILE" + imdscurl \ + -H "X-aws-ec2-metadata-token-ttl-seconds: $IMDS_TOKEN_TTL_SECONDS" \ + -X PUT \ + "http://$IMDS_ENDPOINT/latest/api/token" } function get-with-token() { local API_PATH="$1" imdscurl \ - -H "X-aws-ec2-metadata-token: $(get-token)" \ + -H "X-aws-ec2-metadata-token: ${IMDS_TOKEN:-$(get-token)}" \ "http://$IMDS_ENDPOINT/$API_PATH" } log "ℹ️ Talking to IMDS at $IMDS_ENDPOINT" -get-with-token "$API_PATH" +if [ "$1" = "token" ]; then + get-token +else + # leading slashes will be removed + API_PATH="${1#/}" + get-with-token "$API_PATH" +fi diff --git a/files/bootstrap.sh b/files/bootstrap.sh index a073ce646..7d2ce8098 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -175,6 +175,8 @@ set -- "${POSITIONAL[@]}" # restore positional parameters CLUSTER_NAME="$1" set -u +export IMDS_TOKEN=$(imds token) + KUBELET_VERSION=$(kubelet --version | grep -Eo '[0-9]\.[0-9]+\.[0-9]+') log "INFO: Using kubelet version $KUBELET_VERSION" diff --git a/scripts/cleanup.sh b/scripts/cleanup.sh index a1b223d62..61c399fee 100644 --- a/scripts/cleanup.sh +++ b/scripts/cleanup.sh @@ -24,7 +24,6 @@ sudo rm -rf \ /var/log/secure \ /var/log/wtmp \ /var/log/messages \ - /var/log/audit/* \ - /tmp/imds-tokens + /var/log/audit/* sudo touch /etc/machine-id diff --git a/test/cases/imds-token-refresh.sh b/test/cases/imds-token-refresh.sh deleted 
file mode 100755 index 76af2c460..000000000 --- a/test/cases/imds-token-refresh.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env bash - -set -o nounset -set -o errexit -set -o pipefail - -echo "--> Should refresh IMDS token on configured interval" -exit_code=0 -TOKEN_DIR=/tmp/imds-tokens/$(whoami) -TTL=5 -export IMDS_TOKEN_TTL_SECONDS=$TTL -export IMDS_DEBUG=true -imds /latest/meta-data/instance-id || exit_code=$? - -if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]]; then - echo "❌ Test Failed: expected one token to be present after first IMDS call but got '$(ls $TOKEN_DIR)'" - exit 1 -fi - -imds /latest/meta-data/instance-id || exit_code=$? - -if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]]; then - echo "❌ Test Failed: expected one token to be present after second IMDS call but got '$(ls $TOKEN_DIR)'" - exit 1 -fi - -sleep $(($TTL + 1)) - -imds /latest/meta-data/instance-id || exit_code=$? - -if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]]; then - echo "❌ Test Failed: expected two tokens to be present after third IMDS call but got '$(ls $TOKEN_DIR)'" - exit 1 -fi - -sleep $(($TTL + 1)) - -# both tokens are now expired, but only one should be garbage-collected with a window of $TTL - -IMDS_MAX_TOKEN_TTL_SECONDS=$TTL imds /latest/meta-data/instance-id || exit_code=$? 
- -if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]]; then - echo "❌ Test Failed: expected two tokens to be present after first garbage-collection but got '$(ls $TOKEN_DIR)'" - exit 1 -fi - -# the other expired token should be removed with a window of 0 - -IMDS_MAX_TOKEN_TTL_SECONDS=0 imds /latest/meta-data/instance-id || exit_code=$? - -if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]]; then - echo "❌ Test Failed: expected one token to be present after second garbage-collection but got '$(ls $TOKEN_DIR)'" - exit 1 -fi From 9d6e2a863aeb932b7ba2a8362aba5463969472ce Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 17 Aug 2023 18:58:10 -0700 Subject: [PATCH 515/621] Update CHANGELOG for v20230816 (#1396) --- CHANGELOG.md | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9059de2a0..7a9dee708 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,58 @@ # Changelog +### AMI Release v20230816 +* amazon-eks-gpu-node-1.27-v20230816 +* amazon-eks-gpu-node-1.26-v20230816 +* amazon-eks-gpu-node-1.25-v20230816 +* amazon-eks-gpu-node-1.24-v20230816 +* amazon-eks-gpu-node-1.23-v20230816 +* amazon-eks-arm64-node-1.27-v20230816 +* amazon-eks-arm64-node-1.26-v20230816 +* amazon-eks-arm64-node-1.25-v20230816 +* amazon-eks-arm64-node-1.24-v20230816 +* amazon-eks-arm64-node-1.23-v20230816 +* amazon-eks-node-1.27-v20230816 +* amazon-eks-node-1.26-v20230816 +* amazon-eks-node-1.25-v20230816 +* amazon-eks-node-1.24-v20230816 +* amazon-eks-node-1.23-v20230816 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.3-20230816` +* `1.26.6-20230816` +* `1.25.11-20230816` +* `1.24.15-20230816` +* `1.23.17-20230816` + 
+Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.3/2023-08-14/ +* s3://amazon-eks/1.26.6/2023-08-14/ +* s3://amazon-eks/1.25.11/2023-08-14/ +* s3://amazon-eks/1.24.15/2023-08-14/ +* s3://amazon-eks/1.23.17/2023-08-15/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.250-166.369.amzn2 + * Kubernetes 1.24 and above: 5.10.186-179.751.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.7-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.2.1377.0-1 +Notable changes: +- Install latest runc `1.1.*` ([#1384](https://github.com/awslabs/amazon-eks-ami/pull/1384)). +- Install latest amazon-ssm-agent from S3 ([#1370](https://github.com/awslabs/amazon-eks-ami/pull/1370)). +- `kernel` updated to address: + - [ALASKERNEL-5.4-2023-050](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-050.html) + - [ALASKERNEL-5.10-2023-038](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-038.html) + +Other changes: +- Do not set `KubeletCredentialProviders` feature flag for 1.28+ ([#1375](https://github.com/awslabs/amazon-eks-ami/pull/1375)) +- Cache IMDS tokens per-user ([#1386](https://github.com/awslabs/amazon-eks-ami/pull/1386)) + ### AMI Release v20230728 * amazon-eks-gpu-node-1.27-v20230728 * amazon-eks-gpu-node-1.26-v20230728 From ec2aaf62978a9fa52446149d99ef1a46ff135712 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 22 Aug 2023 13:53:19 -0700 Subject: [PATCH 516/621] Update eni-max-pods.txt (#1397) --- files/eni-max-pods.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 2367d815b..efe2ab6d7 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -249,6 +249,10 @@ h1.4xlarge 234 h1.8xlarge 234 hpc6a.48xlarge 100 hpc6id.32xlarge 51 
+hpc7a.12xlarge 100 +hpc7a.24xlarge 100 +hpc7a.48xlarge 100 +hpc7a.96xlarge 100 hpc7g.16xlarge 198 hpc7g.4xlarge 198 hpc7g.8xlarge 198 From f74b8e686b9a88c61b42464bf4c2bbf6f8bfe753 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Wed, 23 Aug 2023 11:25:00 -0700 Subject: [PATCH 517/621] Update Makefile with latest binaries (#1403) --- Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index e401212aa..8c39b126a 100644 --- a/Makefile +++ b/Makefile @@ -113,23 +113,23 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-08-15 + $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-08-16 .PHONY: 1.24 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 - $(MAKE) k8s kubernetes_version=1.24.15 kubernetes_build_date=2023-08-14 + $(MAKE) k8s kubernetes_version=1.24.16 kubernetes_build_date=2023-08-16 .PHONY: 1.25 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 - $(MAKE) k8s kubernetes_version=1.25.11 kubernetes_build_date=2023-08-14 + $(MAKE) k8s kubernetes_version=1.25.12 kubernetes_build_date=2023-08-16 .PHONY: 1.26 1.26: ## Build EKS Optimized AL2 AMI - K8s 1.26 - $(MAKE) k8s kubernetes_version=1.26.6 kubernetes_build_date=2023-08-14 + $(MAKE) k8s kubernetes_version=1.26.7 kubernetes_build_date=2023-08-16 .PHONY: 1.27 1.27: ## Build EKS Optimized AL2 AMI - K8s 1.27 - $(MAKE) k8s kubernetes_version=1.27.3 kubernetes_build_date=2023-08-14 + $(MAKE) k8s kubernetes_version=1.27.4 kubernetes_build_date=2023-08-16 .PHONY: lint-docs lint-docs: ## Lint the docs From 4bae2892cec3b228d00f59570e94bd17d28b48ac Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 25 Aug 2023 11:08:39 -0700 Subject: [PATCH 518/621] Add CI bot (#1402) --- .github/actions/bot/.gitignore | 1 + .github/actions/bot/README.md | 12 + .github/actions/bot/action.yaml | 13 + .github/actions/bot/index.js | 
155 +++++++ .github/actions/bot/package-lock.json | 430 ++++++++++++++++++ .github/actions/bot/package.json | 13 + .github/actions/ci/build/action.yaml | 26 ++ .github/actions/ci/launch/action.yaml | 52 +++ .github/actions/ci/sonobuoy/action.yaml | 15 + .../actions/janitor/ami-sweeper/action.yaml | 13 + .github/actions/janitor/ami-sweeper/script.sh | 41 ++ .../janitor/cluster-sweeper/action.yaml | 13 + .../actions/janitor/cluster-sweeper/script.sh | 50 ++ .github/workflows/alas-issues.yaml | 26 -- .github/workflows/bot-trigger.yaml | 14 + .github/workflows/{ci.yaml => ci-auto.yaml} | 8 +- .github/workflows/ci-manual.yaml | 179 ++++++++ .github/workflows/janitor.yaml | 34 ++ kubernetes-versions.json | 7 + 19 files changed, 1070 insertions(+), 32 deletions(-) create mode 100644 .github/actions/bot/.gitignore create mode 100644 .github/actions/bot/README.md create mode 100644 .github/actions/bot/action.yaml create mode 100644 .github/actions/bot/index.js create mode 100644 .github/actions/bot/package-lock.json create mode 100644 .github/actions/bot/package.json create mode 100644 .github/actions/ci/build/action.yaml create mode 100644 .github/actions/ci/launch/action.yaml create mode 100644 .github/actions/ci/sonobuoy/action.yaml create mode 100644 .github/actions/janitor/ami-sweeper/action.yaml create mode 100755 .github/actions/janitor/ami-sweeper/script.sh create mode 100644 .github/actions/janitor/cluster-sweeper/action.yaml create mode 100755 .github/actions/janitor/cluster-sweeper/script.sh delete mode 100644 .github/workflows/alas-issues.yaml create mode 100644 .github/workflows/bot-trigger.yaml rename .github/workflows/{ci.yaml => ci-auto.yaml} (83%) create mode 100644 .github/workflows/ci-manual.yaml create mode 100644 .github/workflows/janitor.yaml create mode 100644 kubernetes-versions.json diff --git a/.github/actions/bot/.gitignore b/.github/actions/bot/.gitignore new file mode 100644 index 000000000..c2658d7d1 --- /dev/null +++ 
b/.github/actions/bot/.gitignore @@ -0,0 +1 @@ +node_modules/ diff --git a/.github/actions/bot/README.md b/.github/actions/bot/README.md new file mode 100644 index 000000000..7b90fb7bd --- /dev/null +++ b/.github/actions/bot/README.md @@ -0,0 +1,12 @@ +# bot + +This GitHub Action parses commands from pull request comments and executes them. + +Only authorized users (members and owners of this repository) are able to execute commands. + +Commands look like: +``` +/echo hello world +``` + +Multiple commands can be included in a comment, one per line; but each command must be unique. diff --git a/.github/actions/bot/action.yaml b/.github/actions/bot/action.yaml new file mode 100644 index 000000000..dfb471a30 --- /dev/null +++ b/.github/actions/bot/action.yaml @@ -0,0 +1,13 @@ +name: "Bot" +description: "🤖 beep boop" +runs: + using: "composite" + steps: + - uses: "actions/checkout@v3" + - uses: "actions/github-script@v6" + with: + script: | + const crypto = require('crypto'); + const uuid = crypto.randomUUID(); + const bot = require('./.github/actions/bot/index.js'); + await bot(core, github, context, uuid); \ No newline at end of file diff --git a/.github/actions/bot/index.js b/.github/actions/bot/index.js new file mode 100644 index 000000000..d9e67ba35 --- /dev/null +++ b/.github/actions/bot/index.js @@ -0,0 +1,155 @@ +// this script cannot require/import, because it's called by actions/github-script. 
+// any dependencies must be passed in the inline script in action.yaml + +async function bot(core, github, context, uuid) { + const payload = context.payload; + + if (!payload.comment) { + console.log("No comment found in payload"); + return; + } + console.log("Comment found in payload"); + + const author = payload.comment.user.login; + const authorized = ["OWNER", "MEMBER"].includes(payload.comment.author_association); + if (!authorized) { + console.log(`Comment author is not authorized: ${author}`); + return; + } + console.log(`Comment author is authorized: ${author}`); + + const commands = parseCommands(uuid, payload, payload.comment.body); + if (commands.length === 0) { + console.log("No commands found in comment body"); + return; + } + const uniqueCommands = [...new Set(commands.map(command => typeof command))]; + if (uniqueCommands.length != commands.length) { + console.log("Duplicate commands found in comment body"); + return; + } + console.log(commands.length + " command(s) found in comment body"); + + for (const command of commands) { + const reply = await command.run(author, github); + if (typeof reply === 'string') { + github.rest.issues.createComment({ + owner: payload.repository.owner.login, + repo: payload.repository.name, + issue_number: payload.issue.number, + body: reply + }); + } else if (reply) { + console.log(`Command returned: ${reply}`); + } else { + console.log("Command did not return a reply"); + } + } +} + +// parseCommands splits the comment body into lines and parses each line as a command. +function parseCommands(uuid, payload, commentBody) { + const commands = []; + if (!commentBody) { + return commands; + } + const lines = commentBody.split(/\r?\n/); + for (const line of lines) { + const command = parseCommand(uuid, payload, line); + if (command) { + commands.push(command); + } + } + return commands +} + +// parseCommand parses a line as a command. +// The format of a command is `/NAME ARGS...`. 
+// Leading and trailing spaces are ignored. +function parseCommand(uuid, payload, line) { + const command = line.trim().match(/^\/([a-z\-]+)(?:\s+(.+))?$/); + if (command) { + return buildCommand(uuid, payload, command[1], command[2]); + } + return null; +} + +// buildCommand builds a command from a name and arguments. +function buildCommand(uuid, payload, name, args) { + switch (name) { + case "echo": + return new EchoCommand(uuid, payload, args); + case "ci": + return new CICommand(uuid, payload, args); + default: + console.log(`Unknown command: ${name}`); + return null; + } +} + +class EchoCommand { + constructor(uuid, payload, args) { + this.phrase = args ? args : "echo"; + } + + run(author) { + return `@${author} *${this.phrase}*`; + } +} + +class CICommand { + constructor(uuid, payload, args) { + this.repository_owner = payload.repository.owner.login; + this.repository_name = payload.repository.name; + this.pr_number = payload.issue.number; + this.comment_url = payload.comment.html_url; + this.uuid = uuid; + this.goal = "test"; + // "test" goal, which executes all CI stages, is the default when no goal is specified + if (args != null && args != "") { + this.goal = args; + } + } + + async run(author, github) { + const pr = await github.rest.pulls.get({ + owner: this.repository_owner, + repo: this.repository_name, + pull_number: this.pr_number + }); + const mergeable = pr.data.mergeable; + switch (mergeable) { + case true: + break; + case false: + case null: + return `@${author} this PR is not currently mergeable, you'll need to rebase it first.`; + default: + throw new Error(`Unknown mergeable value: ${mergeable}`); + } + const inputs = { + uuid: this.uuid, + pr_number: this.pr_number.toString(), + git_sha: pr.data.merge_commit_sha, + goal: this.goal, + requester: author, + comment_url: this.comment_url + }; + console.log(`Dispatching workflow with inputs: ${JSON.stringify(inputs)}`); + await github.rest.actions.createWorkflowDispatch({ + owner: 
this.repository_owner, + repo: this.repository_name, + workflow_id: 'ci-manual.yaml', + ref: 'master', + inputs: inputs + }); + return null; + } +} + + +module.exports = async (core, github, context, uuid) => { + bot(core, github, context, uuid).catch((error) => { + core.setFailed(error); + }); +} \ No newline at end of file diff --git a/.github/actions/bot/package-lock.json b/.github/actions/bot/package-lock.json new file mode 100644 index 000000000..333a0db57 --- /dev/null +++ b/.github/actions/bot/package-lock.json @@ -0,0 +1,430 @@ +{ + "name": "bot", + "version": "1.0.0", + "lockfileVersion": 2, + "requires": true, + "packages": { + "": { + "name": "bot", + "version": "1.0.0", + "dependencies": { + "@actions/core": "^1.10.0", + "@actions/github": "^5.1.1" + } + }, + "node_modules/@actions/core": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/@actions/core/-/core-1.10.0.tgz", + "integrity": "sha512-2aZDDa3zrrZbP5ZYg159sNoLRb61nQ7awl5pSvIq5Qpj81vwDzdMRKzkWJGJuwVvWpvZKx7vspJALyvaaIQyug==", + "dependencies": { + "@actions/http-client": "^2.0.1", + "uuid": "^8.3.2" + } + }, + "node_modules/@actions/github": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/@actions/github/-/github-5.1.1.tgz", + "integrity": "sha512-Nk59rMDoJaV+mHCOJPXuvB1zIbomlKS0dmSIqPGxd0enAXBnOfn4VWF+CGtRCwXZG9Epa54tZA7VIRlJDS8A6g==", + "dependencies": { + "@actions/http-client": "^2.0.1", + "@octokit/core": "^3.6.0", + "@octokit/plugin-paginate-rest": "^2.17.0", + "@octokit/plugin-rest-endpoint-methods": "^5.13.0" + } + }, + "node_modules/@actions/http-client": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@actions/http-client/-/http-client-2.1.1.tgz", + "integrity": "sha512-qhrkRMB40bbbLo7gF+0vu+X+UawOvQQqNAA/5Unx774RS8poaOhThDOG6BGmxvAnxhQnDp2BG/ZUm65xZILTpw==", + "dependencies": { + "tunnel": "^0.0.6" + } + }, + "node_modules/@octokit/auth-token": { + "version": "2.5.0", + "resolved": 
"https://registry.npmjs.org/@octokit/auth-token/-/auth-token-2.5.0.tgz", + "integrity": "sha512-r5FVUJCOLl19AxiuZD2VRZ/ORjp/4IN98Of6YJoJOkY75CIBuYfmiNHGrDwXr+aLGG55igl9QrxX3hbiXlLb+g==", + "dependencies": { + "@octokit/types": "^6.0.3" + } + }, + "node_modules/@octokit/core": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@octokit/core/-/core-3.6.0.tgz", + "integrity": "sha512-7RKRKuA4xTjMhY+eG3jthb3hlZCsOwg3rztWh75Xc+ShDWOfDDATWbeZpAHBNRpm4Tv9WgBMOy1zEJYXG6NJ7Q==", + "dependencies": { + "@octokit/auth-token": "^2.4.4", + "@octokit/graphql": "^4.5.8", + "@octokit/request": "^5.6.3", + "@octokit/request-error": "^2.0.5", + "@octokit/types": "^6.0.3", + "before-after-hook": "^2.2.0", + "universal-user-agent": "^6.0.0" + } + }, + "node_modules/@octokit/endpoint": { + "version": "6.0.12", + "resolved": "https://registry.npmjs.org/@octokit/endpoint/-/endpoint-6.0.12.tgz", + "integrity": "sha512-lF3puPwkQWGfkMClXb4k/eUT/nZKQfxinRWJrdZaJO85Dqwo/G0yOC434Jr2ojwafWJMYqFGFa5ms4jJUgujdA==", + "dependencies": { + "@octokit/types": "^6.0.3", + "is-plain-object": "^5.0.0", + "universal-user-agent": "^6.0.0" + } + }, + "node_modules/@octokit/graphql": { + "version": "4.8.0", + "resolved": "https://registry.npmjs.org/@octokit/graphql/-/graphql-4.8.0.tgz", + "integrity": "sha512-0gv+qLSBLKF0z8TKaSKTsS39scVKF9dbMxJpj3U0vC7wjNWFuIpL/z76Qe2fiuCbDRcJSavkXsVtMS6/dtQQsg==", + "dependencies": { + "@octokit/request": "^5.6.0", + "@octokit/types": "^6.0.3", + "universal-user-agent": "^6.0.0" + } + }, + "node_modules/@octokit/openapi-types": { + "version": "12.11.0", + "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-12.11.0.tgz", + "integrity": "sha512-VsXyi8peyRq9PqIz/tpqiL2w3w80OgVMwBHltTml3LmVvXiphgeqmY9mvBw9Wu7e0QWk/fqD37ux8yP5uVekyQ==" + }, + "node_modules/@octokit/plugin-paginate-rest": { + "version": "2.21.3", + "resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-2.21.3.tgz", + "integrity": 
"sha512-aCZTEf0y2h3OLbrgKkrfFdjRL6eSOo8komneVQJnYecAxIej7Bafor2xhuDJOIFau4pk0i/P28/XgtbyPF0ZHw==", + "dependencies": { + "@octokit/types": "^6.40.0" + }, + "peerDependencies": { + "@octokit/core": ">=2" + } + }, + "node_modules/@octokit/plugin-rest-endpoint-methods": { + "version": "5.16.2", + "resolved": "https://registry.npmjs.org/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-5.16.2.tgz", + "integrity": "sha512-8QFz29Fg5jDuTPXVtey05BLm7OB+M8fnvE64RNegzX7U+5NUXcOcnpTIK0YfSHBg8gYd0oxIq3IZTe9SfPZiRw==", + "dependencies": { + "@octokit/types": "^6.39.0", + "deprecation": "^2.3.1" + }, + "peerDependencies": { + "@octokit/core": ">=3" + } + }, + "node_modules/@octokit/request": { + "version": "5.6.3", + "resolved": "https://registry.npmjs.org/@octokit/request/-/request-5.6.3.tgz", + "integrity": "sha512-bFJl0I1KVc9jYTe9tdGGpAMPy32dLBXXo1dS/YwSCTL/2nd9XeHsY616RE3HPXDVk+a+dBuzyz5YdlXwcDTr2A==", + "dependencies": { + "@octokit/endpoint": "^6.0.1", + "@octokit/request-error": "^2.1.0", + "@octokit/types": "^6.16.1", + "is-plain-object": "^5.0.0", + "node-fetch": "^2.6.7", + "universal-user-agent": "^6.0.0" + } + }, + "node_modules/@octokit/request-error": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/@octokit/request-error/-/request-error-2.1.0.tgz", + "integrity": "sha512-1VIvgXxs9WHSjicsRwq8PlR2LR2x6DwsJAaFgzdi0JfJoGSO8mYI/cHJQ+9FbN21aa+DrgNLnwObmyeSC8Rmpg==", + "dependencies": { + "@octokit/types": "^6.0.3", + "deprecation": "^2.0.0", + "once": "^1.4.0" + } + }, + "node_modules/@octokit/types": { + "version": "6.41.0", + "resolved": "https://registry.npmjs.org/@octokit/types/-/types-6.41.0.tgz", + "integrity": "sha512-eJ2jbzjdijiL3B4PrSQaSjuF2sPEQPVCPzBvTHJD9Nz+9dw2SGH4K4xeQJ77YfTq5bRQ+bD8wT11JbeDPmxmGg==", + "dependencies": { + "@octokit/openapi-types": "^12.11.0" + } + }, + "node_modules/before-after-hook": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/before-after-hook/-/before-after-hook-2.2.3.tgz", + 
"integrity": "sha512-NzUnlZexiaH/46WDhANlyR2bXRopNg4F/zuSA3OpZnllCUgRaOF2znDioDWrmbNVsuZk6l9pMquQB38cfBZwkQ==" + }, + "node_modules/deprecation": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/deprecation/-/deprecation-2.3.1.tgz", + "integrity": "sha512-xmHIy4F3scKVwMsQ4WnVaS8bHOx0DmVwRywosKhaILI0ywMDWPtBSku2HNxRvF7jtwDRsoEwYQSfbxj8b7RlJQ==" + }, + "node_modules/is-plain-object": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-5.0.0.tgz", + "integrity": "sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/node-fetch": { + "version": "2.6.13", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.13.tgz", + "integrity": "sha512-StxNAxh15zr77QvvkmveSQ8uCQ4+v5FkvNTj0OESmiHu+VRi/gXArXtkWMElOsOUNLtUEvI4yS+rdtOHZTwlQA==", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" + }, + "node_modules/tunnel": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/tunnel/-/tunnel-0.0.6.tgz", + "integrity": "sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg==", + "engines": { + "node": ">=0.6.11 <=0.7.0 || >=0.7.3" + } + }, + "node_modules/universal-user-agent": { + "version": 
"6.0.0", + "resolved": "https://registry.npmjs.org/universal-user-agent/-/universal-user-agent-6.0.0.tgz", + "integrity": "sha512-isyNax3wXoKaulPDZWHQqbmIx1k2tb9fb3GGDBRxCscfYV2Ch7WxPArBsFEG8s/safwXTT7H4QGhaIkTp9447w==" + }, + "node_modules/uuid": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", + "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + } + }, + "dependencies": { + "@actions/core": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/@actions/core/-/core-1.10.0.tgz", + "integrity": "sha512-2aZDDa3zrrZbP5ZYg159sNoLRb61nQ7awl5pSvIq5Qpj81vwDzdMRKzkWJGJuwVvWpvZKx7vspJALyvaaIQyug==", + "requires": { + "@actions/http-client": "^2.0.1", + "uuid": "^8.3.2" + } + }, + "@actions/github": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/@actions/github/-/github-5.1.1.tgz", + "integrity": "sha512-Nk59rMDoJaV+mHCOJPXuvB1zIbomlKS0dmSIqPGxd0enAXBnOfn4VWF+CGtRCwXZG9Epa54tZA7VIRlJDS8A6g==", + "requires": { + "@actions/http-client": "^2.0.1", + "@octokit/core": "^3.6.0", + 
"@octokit/plugin-paginate-rest": "^2.17.0", + "@octokit/plugin-rest-endpoint-methods": "^5.13.0" + } + }, + "@actions/http-client": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@actions/http-client/-/http-client-2.1.1.tgz", + "integrity": "sha512-qhrkRMB40bbbLo7gF+0vu+X+UawOvQQqNAA/5Unx774RS8poaOhThDOG6BGmxvAnxhQnDp2BG/ZUm65xZILTpw==", + "requires": { + "tunnel": "^0.0.6" + } + }, + "@octokit/auth-token": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/@octokit/auth-token/-/auth-token-2.5.0.tgz", + "integrity": "sha512-r5FVUJCOLl19AxiuZD2VRZ/ORjp/4IN98Of6YJoJOkY75CIBuYfmiNHGrDwXr+aLGG55igl9QrxX3hbiXlLb+g==", + "requires": { + "@octokit/types": "^6.0.3" + } + }, + "@octokit/core": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@octokit/core/-/core-3.6.0.tgz", + "integrity": "sha512-7RKRKuA4xTjMhY+eG3jthb3hlZCsOwg3rztWh75Xc+ShDWOfDDATWbeZpAHBNRpm4Tv9WgBMOy1zEJYXG6NJ7Q==", + "requires": { + "@octokit/auth-token": "^2.4.4", + "@octokit/graphql": "^4.5.8", + "@octokit/request": "^5.6.3", + "@octokit/request-error": "^2.0.5", + "@octokit/types": "^6.0.3", + "before-after-hook": "^2.2.0", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/endpoint": { + "version": "6.0.12", + "resolved": "https://registry.npmjs.org/@octokit/endpoint/-/endpoint-6.0.12.tgz", + "integrity": "sha512-lF3puPwkQWGfkMClXb4k/eUT/nZKQfxinRWJrdZaJO85Dqwo/G0yOC434Jr2ojwafWJMYqFGFa5ms4jJUgujdA==", + "requires": { + "@octokit/types": "^6.0.3", + "is-plain-object": "^5.0.0", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/graphql": { + "version": "4.8.0", + "resolved": "https://registry.npmjs.org/@octokit/graphql/-/graphql-4.8.0.tgz", + "integrity": "sha512-0gv+qLSBLKF0z8TKaSKTsS39scVKF9dbMxJpj3U0vC7wjNWFuIpL/z76Qe2fiuCbDRcJSavkXsVtMS6/dtQQsg==", + "requires": { + "@octokit/request": "^5.6.0", + "@octokit/types": "^6.0.3", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/openapi-types": { + "version": "12.11.0", + "resolved": 
"https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-12.11.0.tgz", + "integrity": "sha512-VsXyi8peyRq9PqIz/tpqiL2w3w80OgVMwBHltTml3LmVvXiphgeqmY9mvBw9Wu7e0QWk/fqD37ux8yP5uVekyQ==" + }, + "@octokit/plugin-paginate-rest": { + "version": "2.21.3", + "resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-2.21.3.tgz", + "integrity": "sha512-aCZTEf0y2h3OLbrgKkrfFdjRL6eSOo8komneVQJnYecAxIej7Bafor2xhuDJOIFau4pk0i/P28/XgtbyPF0ZHw==", + "requires": { + "@octokit/types": "^6.40.0" + } + }, + "@octokit/plugin-rest-endpoint-methods": { + "version": "5.16.2", + "resolved": "https://registry.npmjs.org/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-5.16.2.tgz", + "integrity": "sha512-8QFz29Fg5jDuTPXVtey05BLm7OB+M8fnvE64RNegzX7U+5NUXcOcnpTIK0YfSHBg8gYd0oxIq3IZTe9SfPZiRw==", + "requires": { + "@octokit/types": "^6.39.0", + "deprecation": "^2.3.1" + } + }, + "@octokit/request": { + "version": "5.6.3", + "resolved": "https://registry.npmjs.org/@octokit/request/-/request-5.6.3.tgz", + "integrity": "sha512-bFJl0I1KVc9jYTe9tdGGpAMPy32dLBXXo1dS/YwSCTL/2nd9XeHsY616RE3HPXDVk+a+dBuzyz5YdlXwcDTr2A==", + "requires": { + "@octokit/endpoint": "^6.0.1", + "@octokit/request-error": "^2.1.0", + "@octokit/types": "^6.16.1", + "is-plain-object": "^5.0.0", + "node-fetch": "^2.6.7", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/request-error": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/@octokit/request-error/-/request-error-2.1.0.tgz", + "integrity": "sha512-1VIvgXxs9WHSjicsRwq8PlR2LR2x6DwsJAaFgzdi0JfJoGSO8mYI/cHJQ+9FbN21aa+DrgNLnwObmyeSC8Rmpg==", + "requires": { + "@octokit/types": "^6.0.3", + "deprecation": "^2.0.0", + "once": "^1.4.0" + } + }, + "@octokit/types": { + "version": "6.41.0", + "resolved": "https://registry.npmjs.org/@octokit/types/-/types-6.41.0.tgz", + "integrity": "sha512-eJ2jbzjdijiL3B4PrSQaSjuF2sPEQPVCPzBvTHJD9Nz+9dw2SGH4K4xeQJ77YfTq5bRQ+bD8wT11JbeDPmxmGg==", + "requires": { + 
"@octokit/openapi-types": "^12.11.0" + } + }, + "before-after-hook": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/before-after-hook/-/before-after-hook-2.2.3.tgz", + "integrity": "sha512-NzUnlZexiaH/46WDhANlyR2bXRopNg4F/zuSA3OpZnllCUgRaOF2znDioDWrmbNVsuZk6l9pMquQB38cfBZwkQ==" + }, + "deprecation": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/deprecation/-/deprecation-2.3.1.tgz", + "integrity": "sha512-xmHIy4F3scKVwMsQ4WnVaS8bHOx0DmVwRywosKhaILI0ywMDWPtBSku2HNxRvF7jtwDRsoEwYQSfbxj8b7RlJQ==" + }, + "is-plain-object": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-5.0.0.tgz", + "integrity": "sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==" + }, + "node-fetch": { + "version": "2.6.13", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.13.tgz", + "integrity": "sha512-StxNAxh15zr77QvvkmveSQ8uCQ4+v5FkvNTj0OESmiHu+VRi/gXArXtkWMElOsOUNLtUEvI4yS+rdtOHZTwlQA==", + "requires": { + "whatwg-url": "^5.0.0" + } + }, + "once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "requires": { + "wrappy": "1" + } + }, + "tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" + }, + "tunnel": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/tunnel/-/tunnel-0.0.6.tgz", + "integrity": "sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg==" + }, + "universal-user-agent": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/universal-user-agent/-/universal-user-agent-6.0.0.tgz", + "integrity": 
"sha512-isyNax3wXoKaulPDZWHQqbmIx1k2tb9fb3GGDBRxCscfYV2Ch7WxPArBsFEG8s/safwXTT7H4QGhaIkTp9447w==" + }, + "uuid": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", + "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==" + }, + "webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" + }, + "whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "requires": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + } + } +} diff --git a/.github/actions/bot/package.json b/.github/actions/bot/package.json new file mode 100644 index 000000000..0c3a320e9 --- /dev/null +++ b/.github/actions/bot/package.json @@ -0,0 +1,13 @@ +{ + "name": "bot", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "command": "./local-harness.js $@" + }, + "dependencies": { + "@actions/core": "^1.10.0", + "@actions/github": "^5.1.1" + } +} diff --git a/.github/actions/ci/build/action.yaml b/.github/actions/ci/build/action.yaml new file mode 100644 index 000000000..f7ad76035 --- /dev/null +++ b/.github/actions/ci/build/action.yaml @@ -0,0 +1,26 @@ +name: "[CI] Build" +inputs: + git_sha: + required: true + type: string + build_id: + required: true + type: string + k8s_version: + required: true + type: string +outputs: + ami_id: + value: ${{ steps.build.outputs.ami_id }} +runs: + 
using: "composite" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.git_sha }} + - id: build + shell: bash + run: | + AMI_NAME="amazon-eks-node-${{ inputs.k8s_version }}-${{ inputs.build_id }}" + make ${{ inputs.k8s_version }} ami_name=${AMI_NAME} + echo "ami_id=$(jq -r .builds[0].artifact_id "${AMI_NAME}-manifest.json" | cut -d ':' -f 2)" >> $GITHUB_OUTPUT diff --git a/.github/actions/ci/launch/action.yaml b/.github/actions/ci/launch/action.yaml new file mode 100644 index 000000000..c5e6303b8 --- /dev/null +++ b/.github/actions/ci/launch/action.yaml @@ -0,0 +1,52 @@ +name: '[CI] Integration test / Launch' +inputs: + build_id: + required: true + type: string + ami_id: + required: true + type: string + k8s_version: + required: true + type: string + aws_region: + required: true + type: string +outputs: + cluster_name: + value: ${{ steps.launch.outputs.cluster_name }} +runs: + using: "composite" + steps: + - id: launch + shell: bash + run: | + wget --no-verbose -O eksctl.tar.gz "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz" + tar xf eksctl.tar.gz && chmod +x ./eksctl + + SANITIZED_K8S_VERSION=$(echo ${{ inputs.k8s_version }} | tr -d '.') + CLUSTER_NAME="$SANITIZED_K8S_VERSION-${{ inputs.build_id }}" + + echo '--- + apiVersion: eksctl.io/v1alpha5 + kind: ClusterConfig + metadata: + name: "'$CLUSTER_NAME'" + region: "${{ inputs.aws_region }}" + version: "${{ inputs.k8s_version }}" + nodeGroups: + - name: "${{ inputs.build_id }}" + instanceType: m5.large + minSize: 3 + maxSize: 3 + desiredCapacity: 3 + ami: "${{ inputs.ami_id }}" + amiFamily: AmazonLinux2 + overrideBootstrapCommand: | + #!/bin/bash + source /var/lib/cloud/scripts/eksctl/bootstrap.helper.sh + /etc/eks/bootstrap.sh "'$CLUSTER_NAME'" --kubelet-extra-args "--node-labels=${NODE_LABELS}"' >> cluster.yaml + cat cluster.yaml + + ./eksctl create cluster --config-file cluster.yaml + echo "cluster_name=$CLUSTER_NAME" >> $GITHUB_OUTPUT diff --git 
a/.github/actions/ci/sonobuoy/action.yaml b/.github/actions/ci/sonobuoy/action.yaml new file mode 100644 index 000000000..e829719b9 --- /dev/null +++ b/.github/actions/ci/sonobuoy/action.yaml @@ -0,0 +1,15 @@ +name: '[CI] Integration test / Sonobuoy' +inputs: + cluster_name: + required: true + type: string +runs: + using: "composite" + steps: + - shell: bash + run: | + aws eks update-kubeconfig --name ${{ inputs.cluster_name }} + wget --no-verbose -O sonobuoy.tar.gz "https://github.com/vmware-tanzu/sonobuoy/releases/download/v0.56.11/sonobuoy_0.56.11_linux_amd64.tar.gz" + tar xf sonobuoy.tar.gz && chmod +x ./sonobuoy + ./sonobuoy run --wait + ./sonobuoy results $(./sonobuoy retrieve) diff --git a/.github/actions/janitor/ami-sweeper/action.yaml b/.github/actions/janitor/ami-sweeper/action.yaml new file mode 100644 index 000000000..e7735cc32 --- /dev/null +++ b/.github/actions/janitor/ami-sweeper/action.yaml @@ -0,0 +1,13 @@ +name: "[Janitor] AMI sweeper" +description: "🗑️ Deletes CI AMI's when they're no longer needed" +inputs: + max_age_seconds: + description: "Number of seconds after creation when an AMI becomes eligible for deletion" + required: true +runs: + using: "composite" + steps: + - run: ${{ github.action_path }}/script.sh + shell: bash + env: + MAX_AGE_SECONDS: ${{ inputs.max_age_seconds }} diff --git a/.github/actions/janitor/ami-sweeper/script.sh b/.github/actions/janitor/ami-sweeper/script.sh new file mode 100755 index 000000000..f20e6005a --- /dev/null +++ b/.github/actions/janitor/ami-sweeper/script.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail + +MAX_AGE_SECONDS=${MAX_AGE_SECONDS:-$1} +if [ -z "${MAX_AGE_SECONDS}" ]; then + echo "usage: $0 MAX_AGE_SECONDS" + exit 1 +fi + +set -o nounset + +# https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-retries.html +AWS_RETRY_MODE=standard +AWS_MAX_ATTEMPTS=5 + +function jqb64() { + if [ "$#" -lt 2 ]; then + echo "usage: jqb64 BASE64_JSON JQ_ARGS..." 
+ exit 1 + fi + BASE64_JSON="$1" + shift + echo "$BASE64_JSON" | base64 --decode | jq "$@" +} +for IMAGE_DETAILS in $(aws ec2 describe-images --owners self --output json | jq -r '.Images[] | @base64'); do + NAME=$(jqb64 "$IMAGE_DETAILS" -r '.Name') + IMAGE_ID=$(jqb64 "$IMAGE_DETAILS" -r '.ImageId') + CREATION_DATE=$(jqb64 "$IMAGE_DETAILS" -r '.CreationDate') + CREATION_DATE_SECONDS=$(date -d "$CREATION_DATE" '+%s') + CURRENT_TIME_SECONDS=$(date '+%s') + MIN_CREATION_DATE_SECONDS=$(($CURRENT_TIME_SECONDS - $MAX_AGE_SECONDS)) + if [ "$CREATION_DATE_SECONDS" -lt "$MIN_CREATION_DATE_SECONDS" ]; then + aws ec2 deregister-image --image-id "$IMAGE_ID" + for SNAPSHOT_ID in $(jqb64 "$IMAGE_DETAILS" -r '.BlockDeviceMappings[].Ebs.SnapshotId'); do + aws ec2 delete-snapshot --snapshot-id "$SNAPSHOT_ID" + done + echo "Deleted $IMAGE_ID: $NAME" + fi +done diff --git a/.github/actions/janitor/cluster-sweeper/action.yaml b/.github/actions/janitor/cluster-sweeper/action.yaml new file mode 100644 index 000000000..e53de27d1 --- /dev/null +++ b/.github/actions/janitor/cluster-sweeper/action.yaml @@ -0,0 +1,13 @@ +name: "[Janitor] Cluster sweeper" +description: "🗑️ Deletes CI clusters when they're no longer needed" +inputs: + max_age_seconds: + description: "Number of seconds after creation when a cluster becomes eligible for deletion" + required: true +runs: + using: "composite" + steps: + - run: ${{ github.action_path }}/script.sh + shell: bash + env: + MAX_AGE_SECONDS: ${{ inputs.max_age_seconds }} diff --git a/.github/actions/janitor/cluster-sweeper/script.sh b/.github/actions/janitor/cluster-sweeper/script.sh new file mode 100755 index 000000000..97c041eec --- /dev/null +++ b/.github/actions/janitor/cluster-sweeper/script.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail + +MAX_AGE_SECONDS=${MAX_AGE_SECONDS:-$1} +if [ -z "${MAX_AGE_SECONDS}" ]; then + echo "usage: $0 MAX_AGE_SECONDS" + exit 1 +fi + +set -o nounset + +# 
https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-retries.html +AWS_RETRY_MODE=standard +AWS_MAX_ATTEMPTS=5 + +function iso8601_is_eligible_for_deletion() { + local TIME_IN_ISO8601="$1" + local TIME_IN_SECONDS=$(date -d "$TIME_IN_ISO8601" '+%s') + local CURRENT_TIME_IN_SECONDS=$(date '+%s') + MIN_TIME_SECONDS=$(($CURRENT_TIME_IN_SECONDS - $MAX_AGE_SECONDS)) + [ "$TIME_IN_SECONDS" -lt "$MIN_TIME_SECONDS" ] +} +function cluster_is_eligible_for_deletion() { + local CLUSTER_NAME="$1" + local CREATED_AT_ISO8601=$(aws eks describe-cluster --name $CLUSTER_NAME --query 'cluster.createdAt' --output text) + iso8601_is_eligible_for_deletion "$CREATED_AT_ISO8601" +} +function nodegroup_is_eligible_for_deletion() { + local CLUSTER_NAME="$1" + local NODEGROUP_NAME="$2" + local CREATED_AT_ISO8601=$(aws eks describe-nodegroup --cluster-name "$CLUSTER_NAME" --nodegroup-name $NODEGROUP_NAME --query 'nodegroup.createdAt' --output text) + iso8601_is_eligible_for_deletion "$CREATED_AT_ISO8601" +} +wget --no-verbose -O eksctl.tar.gz "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz" +tar xf eksctl.tar.gz && chmod +x ./eksctl +for CLUSTER in $(aws eks list-clusters --query 'clusters[]' --output text); do + for NODEGROUP in $(aws eks list-nodegroups --cluster-name $CLUSTER --query 'nodegroups[]' --output text); do + if nodegroup_is_eligible_for_deletion $CLUSTER $NODEGROUP; then + ./eksctl delete nodegroup --cluster $CLUSTER --name $NODEGROUP + fi + done + if [ "$(aws eks list-nodegroups --cluster-name $CLUSTER --output json | jq '.nodegroups | length')" -gt 0 ]; then + echo "Skipping cluster $CLUSTER" + elif cluster_is_eligible_for_deletion $CLUSTER; then + echo "Deleting cluster $CLUSTER" + ./eksctl delete cluster --name "$CLUSTER" + fi +done diff --git a/.github/workflows/alas-issues.yaml b/.github/workflows/alas-issues.yaml deleted file mode 100644 index d71611bdc..000000000 --- a/.github/workflows/alas-issues.yaml +++ /dev/null @@ 
-1,26 +0,0 @@ ---- -name: "[ALAS] Open issues for new bulletins" -on: - workflow_dispatch: - inputs: - window: - description: "Only consider bulletins published within this relative time window (golang Duration)" - default: "24h" - required: true - schedule: - # once an hour, at the top of hour - - cron: "0 * * * *" -permissions: - issues: write -jobs: - alas-al2-bulletins: - runs-on: ubuntu-latest - steps: - - uses: guilhem/rss-issues-action@0.5.2 - with: - repo-token: "${{ secrets.GITHUB_TOKEN }}" - feed: "https://alas.aws.amazon.com/AL2/alas.rss" - dry-run: "true" - lastTime: "${{ github.event.inputs.window || '24h' }}" - labels: "alas,alas/al2" - titleFilter: "(medium|low)" diff --git a/.github/workflows/bot-trigger.yaml b/.github/workflows/bot-trigger.yaml new file mode 100644 index 000000000..d728d4f10 --- /dev/null +++ b/.github/workflows/bot-trigger.yaml @@ -0,0 +1,14 @@ +name: Bot +run-name: 🤖 beep boop +on: + issue_comment: + types: + - created +jobs: + bot: + if: ${{ github.event.issue.pull_request }} + runs-on: ubuntu-latest + permissions: write-all + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/bot diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci-auto.yaml similarity index 83% rename from .github/workflows/ci.yaml rename to .github/workflows/ci-auto.yaml index 7f780e683..879ba2bb3 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci-auto.yaml @@ -1,9 +1,5 @@ -name: CI +name: "[CI] Auto" on: - workflow_dispatch: - push: - branches: - - 'master' pull_request: types: - opened @@ -17,7 +13,7 @@ jobs: - run: echo "$(go env GOPATH)/bin" >> $GITHUB_PATH - run: go install mvdan.cc/sh/v3/cmd/shfmt@latest - run: make lint - test: + unit-test: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/ci-manual.yaml b/.github/workflows/ci-manual.yaml new file mode 100644 index 000000000..6bffb2f8a --- /dev/null +++ b/.github/workflows/ci-manual.yaml @@ -0,0 +1,179 @@ +name: '[CI] Manual' 
+run-name: "#${{ inputs.pr_number }} - ${{ inputs.uuid }}" +on: + workflow_dispatch: + inputs: + requester: + required: true + type: string + comment_url: + required: true + type: string + uuid: + required: true + type: string + pr_number: + required: true + type: string + git_sha: + required: true + type: string + goal: + required: true + type: choice + default: "test" + options: + - "build" + - "launch" + - "test" +jobs: + setup: + runs-on: ubuntu-latest + outputs: + git_sha_short: ${{ steps.variables.outputs.git_sha_short }} + workflow_run_url: ${{ steps.variables.outputs.workflow_run_url }} + kubernetes_versions: ${{ steps.variables.outputs.kubernetes_versions }} + build_id: ${{ steps.variables.outputs.build_id }} + ci_step_name_prefix: ${{ steps.variables.outputs.ci_step_name_prefix }} + steps: + - uses: actions/checkout@v3 + - id: variables + run: | + echo "git_sha_short=$(echo ${{ inputs.git_sha }} | rev | cut -c-7 | rev)" >> $GITHUB_OUTPUT + echo "workflow_run_url=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" >> $GITHUB_OUTPUT + echo "kubernetes_versions=$(cat kubernetes-versions.json | jq -c .)" >> $GITHUB_OUTPUT + echo "build_id=ci-${{ inputs.pr_number }}-${{ needs.setup.outputs.git_sha_short }}-${{ inputs.uuid }}" >> $GITHUB_OUTPUT + echo 'ci_step_name_prefix=CI:' >> $GITHUB_OUTPUT + notify-start: + runs-on: ubuntu-latest + needs: + - setup + steps: + - uses: actions/github-script@v6 + with: + script: | + github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: ${{ inputs.pr_number }}, + body: `@${{ inputs.requester }} roger [that](${{ inputs.comment_url }})! I've dispatched a [workflow](${{ needs.setup.outputs.workflow_run_url }}). 
👍` + }); + kubernetes-versions: + runs-on: ubuntu-latest + name: ${{ matrix.k8s_version }} + needs: + - setup + - notify-start + permissions: + id-token: write + contents: read + strategy: + # don't bail out of all sub-tasks if one fails + fail-fast: false + matrix: + k8s_version: ${{ fromJson(needs.setup.outputs.kubernetes_versions) }} + steps: + - uses: actions/checkout@v3 + with: + ref: 'master' + - uses: aws-actions/configure-aws-credentials@v2 + with: + aws-region: ${{ secrets.AWS_REGION }} + role-to-assume: ${{ secrets.AWS_ROLE_ARN_CI }} + # 2.5 hours (job usually completes within 2 hours) + role-duration-seconds: 9000 + - name: "${{ needs.setup.outputs.ci_step_name_prefix }} Build" + id: build + uses: ./.github/actions/ci/build + with: + git_sha: ${{ inputs.git_sha }} + k8s_version: ${{ matrix.k8s_version }} + build_id: ${{ needs.setup.outputs.build_id }} + - if: ${{ inputs.goal == 'launch' || inputs.goal == 'test' }} + name: "${{ needs.setup.outputs.ci_step_name_prefix }} Launch" + id: launch + uses: ./.github/actions/ci/launch + with: + ami_id: ${{ steps.build.outputs.ami_id }} + k8s_version: ${{ matrix.k8s_version }} + build_id: ${{ needs.setup.outputs.build_id }} + aws_region: ${{ secrets.AWS_REGION }} + - if: ${{ inputs.goal == 'test' }} + name: "${{ needs.setup.outputs.ci_step_name_prefix }} Test" + id: sonobuoy + uses: ./.github/actions/ci/sonobuoy + with: + cluster_name: ${{ steps.launch.outputs.cluster_name }} + notify-outcome: + if: ${{ always() }} + runs-on: ubuntu-latest + needs: + - setup + - kubernetes-versions + steps: + - uses: actions/github-script@v6 + with: + script: | + const { data } = await github.rest.actions.listJobsForWorkflowRun({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: context.runId + }); + const conclusionEmojis = { + "success": "✅", + "skipped": "⏭️", + "failure": "❌", + "cancelled": "🚮" + }; + const uniqueStepNames = new Set(); + const stepConclusionsByK8sVersion = new Map(); + const ciStepNamePrefix = 
"${{ needs.setup.outputs.ci_step_name_prefix }}"; + for (const job of data.jobs) { + if (/\d+\.\d+/.test(job.name)) { + const k8sVersion = job.name; + for (const step of job.steps) { + if (step.name.startsWith(ciStepNamePrefix)) { + const stepName = step.name.substring(ciStepNamePrefix.length).trim(); + let stepConclusions = stepConclusionsByK8sVersion.get(k8sVersion); + if (!stepConclusions) { + stepConclusions = new Map(); + stepConclusionsByK8sVersion.set(k8sVersion, stepConclusions); + } + stepConclusions.set(stepName, step.conclusion); + uniqueStepNames.add(stepName); + } + } + } + } + const headers = [{ + data: 'Kubernetes version', + header: true + }]; + for (const stepName of uniqueStepNames.values()) { + headers.push({ + data: stepName, + header: true + }); + } + const rows = []; + for (const stepConclusionsForK8sVersion of [...stepConclusionsByK8sVersion.entries()].sort()) { + const k8sVersion = stepConclusionsForK8sVersion[0]; + const row = [k8sVersion]; + for (const step of stepConclusionsForK8sVersion[1].entries()) { + row.push(`${step[1]} ${conclusionEmojis[step[1]]}`); + } + rows.push(row); + } + const commentBody = core.summary + .addRaw("@${{ inputs.requester }} the workflow that you requested has completed. 
🎉") + .addTable([ + headers, + ...rows, + ]) + .stringify(); + github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: ${{ inputs.pr_number }}, + body: commentBody + }); \ No newline at end of file diff --git a/.github/workflows/janitor.yaml b/.github/workflows/janitor.yaml new file mode 100644 index 000000000..5fa30d879 --- /dev/null +++ b/.github/workflows/janitor.yaml @@ -0,0 +1,34 @@ +name: "Janitor" +on: + workflow_dispatch: + schedule: + # hourly at the top of the hour + - cron: "0 * * * *" +permissions: + id-token: write + contents: read +jobs: + cluster-sweeper: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: aws-actions/configure-aws-credentials@v2 + with: + aws-region: ${{ secrets.AWS_REGION }} + role-to-assume: ${{ secrets.AWS_ROLE_ARN_JANITOR }} + - uses: ./.github/actions/janitor/cluster-sweeper + with: + # 3 hours + max_age_seconds: 10800 + ami-sweeper: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: aws-actions/configure-aws-credentials@v2 + with: + aws-region: ${{ secrets.AWS_REGION }} + role-to-assume: ${{ secrets.AWS_ROLE_ARN_JANITOR }} + - uses: ./.github/actions/janitor/ami-sweeper + with: + # 3 days + max_age_seconds: 259200 diff --git a/kubernetes-versions.json b/kubernetes-versions.json new file mode 100644 index 000000000..fc7449f8d --- /dev/null +++ b/kubernetes-versions.json @@ -0,0 +1,7 @@ +[ + "1.23", + "1.24", + "1.25", + "1.26", + "1.27" +] \ No newline at end of file From 7e3dc9b2f2c291131f832a2bc7727d2b7b49e094 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 25 Aug 2023 14:23:50 -0700 Subject: [PATCH 519/621] Disable janitor in forks (#1407) --- .github/workflows/janitor.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/janitor.yaml b/.github/workflows/janitor.yaml index 5fa30d879..47fec1059 100644 --- a/.github/workflows/janitor.yaml +++ b/.github/workflows/janitor.yaml @@ -9,6 +9,8 @@ permissions: 
contents: read jobs: cluster-sweeper: + # disable in forks + if: github.repository == 'awslabs/amazon-eks-ami' runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -21,6 +23,8 @@ jobs: # 3 hours max_age_seconds: 10800 ami-sweeper: + # disable in forks + if: github.repository == 'awslabs/amazon-eks-ami' runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 From 93203d9a3c9d73c8639011b10bd8d7791b302865 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 25 Aug 2023 17:25:13 -0700 Subject: [PATCH 520/621] Add note about bot authorization (#1406) --- .github/actions/bot/index.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/actions/bot/index.js b/.github/actions/bot/index.js index d9e67ba35..76134bd9b 100644 --- a/.github/actions/bot/index.js +++ b/.github/actions/bot/index.js @@ -10,6 +10,8 @@ async function bot(core, github, context, uuid) { } console.log("Comment found in payload"); + // user's org membership must be public for the author_association to be MEMBER + // go to the org's member page, find yourself, and set the visibility to public const author = payload.comment.user.login; const authorized = ["OWNER", "MEMBER"].includes(payload.comment.author_association); if (!authorized) { @@ -152,4 +154,4 @@ module.exports = async (core, github, context, uuid) => { bot(core, github, context, uuid).catch((error) => { core.setFailed(error); }); -} \ No newline at end of file +} From e0a53727a4a2d2daeaa96bc7a756b76e31b17ab9 Mon Sep 17 00:00:00 2001 From: Andrew Johnstone Date: Tue, 29 Aug 2023 18:59:32 +0100 Subject: [PATCH 521/621] noproxy for direct communication to apiserver and timeouts of 3 seconds (#1393) --- log-collector-script/linux/eks-log-collector.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index c8e2048ae..cbd454fcb 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ 
b/log-collector-script/linux/eks-log-collector.sh @@ -534,7 +534,7 @@ get_networking_info() { CA_CRT=$(grep certificate-authority: "${COLLECT_DIR}"/kubelet/kubeconfig.yaml | sed 's/.*certificate-authority: //') for i in $(seq 5); do echo -e "curling ${API_SERVER} ($i of 5) $(date --utc +%FT%T.%3N%Z)\n\n" >> ${COLLECT_DIR}"/networking/curl_api_server.txt" - timeout 75 curl -v --cacert "${CA_CRT}" "${API_SERVER}"/livez?verbose >> ${COLLECT_DIR}"/networking/curl_api_server.txt" 2>&1 + timeout 75 curl -v --connect-timeout 3 --max-time 10 --noproxy '*' --cacert "${CA_CRT}" "${API_SERVER}"/livez?verbose >> ${COLLECT_DIR}"/networking/curl_api_server.txt" 2>&1 done fi From b882d0aeac94968b5d4f6c37affef73e2a194d18 Mon Sep 17 00:00:00 2001 From: Vela WU <50354807+wwvela@users.noreply.github.com> Date: Tue, 29 Aug 2023 17:13:01 -0700 Subject: [PATCH 522/621] Update CHANGELOG.md for 20230825 AMI release (#1408) * Update CHANGELOG.md for 20230825 AMI release --------- Co-authored-by: Vela WU <50354807+FerrelWallis@users.noreply.github.com> --- CHANGELOG.md | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a9dee708..70e47ca80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,56 @@ # Changelog +### AMI Release v20230825 +* amazon-eks-gpu-node-1.27-v20230825 +* amazon-eks-gpu-node-1.26-v20230825 +* amazon-eks-gpu-node-1.25-v20230825 +* amazon-eks-gpu-node-1.24-v20230825 +* amazon-eks-gpu-node-1.23-v20230825 +* amazon-eks-arm64-node-1.27-v20230825 +* amazon-eks-arm64-node-1.26-v20230825 +* amazon-eks-arm64-node-1.25-v20230825 +* amazon-eks-arm64-node-1.24-v20230825 +* amazon-eks-arm64-node-1.23-v20230825 +* amazon-eks-node-1.27-v20230825 +* amazon-eks-node-1.26-v20230825 +* amazon-eks-node-1.25-v20230825 +* amazon-eks-node-1.24-v20230825 +* amazon-eks-node-1.23-v20230825 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* 
`1.27.4-20230825` +* `1.26.7-20230825` +* `1.25.12-20230825` +* `1.24.16-20230825` +* `1.23.17-20230825` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.4/2023-08-16/ +* s3://amazon-eks/1.26.7/2023-08-16/ +* s3://amazon-eks/1.25.12/2023-08-16/ +* s3://amazon-eks/1.24.16/2023-08-16/ +* s3://amazon-eks/1.23.17/2023-08-16/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.253-167.359.amzn2 + * Kubernetes 1.24 and above: 5.10.186-179.751.amzn2 + * **Note** that the GPU AMI will continue to use kernel-5.4 as we work to address a [compatibility issue](https://github.com/awslabs/amazon-eks-ami/issues/1222) with `nvidia-driver-latest-dkms`. +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.3 +* `runc`: 1.1.7-3.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.2.1478.0-1 + +Notable changes: + - containerd updated to address: + - [ALAS2DOCKER-2023-029](https://alas.aws.amazon.com/AL2/ALASDOCKER-2023-029.html) + - runc updated to address: + - [ALAS2DOCKER-2023-028](https://alas.aws.amazon.com/AL2/ALASDOCKER-2023-028.html) + - Fetch new IMDS token for every request. 
([#1395](https://github.com/awslabs/amazon-eks-ami/pull/1395)) + ### AMI Release v20230816 * amazon-eks-gpu-node-1.27-v20230816 * amazon-eks-gpu-node-1.26-v20230816 From 477759acc7632c15f01a7f6b23b881fa002d2a9e Mon Sep 17 00:00:00 2001 From: Raghvendra Singh <90425886+raghs-aws@users.noreply.github.com> Date: Wed, 30 Aug 2023 12:56:36 -0500 Subject: [PATCH 523/621] Allow --reserved-cpus kubelet arg to be used (#1405) --- files/bootstrap.sh | 14 ++++- test/cases/reserved-cpus-kubelet-arg.sh | 73 +++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 2 deletions(-) create mode 100755 test/cases/reserved-cpus-kubelet-arg.sh diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 7d2ce8098..f9b0fafd8 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -222,6 +222,13 @@ ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" CLUSTER_ID="${CLUSTER_ID:-}" LOCAL_DISKS="${LOCAL_DISKS:-}" +##allow --reserved-cpus options via kubelet arg directly. Disable default reserved cgroup option in such cases +USE_RESERVED_CGROUPS=true +if [[ ${KUBELET_EXTRA_ARGS} == *'--reserved-cpus'* ]]; then + USE_RESERVED_CGROUPS=false + log "INFO: --kubelet-extra-args includes --reserved-cpus, so kube/system-reserved cgroups will not be used." +fi + if [[ ! -z ${LOCAL_DISKS} ]]; then setup-local-disks "${LOCAL_DISKS}" fi @@ -565,8 +572,11 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml echo "$(jq '.cgroupDriver="systemd"' "${KUBELET_CONFIG}")" > "${KUBELET_CONFIG}" - echo "$(jq '.systemReservedCgroup="/system"' "${KUBELET_CONFIG}")" > "${KUBELET_CONFIG}" - echo "$(jq '.kubeReservedCgroup="/runtime"' "${KUBELET_CONFIG}")" > "${KUBELET_CONFIG}" + ##allow --reserved-cpus options via kubelet arg directly. 
Disable default reserved cgroup option in such cases + if [[ "${USE_RESERVED_CGROUPS}" = true ]]; then + echo "$(jq '.systemReservedCgroup="/system"' "${KUBELET_CONFIG}")" > "${KUBELET_CONFIG}" + echo "$(jq '.kubeReservedCgroup="/runtime"' "${KUBELET_CONFIG}")" > "${KUBELET_CONFIG}" + fi # Check if the containerd config file is the same as the one used in the image build. # If different, then restart containerd w/ proper config diff --git a/test/cases/reserved-cpus-kubelet-arg.sh b/test/cases/reserved-cpus-kubelet-arg.sh new file mode 100755 index 000000000..2002b7060 --- /dev/null +++ b/test/cases/reserved-cpus-kubelet-arg.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should not set systemReservedCgroup and kubeReservedCgroup when --reserved-cpus is set with containerd" +exit_code=0 +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --kubelet-extra-args '--node-labels=cnf=cnf1 --reserved-cpus=0-3 --cpu-manager-policy=static' \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json +if grep -q systemReservedCgroup ${KUBELET_CONFIG}; then + echo "❌ Test Failed: expected systemReservedCgroup to be absent in ${KUBELET_CONFIG}.Found: $(grep systemReservedCgroup ${KUBELET_CONFIG})" + exit 1 +fi + +if grep -q kubeReservedCgroup ${KUBELET_CONFIG}; then + echo "❌ Test Failed: expected kubeReservedCgroup to be absent ${KUBELET_CONFIG}.Found: $(grep kubeReservedCgroup ${KUBELET_CONFIG})" + exit 1 +fi + +echo "-> Should set systemReservedCgroup and kubeReservedCgroup when --reserved-cpus is not set with containerd" +exit_code=0 +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +if ! $(grep -q systemReservedCgroup ${KUBELET_CONFIG}); then + echo "❌ Test Failed: expected systemReservedCgroup to be present in ${KUBELET_CONFIG}. Found: $(grep systemReservedCgroup ${KUBELET_CONFIG})" + exit 1 +fi + +if ! $(grep -q kubeReservedCgroup ${KUBELET_CONFIG}); then + echo "❌ Test Failed: expected kubeReservedCgroup to be present ${KUBELET_CONFIG}.Found: $(grep kubeReservedCgroup ${KUBELET_CONFIG})" + exit 1 +fi + +echo "-> Should set systemReservedCgroup and kubeReservedCgroup when --reserved-cpus is set with dockerd" +exit_code=0 +export KUBELET_VERSION=v1.23.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +if ! $(grep -q systemReservedCgroup ${KUBELET_CONFIG}); then + echo "❌ Test Failed: expected systemReservedCgroup to be present in ${KUBELET_CONFIG}.Found: $(grep systemReservedCgroup ${KUBELET_CONFIG})" + exit 1 +fi + +if ! 
$(grep -q kubeReservedCgroup ${KUBELET_CONFIG}); then + echo "❌ Test Failed: expected kubeReservedCgroup to be present ${KUBELET_CONFIG}.Found: $(grep kubeReservedCgroup ${KUBELET_CONFIG})" + exit 1 +fi From b07e59a3cd8461448ad8ea23e01a95a1fa9cfc91 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 31 Aug 2023 10:22:21 -0700 Subject: [PATCH 524/621] Install kernel-headers, kernel-devel (#1302) --- scripts/install-worker.sh | 2 +- scripts/upgrade_kernel.sh | 2 ++ scripts/validate.sh | 23 +++++++++++++++-------- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index bc5ac8e9d..ab209d37d 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -88,7 +88,7 @@ else sudo yum install -y curl fi -sudo yum versionlock kernel-$(uname -r) +sudo yum versionlock kernel-$(uname -r) kernel-headers-$(uname -r) kernel-devel-$(uname -r) # Remove the ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. 
if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 52d696056..4242aad4f 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -19,6 +19,8 @@ else sudo amazon-linux-extras install -y "kernel-${KERNEL_VERSION}" fi +sudo yum install -y kernel-headers kernel-devel + # enable pressure stall information sudo grubby \ --update-kernel=ALL \ diff --git a/scripts/validate.sh b/scripts/validate.sh index da6a31627..42da83266 100644 --- a/scripts/validate.sh +++ b/scripts/validate.sh @@ -1,13 +1,9 @@ #!/usr/bin/env bash -# -# Do basic validation of the generated AMI -# Validates that a file or blob doesn't exist -# -# Arguments: -# a file name or blob -# Returns: -# 1 if a file exists, after printing an error +set -o nounset +set -o errexit +set -o pipefail + validate_file_nonexists() { local file_blob=$1 for f in $file_blob; do @@ -90,3 +86,14 @@ for ENTRY in "${REQUIRED_COMMANDS[@]}"; do done echo "Required commands were found: ${REQUIRED_COMMANDS[*]}" + +REQUIRED_FREE_MEBIBYTES=1024 +TOTAL_MEBIBYTES=$(df -m / | tail -n1 | awk '{print $2}') +FREE_MEBIBYTES=$(df -m / | tail -n1 | awk '{print $4}') +echo "Disk space in mebibytes (required/free/total): ${REQUIRED_FREE_MEBIBYTES}/${FREE_MEBIBYTES}/${TOTAL_MEBIBYTES}" +if [ ${FREE_MEBIBYTES} -lt ${REQUIRED_FREE_MEBIBYTES} ]; then + echo "Disk space requirements not met!" + exit 1 +else + echo "Disk space requirements were met." 
+fi From da0c7f5ea836998894e0e3ab57a991b133d12769 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 31 Aug 2023 11:50:58 -0700 Subject: [PATCH 525/621] Handle eventually-consistent PrivateDnsName (#1383) --- files/bin/private-dns-name | 44 +++++ files/bootstrap.sh | 7 +- test/cases/private-dns-name.sh | 31 ++++ test/mocks/aws | 19 ++- .../i-1234567890abcdef0.json | 154 ++++++++++++++++++ 5 files changed, 247 insertions(+), 8 deletions(-) create mode 100755 files/bin/private-dns-name create mode 100755 test/cases/private-dns-name.sh create mode 100644 test/mocks/describe-instances/i-1234567890abcdef0.json diff --git a/files/bin/private-dns-name b/files/bin/private-dns-name new file mode 100755 index 000000000..f8ce371d8 --- /dev/null +++ b/files/bin/private-dns-name @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +set -o errexit +set -o nounset +set -o xtrace + +# Retrieves the PrivateDnsName from EC2 for this instance, waiting until +# it is available if necessary (due to eventual consistency). + +function log { + echo >&2 "$(date '+%Y-%m-%dT%H:%M:%S%z')" "[private-dns-name]" "$@" +} + +INSTANCE_ID=$(imds /latest/meta-data/instance-id) + +# the AWS CLI currently constructs the wrong endpoint URL on localzones (the availability zone group will be used instead of the parent region) +# more info: https://github.com/aws/aws-cli/issues/7043 +REGION=$(imds /latest/meta-data/placement/region) + +# by default, wait for 120 seconds +PRIVATE_DNS_NAME_MAX_ATTEMPTS=${PRIVATE_DNS_NAME_MAX_ATTEMPTS:-20} +PRIVATE_DNS_NAME_ATTEMPT_INTERVAL=${PRIVATE_DNS_NAME_ATTEMPT_INTERVAL:-6} + +log "will make up to ${PRIVATE_DNS_NAME_MAX_ATTEMPTS} attempt(s) every ${PRIVATE_DNS_NAME_ATTEMPT_INTERVAL} second(s)" + +ATTEMPT=0 +while true; do + PRIVATE_DNS_NAME=$(aws ec2 describe-instances --region $REGION --instance-ids $INSTANCE_ID | jq -r '.Reservations[].Instances[].PrivateDnsName') + if [ ! 
"${PRIVATE_DNS_NAME}" = "" ] || [ ${ATTEMPT} -ge ${PRIVATE_DNS_NAME_MAX_ATTEMPTS} ]; then + break + fi + ATTEMPT=$((ATTEMPT + 1)) + log "WARN: PrivateDnsName is not available, waiting for ${PRIVATE_DNS_NAME_ATTEMPT_INTERVAL} seconds..." + sleep ${PRIVATE_DNS_NAME_ATTEMPT_INTERVAL} +done + +if [ "${PRIVATE_DNS_NAME}" = "" ]; then + log "ERROR: failed to retrieve PrivateDnsName after ${ATTEMPT} attempts!" + exit 1 +else + log "INFO: retrieved PrivateDnsName: ${PRIVATE_DNS_NAME}" + echo "${PRIVATE_DNS_NAME}" + exit 0 +fi diff --git a/files/bootstrap.sh b/files/bootstrap.sh index f9b0fafd8..60dd9006b 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -538,12 +538,7 @@ else # If the VPC has a custom `domain-name` in its DHCP options set, and the VPC has `enableDnsHostnames` set to `true`, # then /etc/hostname is not the same as EC2's PrivateDnsName. # The name of the Node object must be equal to EC2's PrivateDnsName for the aws-iam-authenticator to allow this kubelet to manage it. - INSTANCE_ID=$(imds /latest/meta-data/instance-id) - # the AWS CLI currently constructs the wrong endpoint URL on localzones (the availability zone group will be used instead of the parent region) - # more info: https://github.com/aws/aws-cli/issues/7043 - REGION=$(imds /latest/meta-data/placement/region) - PRIVATE_DNS_NAME=$(AWS_RETRY_MODE=standard AWS_MAX_ATTEMPTS=10 aws ec2 describe-instances --region $REGION --instance-ids $INSTANCE_ID --query 'Reservations[].Instances[].PrivateDnsName' --output text) - KUBELET_ARGS="$KUBELET_ARGS --hostname-override=$PRIVATE_DNS_NAME" + KUBELET_ARGS="$KUBELET_ARGS --hostname-override=$(private-dns-name)" fi KUBELET_ARGS="$KUBELET_ARGS --cloud-provider=$KUBELET_CLOUD_PROVIDER" diff --git a/test/cases/private-dns-name.sh b/test/cases/private-dns-name.sh new file mode 100755 index 000000000..c49246b49 --- /dev/null +++ b/test/cases/private-dns-name.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +echo 
"--> Should fetch PrivateDnsName correctly" +EXPECTED_PRIVATE_DNS_NAME="ip-10-0-0-157.us-east-2.compute.internal" +PRIVATE_DNS_NAME=$(private-dns-name) +if [ ! "$PRIVATE_DNS_NAME" = "$EXPECTED_PRIVATE_DNS_NAME" ]; then + echo "❌ Test Failed: expected private-dns-name=$EXPECTED_PRIVATE_DNS_NAME but got '${PRIVATE_DNS_NAME}'" + exit 1 +fi + +echo "--> Should try to fetch PrivateDnsName until timeout is reached" +export PRIVATE_DNS_NAME_ATTEMPT_INTERVAL=3 +export PRIVATE_DNS_NAME_MAX_ATTEMPTS=2 +export AWS_MOCK_FAIL=true +START_TIME=$(date '+%s') +EXIT_CODE=0 +private-dns-name || EXIT_CODE=$? +STOP_TIME=$(date '+%s') +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code" + exit 1 +fi +ELAPSED_TIME=$((STOP_TIME - START_TIME)) +if [[ "$ELAPSED_TIME" -lt 6 ]]; then + echo "❌ Test Failed: expected 6 seconds to elapse, but got: $ELAPSED_TIME" + exit 1 +fi diff --git a/test/mocks/aws b/test/mocks/aws index da5f00b50..78126330d 100755 --- a/test/mocks/aws +++ b/test/mocks/aws @@ -7,15 +7,30 @@ SCRIPTPATH="$( echo >&2 "mocking 'aws $@'" -if [[ $1 == "ec2" ]]; then +AWS_MOCK_FAIL=${AWS_MOCK_FAIL:-false} +if [ "$AWS_MOCK_FAIL" = "true" ]; then + echo >&2 "failing mocked 'aws $@'" + exit 1 +fi +if [[ $1 == "ec2" ]]; then if [[ $2 == "describe-instance-types" ]]; then instance_type=$(echo "${@}" | grep -o '[a-z]\+[0-9]\+[a-z]*\.[0-9a-z]\+' | tr '.' 
'-') if [[ -f "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" ]]; then cat "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" exit 0 fi - echo "instance type not found" + echo >&2 "instance type not found" + exit 1 + fi + if [[ $2 == "describe-instances" ]]; then + instance_id=$(echo "${@}" | grep -o 'i\-[a-z0-9]\+') + echo >&2 "instance-id: $instance_id" + if [[ -f "${SCRIPTPATH}/describe-instances/${instance_id}.json" ]]; then + cat "${SCRIPTPATH}/describe-instances/${instance_id}.json" + exit 0 + fi + echo >&2 "instance not found" exit 1 fi fi diff --git a/test/mocks/describe-instances/i-1234567890abcdef0.json b/test/mocks/describe-instances/i-1234567890abcdef0.json new file mode 100644 index 000000000..da64601da --- /dev/null +++ b/test/mocks/describe-instances/i-1234567890abcdef0.json @@ -0,0 +1,154 @@ +{ + "Reservations": [ + { + "Groups": [], + "Instances": [ + { + "AmiLaunchIndex": 0, + "ImageId": "ami-0abcdef1234567890", + "InstanceId": "i-1234567890abcdef0", + "InstanceType": "t3.nano", + "KeyName": "my-key-pair", + "LaunchTime": "2022-11-15T10:48:59+00:00", + "Monitoring": { + "State": "disabled" + }, + "Placement": { + "AvailabilityZone": "us-east-2a", + "GroupName": "", + "Tenancy": "default" + }, + "PrivateDnsName": "ip-10-0-0-157.us-east-2.compute.internal", + "PrivateIpAddress": "10-0-0-157", + "ProductCodes": [], + "PublicDnsName": "ec2-34-253-223-13.us-east-2.compute.amazonaws.com", + "PublicIpAddress": "34.253.223.13", + "State": { + "Code": 16, + "Name": "running" + }, + "StateTransitionReason": "", + "SubnetId": "subnet-04a636d18e83cfacb", + "VpcId": "vpc-1234567890abcdef0", + "Architecture": "x86_64", + "BlockDeviceMappings": [ + { + "DeviceName": "/dev/xvda", + "Ebs": { + "AttachTime": "2022-11-15T10:49:00+00:00", + "DeleteOnTermination": true, + "Status": "attached", + "VolumeId": "vol-02e6ccdca7de29cf2" + } + } + ], + "ClientToken": "1234abcd-1234-abcd-1234-d46a8903e9bc", + "EbsOptimized": true, + "EnaSupport": 
true, + "Hypervisor": "xen", + "IamInstanceProfile": { + "Arn": "arn:aws:iam::111111111111:instance-profile/AmazonSSMRoleForInstancesQuickSetup", + "Id": "111111111111111111111" + }, + "NetworkInterfaces": [ + { + "Association": { + "IpOwnerId": "amazon", + "PublicDnsName": "ec2-34-253-223-13.us-east-2.compute.amazonaws.com", + "PublicIp": "34.253.223.13" + }, + "Attachment": { + "AttachTime": "2022-11-15T10:48:59+00:00", + "AttachmentId": "eni-attach-1234567890abcdefg", + "DeleteOnTermination": true, + "DeviceIndex": 0, + "Status": "attached", + "NetworkCardIndex": 0 + }, + "Description": "", + "Groups": [ + { + "GroupName": "launch-wizard-146", + "GroupId": "sg-1234567890abcdefg" + } + ], + "Ipv6Addresses": [], + "MacAddress": "00:11:22:33:44:55", + "NetworkInterfaceId": "eni-1234567890abcdefg", + "OwnerId": "104024344472", + "PrivateDnsName": "ip-10-0-0-157.us-east-2.compute.internal", + "PrivateIpAddress": "10-0-0-157", + "PrivateIpAddresses": [ + { + "Association": { + "IpOwnerId": "amazon", + "PublicDnsName": "ec2-34-253-223-13.us-east-2.compute.amazonaws.com", + "PublicIp": "34.253.223.13" + }, + "Primary": true, + "PrivateDnsName": "ip-10-0-0-157.us-east-2.compute.internal", + "PrivateIpAddress": "10-0-0-157" + } + ], + "SourceDestCheck": true, + "Status": "in-use", + "SubnetId": "subnet-1234567890abcdefg", + "VpcId": "vpc-1234567890abcdefg", + "InterfaceType": "interface" + } + ], + "RootDeviceName": "/dev/xvda", + "RootDeviceType": "ebs", + "SecurityGroups": [ + { + "GroupName": "launch-wizard-146", + "GroupId": "sg-1234567890abcdefg" + } + ], + "SourceDestCheck": true, + "Tags": [ + { + "Key": "Name", + "Value": "my-instance" + } + ], + "VirtualizationType": "hvm", + "CpuOptions": { + "CoreCount": 1, + "ThreadsPerCore": 2 + }, + "CapacityReservationSpecification": { + "CapacityReservationPreference": "open" + }, + "HibernationOptions": { + "Configured": false + }, + "MetadataOptions": { + "State": "applied", + "HttpTokens": "optional", + 
"HttpPutResponseHopLimit": 1, + "HttpEndpoint": "enabled", + "HttpProtocolIpv6": "disabled", + "InstanceMetadataTags": "enabled" + }, + "EnclaveOptions": { + "Enabled": false + }, + "PlatformDetails": "Linux/UNIX", + "UsageOperation": "RunInstances", + "UsageOperationUpdateTime": "2022-11-15T10:48:59+00:00", + "PrivateDnsNameOptions": { + "HostnameType": "ip-name", + "EnableResourceNameDnsARecord": true, + "EnableResourceNameDnsAAAARecord": false + }, + "MaintenanceOptions": { + "AutoRecovery": "default" + } + } + ], + "OwnerId": "111111111111", + "ReservationId": "r-1234567890abcdefg" + } + ] +} From 7c168c0e473a504f4a485651d45fd8f0fa5a8c1f Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 31 Aug 2023 16:17:12 -0700 Subject: [PATCH 526/621] Add .git-commit to archivebuild (#1411) --- .gitignore | 1 + ArchiveBuildConfig.yaml | 1 + build-tools/bin/archivebuild-wrapper | 6 ++++++ 3 files changed, 8 insertions(+) create mode 100755 build-tools/bin/archivebuild-wrapper diff --git a/.gitignore b/.gitignore index 1be3dc826..12527754f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ *version-info.json .DS_Store site/ +.git-commit diff --git a/ArchiveBuildConfig.yaml b/ArchiveBuildConfig.yaml index ba146715d..d7a4de238 100644 --- a/ArchiveBuildConfig.yaml +++ b/ArchiveBuildConfig.yaml @@ -14,6 +14,7 @@ dependencies: - src: Makefile - src: eks-worker-al2.json - src: eks-worker-al2-variables.json + - src: .git-commit archive: name: amazon-eks-ami.tar.gz type: tgz diff --git a/build-tools/bin/archivebuild-wrapper b/build-tools/bin/archivebuild-wrapper new file mode 100755 index 000000000..cc08866f4 --- /dev/null +++ b/build-tools/bin/archivebuild-wrapper @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +# This file is for Amazon internal build processes + +git rev-parse HEAD > .git-commit +archivebuild From 45a7f306c21397fa1af0e4c7ce48df936cde6168 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 1 Sep 2023 09:37:21 -0700 Subject: [PATCH 527/621] Use archivebuild-wrapper system 
(#1413) --- Config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Config b/Config index 51a60f0b5..42acc08d9 100644 --- a/Config +++ b/Config @@ -15,7 +15,7 @@ package.Amazon-eks-ami-mirror = { network-access = blocked; }; - build-system = archivebuild; + build-system = archivebuild-wrapper; build-tools = { 1.0 = { ArchiveBuild = 1.0; From 83facb67174ad3780a0409ac29d6a6a13223b517 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 1 Sep 2023 14:28:52 -0700 Subject: [PATCH 528/621] Discover .git-commit from environment (#1418) --- build-tools/bin/archivebuild-wrapper | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/build-tools/bin/archivebuild-wrapper b/build-tools/bin/archivebuild-wrapper index cc08866f4..ba86f6f0f 100755 --- a/build-tools/bin/archivebuild-wrapper +++ b/build-tools/bin/archivebuild-wrapper @@ -2,5 +2,12 @@ # This file is for Amazon internal build processes -git rev-parse HEAD > .git-commit +HEAD_COMMIT="${BRAZIL_PACKAGE_CHANGE_ID:-$(git rev-parse HEAD)}" + +if [ "${HEAD_COMMIT}" = "" ]; then + echo >&2 "could not determine HEAD commit" + exit 1 +fi + +echo "${HEAD_COMMIT}" > .git-commit archivebuild From c36daaf90d680d0a438ef3f5c2b31bfe82289fbc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 7 Sep 2023 18:42:19 -0700 Subject: [PATCH 529/621] Update eni-max-pods.txt (#1423) Co-authored-by: GitHub --- files/eni-max-pods.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index efe2ab6d7..7f1b98136 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -668,6 +668,14 @@ r7gd.8xlarge 234 r7gd.large 29 r7gd.medium 8 r7gd.xlarge 58 +r7iz.12xlarge 234 +r7iz.16xlarge 737 +r7iz.2xlarge 58 +r7iz.32xlarge 737 +r7iz.4xlarge 234 +r7iz.8xlarge 234 +r7iz.large 29 +r7iz.xlarge 58 t1.micro 4 t2.2xlarge 44 t2.large 35 From 524a5666cb0e4b64efb60c6e25356d269d415137 Mon Sep 17 00:00:00 
2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 11 Sep 2023 18:34:16 -0700 Subject: [PATCH 530/621] Update eni-max-pods.txt (#1424) Co-authored-by: GitHub --- files/eni-max-pods.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 7f1b98136..e7c30e8c8 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -651,6 +651,17 @@ r6in.8xlarge 234 r6in.large 29 r6in.metal 345 r6in.xlarge 58 +r7a.12xlarge 234 +r7a.16xlarge 737 +r7a.24xlarge 737 +r7a.2xlarge 58 +r7a.32xlarge 737 +r7a.48xlarge 737 +r7a.4xlarge 234 +r7a.8xlarge 234 +r7a.large 29 +r7a.medium 8 +r7a.xlarge 58 r7g.12xlarge 234 r7g.16xlarge 737 r7g.2xlarge 58 From 34ea9405fb9995eac896d7870ac7c197322ea2a2 Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Mon, 11 Sep 2023 22:01:53 -0700 Subject: [PATCH 531/621] Require builder instance to use IMDSv2 (#1422) --- eks-worker-al2.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index fe2d3df24..3111f7dd5 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -110,7 +110,10 @@ "ssm_agent_version": "{{ user `ssm_agent_version`}}" }, "ami_name": "{{user `ami_name`}}", - "ami_description": "{{ user `ami_description` }}, {{ user `ami_component_description` }}" + "ami_description": "{{ user `ami_description` }}, {{ user `ami_component_description` }}", + "metadata_options": { + "http_tokens": "required" + } } ], "provisioners": [ From 20ce145b066a374740effe696063e7ff15743f5b Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 12 Sep 2023 13:41:31 -0700 Subject: [PATCH 532/621] Add release note config (#1426) --- .github/release.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .github/release.yaml diff --git a/.github/release.yaml b/.github/release.yaml new file mode 100644 index 000000000..5fbdeeba5 --- /dev/null +++ b/.github/release.yaml @@ -0,0 +1,5 @@ 
+--- +changelog: + exclude: + labels: + - "changelog/exclude" From a32f4dd68d75c68dc9035001dc0ff1238dd1f255 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 14 Sep 2023 17:38:26 -0700 Subject: [PATCH 533/621] Update eni-max-pods.txt (#1429) Co-authored-by: GitHub --- files/eni-max-pods.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index e7c30e8c8..d10d53b25 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -192,6 +192,15 @@ c7gn.8xlarge 234 c7gn.large 29 c7gn.medium 8 c7gn.xlarge 58 +c7i.12xlarge 234 +c7i.16xlarge 737 +c7i.24xlarge 737 +c7i.2xlarge 58 +c7i.48xlarge 737 +c7i.4xlarge 234 +c7i.8xlarge 234 +c7i.large 29 +c7i.xlarge 58 cr1.8xlarge 234 d2.2xlarge 58 d2.4xlarge 234 From 4462356ecb2ffe8d8710f656fe855e5b025145fc Mon Sep 17 00:00:00 2001 From: Nick Baker Date: Mon, 18 Sep 2023 15:39:56 -0700 Subject: [PATCH 534/621] Use 2023-09-14 binaries, add 1.28 target (#1431) --- Makefile | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 8c39b126a..6794af6e7 100644 --- a/Makefile +++ b/Makefile @@ -113,23 +113,27 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-08-16 + $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-09-14 .PHONY: 1.24 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 - $(MAKE) k8s kubernetes_version=1.24.16 kubernetes_build_date=2023-08-16 + $(MAKE) k8s kubernetes_version=1.24.17 kubernetes_build_date=2023-09-14 .PHONY: 1.25 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 - $(MAKE) k8s kubernetes_version=1.25.12 kubernetes_build_date=2023-08-16 + $(MAKE) k8s kubernetes_version=1.25.13 kubernetes_build_date=2023-09-14 .PHONY: 1.26 1.26: ## Build EKS Optimized AL2 AMI - K8s 1.26 - 
$(MAKE) k8s kubernetes_version=1.26.7 kubernetes_build_date=2023-08-16 + $(MAKE) k8s kubernetes_version=1.26.8 kubernetes_build_date=2023-09-14 .PHONY: 1.27 1.27: ## Build EKS Optimized AL2 AMI - K8s 1.27 - $(MAKE) k8s kubernetes_version=1.27.4 kubernetes_build_date=2023-08-16 + $(MAKE) k8s kubernetes_version=1.27.5 kubernetes_build_date=2023-09-14 + +.PHONY: 1.28 +1.28: ## Build EKS Optimized AL2 AMI - K8s 1.28 + $(MAKE) k8s kubernetes_version=1.28.1 kubernetes_build_date=2023-09-14 .PHONY: lint-docs lint-docs: ## Lint the docs From 3b3487d896454e217e2d4e2c1d3ef83c4a90a4a3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 18 Sep 2023 21:18:37 -0700 Subject: [PATCH 535/621] Update eni-max-pods.txt (#1432) Co-authored-by: GitHub --- files/eni-max-pods.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index d10d53b25..e32dbddcd 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -508,6 +508,7 @@ m7i.8xlarge 234 m7i.large 29 m7i.xlarge 58 mac1.metal 234 +mac2-m2pro.metal 234 mac2.metal 234 p2.16xlarge 234 p2.8xlarge 234 From d028dee038a2a0a00de2c232c34f22391737633b Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 20 Sep 2023 21:46:21 -0700 Subject: [PATCH 536/621] Set pid_max to 4194304 (#1434) --- scripts/install-worker.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index ab209d37d..5ee1fae5f 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -519,6 +519,7 @@ EOF echo fs.inotify.max_user_watches=524288 | sudo tee -a /etc/sysctl.conf echo fs.inotify.max_user_instances=8192 | sudo tee -a /etc/sysctl.conf echo vm.max_map_count=524288 | sudo tee -a /etc/sysctl.conf +echo 'kernel.pid_max=4194304' | sudo tee -a /etc/sysctl.conf ################################################################################ ### adding log-collector-script 
################################################ From 2b5e6eea3695747780434c5d47750fc27a7685c1 Mon Sep 17 00:00:00 2001 From: ddl-retornam <56278673+ddl-retornam@users.noreply.github.com> Date: Wed, 20 Sep 2023 23:47:03 -0700 Subject: [PATCH 537/621] Install nerdctl (#1321) --- scripts/install-worker.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 5ee1fae5f..92f8c2ef5 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -189,6 +189,12 @@ net.bridge.bridge-nf-call-iptables = 1 net.ipv4.ip_forward = 1 EOF +############################################################################### +### Nerdctl setup ############################################################# +############################################################################### + +sudo yum install -y nerdctl + ################################################################################ ### Docker ##################################################################### ################################################################################ From 6f68d5926d2117945627e73219ea7e9f53bf4a19 Mon Sep 17 00:00:00 2001 From: Nick Baker Date: Fri, 22 Sep 2023 17:21:06 -0700 Subject: [PATCH 538/621] Update CHANGELOG.md for 20230919 AMI release (#1439) * Update CHANGELOG.md for 20230919 AMI release Co-authored-by: Carter --------- Co-authored-by: Carter --- CHANGELOG.md | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70e47ca80..f2c889bf4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,67 @@ # Changelog +### AMI Release v20230919 +* amazon-eks-gpu-node-1.28-v20230919 +* amazon-eks-gpu-node-1.27-v20230919 +* amazon-eks-gpu-node-1.26-v20230919 +* amazon-eks-gpu-node-1.25-v20230919 +* amazon-eks-gpu-node-1.24-v20230919 +* amazon-eks-gpu-node-1.23-v20230919 +* amazon-eks-arm64-node-1.28-v20230919 +* 
amazon-eks-arm64-node-1.27-v20230919 +* amazon-eks-arm64-node-1.26-v20230919 +* amazon-eks-arm64-node-1.25-v20230919 +* amazon-eks-arm64-node-1.24-v20230919 +* amazon-eks-arm64-node-1.23-v20230919 +* amazon-eks-node-1.28-v20230919 +* amazon-eks-node-1.27-v20230919 +* amazon-eks-node-1.26-v20230919 +* amazon-eks-node-1.25-v20230919 +* amazon-eks-node-1.24-v20230919 +* amazon-eks-node-1.23-v20230919 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.28.1-20230919` +* `1.27.5-20230919` +* `1.26.8-20230919` +* `1.25.13-20230919` +* `1.24.17-20230919` +* `1.23.17-20230919` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.28.1/20230914/ +* s3://amazon-eks/1.27.5/20230914/ +* s3://amazon-eks/1.26.8/20230914/ +* s3://amazon-eks/1.25.13/20230914/ +* s3://amazon-eks/1.24.17/20230914/ +* s3://amazon-eks/1.23.17/20230914/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.254-170.358.amzn2 + * Kubernetes 1.24 and above: 5.10.192-183.736.amzn2 + * **Note** that the GPU AMI on Kubernetes 1.27 and below will continue to use kernel-5.4 due to a [compatibility issue](https://github.com/awslabs/amazon-eks-ami/issues/1222) with `nvidia-driver-latest-dkms`. +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
+* `containerd`: 1.6.19-1.amzn2.0.3 +* `runc`: 1.1.7-3.amzn2 +* `cuda`: 12.2.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.2.1542.0-1 + +Notable changes: + - kernel-5.10 updated to address: + - [ALAS2KERNEL-5.10-2023-039](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-039.html) + - Add support for Kubernetes 1.28 ([#1431](https://github.com/awslabs/amazon-eks-ami/pull/1431)) + - GPU AMI: + - Released with [Neuron version 2.14.0](https://awsdocs-neuron.readthedocs-hosted.com/en/latest/release-notes/index.html#neuron-2-14-0-09-15-2023) + - GPU AMIs on Kubernetes 1.28 and above: + - Upgraded `kernel` to 5.10 + - Upgraded `cuda` version to 12.2 + - Upgraded Nvidia driver to 535.54.03-1 + - [Installed EFA version 1.26.1](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa-start.html#efa-start-enable) + - Limited deeper [sleep states](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/processor_state_control.html) + ### AMI Release v20230825 * amazon-eks-gpu-node-1.27-v20230825 * amazon-eks-gpu-node-1.26-v20230825 From 5d5db2f0eb99b9851ca064cad89460401caa9072 Mon Sep 17 00:00:00 2001 From: Nick Baker Date: Fri, 22 Sep 2023 20:51:37 -0700 Subject: [PATCH 539/621] bump latest Kubernetes build target version (#1440) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6794af6e7..96712f6b0 100644 --- a/Makefile +++ b/Makefile @@ -54,7 +54,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: latest -latest: 1.27 ## Build EKS Optimized AL2 AMI with the latest supported version of Kubernetes +latest: 1.28 ## Build EKS Optimized AL2 AMI with the latest supported version of Kubernetes # ensure that these flags are equivalent to the rules in the .editorconfig SHFMT_FLAGS := --list \ From be7bc10e9e6b434bb31fe8aa9c9351eee104dd0d Mon Sep 17 00:00:00 2001 From: Bryant Biggs Date: Wed, 27 Sep 2023 18:37:49 -0400 Subject: [PATCH 540/621] fix: Tag cached image with the ECR URI for the target 
region (#1442) --- scripts/install-worker.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 92f8c2ef5..b7ae53680 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -441,6 +441,7 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! [[ ${ISOLATED_REGIONS} =~ $BIN ${VPC_CNI_IMGS[@]+"${VPC_CNI_IMGS[@]}"} ) PULLED_IMGS=() + REGIONS=$(aws ec2 describe-regions --all-regions --output text --query 'Regions[].[RegionName]') for img in "${CACHE_IMGS[@]}"; do ## only kube-proxy-minimal is vended for K8s 1.24+ @@ -465,9 +466,10 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! [[ ${ISOLATED_REGIONS} =~ $BIN done #### Tag the pulled down image for all other regions in the partition - for region in $(aws ec2 describe-regions --all-regions | jq -r '.Regions[] .RegionName'); do + for REGION in "${REGIONS[@]}"; do for img in "${PULLED_IMGS[@]}"; do - regional_img="${img/$BINARY_BUCKET_REGION/$region}" + region_uri=$(/etc/eks/get-ecr-uri.sh "${region}" "${AWS_DOMAIN}") + regional_img="${img/$ECR_URI/$region_uri}" sudo ctr -n k8s.io image tag "${img}" "${regional_img}" || : ## Tag ECR fips endpoint for supported regions if [[ "${region}" =~ (us-east-1|us-east-2|us-west-1|us-west-2|us-gov-east-1|us-gov-east-2) ]]; then From 4cc2ed2d87fca33b54aef1eca60b8d2c0162e59b Mon Sep 17 00:00:00 2001 From: Sichaow Date: Fri, 29 Sep 2023 13:09:57 -0700 Subject: [PATCH 541/621] Add H100 into gpu clock (#1447) --- files/bootstrap.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 60dd9006b..dcd69ee5d 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -658,6 +658,8 @@ if command -v nvidia-smi &> /dev/null; then nvidia-smi -ac 5001,1590 elif [[ $GPUNAME == *"M60"* ]]; then nvidia-smi -ac 2505,1177 + elif [[ $GPUNAME == *"H100"* ]]; then + nvidia-smi -ac 2619,1980 else echo "unsupported gpu" fi From 
371a2344f58d2a0527e53bab72e7ff419341015e Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Mon, 2 Oct 2023 12:38:17 -0700 Subject: [PATCH 542/621] bug: incorrect region variable name (#1449) Co-authored-by: ljosyula --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index b7ae53680..16e3ddda1 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -466,7 +466,7 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! [[ ${ISOLATED_REGIONS} =~ $BIN done #### Tag the pulled down image for all other regions in the partition - for REGION in "${REGIONS[@]}"; do + for region in "${REGIONS[@]}"; do for img in "${PULLED_IMGS[@]}"; do region_uri=$(/etc/eks/get-ecr-uri.sh "${region}" "${AWS_DOMAIN}") regional_img="${img/$ECR_URI/$region_uri}" From a92c481419ff7a109732080605877a95df854267 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 4 Oct 2023 18:21:02 -0700 Subject: [PATCH 543/621] Update eni-max-pods.txt (#1452) Co-authored-by: GitHub --- files/eni-max-pods.txt | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index e32dbddcd..e31f53c1d 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -167,6 +167,18 @@ c6in.8xlarge 234 c6in.large 29 c6in.metal 345 c6in.xlarge 58 +c7a.12xlarge 234 +c7a.16xlarge 737 +c7a.24xlarge 737 +c7a.2xlarge 58 +c7a.32xlarge 737 +c7a.48xlarge 737 +c7a.4xlarge 234 +c7a.8xlarge 234 +c7a.large 29 +c7a.medium 8 +c7a.metal-48xl 737 +c7a.xlarge 58 c7g.12xlarge 234 c7g.16xlarge 737 c7g.2xlarge 58 @@ -220,8 +232,6 @@ dl1.24xlarge 737 f1.16xlarge 394 f1.2xlarge 58 f1.4xlarge 234 -g2.2xlarge 58 -g2.8xlarge 234 g3.16xlarge 737 g3.4xlarge 234 g3.8xlarge 234 @@ -671,6 +681,7 @@ r7a.4xlarge 234 r7a.8xlarge 234 r7a.large 29 r7a.medium 
8 +r7a.metal-48xl 737 r7a.xlarge 58 r7g.12xlarge 234 r7g.16xlarge 737 From 79c0b643345f21c778d1f477144386c523f954b4 Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Thu, 5 Oct 2023 17:52:03 -0700 Subject: [PATCH 544/621] Update CHANGELOG.md for 20231002 AMI release (#1456) Co-authored-by: ljosyula --- CHANGELOG.md | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2c889bf4..b8efc787d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,58 @@ # Changelog +### AMI Release v20231002 +* amazon-eks-gpu-node-1.28-v20231002 +* amazon-eks-gpu-node-1.27-v20231002 +* amazon-eks-gpu-node-1.26-v20231002 +* amazon-eks-gpu-node-1.25-v20231002 +* amazon-eks-gpu-node-1.24-v20231002 +* amazon-eks-gpu-node-1.23-v20231002 +* amazon-eks-arm64-node-1.28-v20231002 +* amazon-eks-arm64-node-1.27-v20231002 +* amazon-eks-arm64-node-1.26-v20231002 +* amazon-eks-arm64-node-1.25-v20231002 +* amazon-eks-arm64-node-1.24-v20231002 +* amazon-eks-arm64-node-1.23-v20231002 +* amazon-eks-node-1.28-v20231002 +* amazon-eks-node-1.27-v20231002 +* amazon-eks-node-1.26-v20231002 +* amazon-eks-node-1.25-v20231002 +* amazon-eks-node-1.24-v20231002 +* amazon-eks-node-1.23-v20231002 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.28.1-20231002` +* `1.27.5-20231002` +* `1.26.8-20231002` +* `1.25.13-20231002` +* `1.24.17-20231002` +* `1.23.17-20231002` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.28.1/20230914/ +* s3://amazon-eks/1.27.5/20230914/ +* s3://amazon-eks/1.26.8/20230914/ +* s3://amazon-eks/1.25.13/20230914/ +* s3://amazon-eks/1.24.17/20230914/ +* s3://amazon-eks/1.23.17/20230914/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.254-170.358.amzn2 + * Kubernetes 1.24 and above: 5.10.192-183.736.amzn2 + * **Note** that the GPU AMI on Kubernetes 
1.27 and below will continue to use kernel-5.4 as we work to address a [compatibility issue](https://github.com/awslabs/amazon-eks-ami/issues/1222) with `nvidia-driver-latest-dkms`. +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.3 +* `runc`: 1.1.7-3.amzn2 +* `cuda`: 12.2.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.2.1630.0-1 + +Notable changes: + - SSM agent upgraded to `3.2.1630.0-1` + - Update `libssh2` for [ALAS-2023-2257](https://alas.aws.amazon.com/AL2/ALAS-2023-2257.html) + ### AMI Release v20230919 * amazon-eks-gpu-node-1.28-v20230919 * amazon-eks-gpu-node-1.27-v20230919 From 1dc843d13c927acabbf68cfbe50a20c8a724bec4 Mon Sep 17 00:00:00 2001 From: Carter Date: Sat, 7 Oct 2023 00:25:23 -0700 Subject: [PATCH 545/621] Build with latest binaries by default (#1391) --- Makefile | 22 ++++++----- doc/USER_GUIDE.md | 82 +++++++++++++++++++++++------------------ hack/latest-binaries.sh | 26 +++++++++++++ 3 files changed, 84 insertions(+), 46 deletions(-) create mode 100755 hack/latest-binaries.sh diff --git a/Makefile b/Makefile index 96712f6b0..869e606d1 100644 --- a/Makefile +++ b/Makefile @@ -53,8 +53,12 @@ T_GREEN := \e[0;32m T_YELLOW := \e[0;33m T_RESET := \e[0m -.PHONY: latest -latest: 1.28 ## Build EKS Optimized AL2 AMI with the latest supported version of Kubernetes +# default to the latest supported Kubernetes version +k8s=1.28 + +.PHONY: build +build: ## Build EKS Optimized AL2 AMI + $(MAKE) k8s $(shell hack/latest-binaries.sh $(k8s)) # ensure that these flags are equivalent to the rules in the .editorconfig SHFMT_FLAGS := --list \ @@ -109,31 +113,29 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" $(PACKER_BINARY) build -timestamp-ui -color=false $(PACKER_VAR_FLAGS) $(PACKER_TEMPLATE_FILE) 
-# Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html - .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-09-14 + $(MAKE) k8s $(shell hack/latest-binaries.sh 1.23) .PHONY: 1.24 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 - $(MAKE) k8s kubernetes_version=1.24.17 kubernetes_build_date=2023-09-14 + $(MAKE) k8s $(shell hack/latest-binaries.sh 1.24) .PHONY: 1.25 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 - $(MAKE) k8s kubernetes_version=1.25.13 kubernetes_build_date=2023-09-14 + $(MAKE) k8s $(shell hack/latest-binaries.sh 1.25) .PHONY: 1.26 1.26: ## Build EKS Optimized AL2 AMI - K8s 1.26 - $(MAKE) k8s kubernetes_version=1.26.8 kubernetes_build_date=2023-09-14 + $(MAKE) k8s $(shell hack/latest-binaries.sh 1.26) .PHONY: 1.27 1.27: ## Build EKS Optimized AL2 AMI - K8s 1.27 - $(MAKE) k8s kubernetes_version=1.27.5 kubernetes_build_date=2023-09-14 + $(MAKE) k8s $(shell hack/latest-binaries.sh 1.27) .PHONY: 1.28 1.28: ## Build EKS Optimized AL2 AMI - K8s 1.28 - $(MAKE) k8s kubernetes_version=1.28.1 kubernetes_build_date=2023-09-14 + $(MAKE) k8s $(shell hack/latest-binaries.sh 1.28) .PHONY: lint-docs lint-docs: ## Lint the docs diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index 9f90bd7ec..a46d58df2 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -66,50 +66,68 @@ Users have the following options for specifying their own values: --- -## Building against other versions of Kubernetes binaries -To build an Amazon EKS Worker AMI with other versions of Kubernetes that are not listed above run the following AWS Command -Line Interface (AWS CLI) commands to obtain values for KUBERNETES_VERSION, KUBERNETES_BUILD_DATE, PLATFORM, ARCH from S3 +## Choosing Kubernetes binaries + +When building the AMI, binaries such as `kubelet`, `aws-iam-authenticator`, and `ecr-credential-provider` are installed. 
+ +### Using the latest binaries + +It is recommended that the latest available binaries are used, as they may contain important fixes for bugs or security issues. +The latest binaries can be discovered with the following script: +```bash +hack/latest-binaries.sh $KUBERNETES_MINOR_VERSION +``` +This script will return the values for the binary-related AMI template variables, for example: +```bash +> hack/latest-binaries.sh 1.28 + +kubernetes_version=1.28.1 kubernetes_build_date=2023-10-01 +``` + +### Using a specific version of the binaries + +Use the following commands to obtain values for the binary-related AMI template variables: ```bash -#List of all avalable Kuberenets Versions: -aws s3 ls s3://amazon-eks -KUBERNETES_VERSION=1.23.9 # Chose a version and set the variable +# List Kubernetes versions +aws s3 ls s3://amazon-eks + +# List build dates +aws s3 ls s3://amazon-eks/1.23.9/ -#List of all builds for the specified Kubernetes Version: -aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/ -KUBERNETES_BUILD_DATE=2022-07-27 # Chose a date and set the variable +# List platforms +aws s3 ls s3://amazon-eks/1.23.9/2022-07-27/bin/ -#List of all platforms available for the selected Kubernetes Version and build date -aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/ -PLATFORM=linux # Chose a platform and set the variable +# List architectures +aws s3 ls s3://amazon-eks/1.23.9/2022-07-27/bin/linux/ -#List of all architectures for the selected Kubernetes Version, build date and platform -aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/ -ARCH=x86_64 #Chose an architecture and set the variable +# List binaries +aws s3 ls s3://amazon-eks/1.23.9/2022-07-27/bin/linux/x86_64/ ``` -Run the following command to build an Amazon EKS Worker AMI based on the chosen parameters in the previous step + +To build using the example binaries above: ```bash make k8s \ - kubernetes_version=$KUBERNETES_VERSION \ - 
kubernetes_build_date=$KUBERNETES_BUILD_DATE \ - arch=$ARCH + kubernetes_version=1.23.9 \ + kubernetes_build_date=2022-07-27 \ + arch=x86_64 ``` ---- +### Providing your own binaries -## Providing your own Kubernetes Binaries +By default, binaries are downloaded from the public S3 bucket `amazon-eks` in `us-west-2`. +You can instead provide your own version of Kubernetes binaries. -By default, binaries are downloaded from the Amazon EKS public Amazon Simple Storage Service (Amazon S3) -bucket amazon-eks in us-west-2. You can instead choose to provide your own version of Kubernetes binaries to be used. To use your own binaries +To use your own binaries: -1. Copy the binaries to your own S3 bucket using the AWS CLI. Here is an example that uses Kubelet binary +1. Copy all of the necessary binaries to your own S3 bucket using the AWS CLI. For example: ```bash - aws s3 cp kubelet s3://my-custom-bucket/kubernetes_version/kubernetes_build_date/bin/linux/arch/kubelet + aws s3 cp kubelet s3://$BUCKET/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/$ARCH/kubelet ``` -**Note**: Replace my-custom-bucket, amazon-eks, kubernetes_version, kubernetes_build_date, and arch with your values. -**Important**: You must provide all the binaries listed in the default amazon-eks bucket for a specific kubernetes_version, kubernetes_build_date, and arch combination. These binaries must be accessible through AWS Identity and Access Management (IAM) credentials configured in the Install and configure HashiCorp Packer section. +**Important**: You must provide all the binaries present in the default `amazon-eks` bucket for a specific `KUBERNETES_VERSION`, `KUBERNETES_BUILD_DATE`, and `ARCH` combination. +These binaries must be accessible using the credentials on the Packer builder EC2 instance. -2. Run the following command to start the build process to use your own Kubernetes binaries +2. 
Run the following command to start the build process to use your own Kubernetes binaries: ```bash make k8s \ binary_bucket_name=my-custom-bucket \ @@ -119,14 +137,6 @@ make k8s \ ``` **Note**: Confirm that the binary_bucket_name, binary_bucket_region, kubernetes_version, and kubernetes_build_date parameters match the path to your binaries in Amazon S3. -The Makefile runs Packer with the `eks-worker-al2.json` build specification -template and the [amazon-ebs](https://www.packer.io/docs/builders/amazon-ebs.html) -builder. An instance is launched and the Packer [Shell -Provisioner](https://www.packer.io/docs/provisioners/shell.html) runs the -`install-worker.sh` script on the instance to install software and perform other -necessary configuration tasks. Then, Packer creates an AMI from the instance -and terminates the instance after the AMI is created. - --- ## Container Image Caching diff --git a/hack/latest-binaries.sh b/hack/latest-binaries.sh new file mode 100755 index 000000000..246fc8dd8 --- /dev/null +++ b/hack/latest-binaries.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +if [ "$#" -ne 1 ]; then + echo "usage: $0 KUBERNETES_MINOR_VERSION" + exit 1 +fi + +MINOR_VERSION="${1}" + +# retrieve the available "VERSION/BUILD_DATE" prefixes (e.g. 
"1.28.1/2023-09-14") +# from the binary object keys, sorted in descending semver order, and pick the first one +LATEST_BINARIES=$(aws s3api list-objects-v2 --bucket amazon-eks --prefix "${MINOR_VERSION}" --query 'Contents[*].[Key]' --output text | cut -d'/' -f-2 | sort -Vru | head -n1) + +if [ "${LATEST_BINARIES}" == "None" ]; then + echo >&2 "No binaries available for minor version: ${MINOR_VERSION}" + exit 1 +fi + +LATEST_VERSION=$(echo "${LATEST_BINARIES}" | cut -d'/' -f1) +LATEST_BUILD_DATE=$(echo "${LATEST_BINARIES}" | cut -d'/' -f2) + +echo "kubernetes_version=${LATEST_VERSION} kubernetes_build_date=${LATEST_BUILD_DATE}" From fae39c4d024cc49352d181d8208b60dca15ade18 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 10 Oct 2023 11:05:06 -0700 Subject: [PATCH 546/621] Fix region in cached image names (#1461) --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 16e3ddda1..d239b9859 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -466,7 +466,7 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! 
[[ ${ISOLATED_REGIONS} =~ $BIN done #### Tag the pulled down image for all other regions in the partition - for region in "${REGIONS[@]}"; do + for region in ${REGIONS[*]}; do for img in "${PULLED_IMGS[@]}"; do region_uri=$(/etc/eks/get-ecr-uri.sh "${region}" "${AWS_DOMAIN}") regional_img="${img/$ECR_URI/$region_uri}" From 1c9e032897501f16fc23c68f3dfa4c4d027854fa Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 10 Oct 2023 14:44:19 -0700 Subject: [PATCH 547/621] Add 1.28 to CI (#1464) --- kubernetes-versions.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kubernetes-versions.json b/kubernetes-versions.json index fc7449f8d..97526a4ba 100644 --- a/kubernetes-versions.json +++ b/kubernetes-versions.json @@ -3,5 +3,6 @@ "1.24", "1.25", "1.26", - "1.27" -] \ No newline at end of file + "1.27", + "1.28" +] From 1f04e2630388ed0ee1be38e9edf9a7698a1e54e8 Mon Sep 17 00:00:00 2001 From: Alex Schultz Date: Wed, 11 Oct 2023 00:00:23 -0600 Subject: [PATCH 548/621] Add optional FIPS support (#1458) --- Makefile | 13 ++++++++----- doc/USER_GUIDE.md | 1 + eks-worker-al2-variables.json | 1 + eks-worker-al2.json | 18 ++++++++++++++++-- files/get-ecr-uri.sh | 8 +++++++- scripts/enable-fips.sh | 10 ++++++++++ scripts/upgrade_kernel.sh | 2 -- 7 files changed, 43 insertions(+), 10 deletions(-) create mode 100755 scripts/enable-fips.sh diff --git a/Makefile b/Makefile index 869e606d1..f7c64c618 100644 --- a/Makefile +++ b/Makefile @@ -26,19 +26,22 @@ ifeq ($(call vercmp,$(kubernetes_version),gteq,1.25.0), true) ami_component_description ?= (k8s: {{ user `kubernetes_version` }}, containerd: {{ user `containerd_version` }}) endif -OS= +AMI_VERSION ?= v$(shell date '+%Y%m%d') +AMI_VARIANT ?= amazon-eks ifneq (,$(findstring al2023, $(PACKER_TEMPLATE_FILE))) - OS=-al2023 + AMI_VARIANT := $(AMI_VARIANT)-al2023 endif - arch ?= x86_64 ifeq ($(arch), arm64) instance_type ?= m6g.large - ami_name ?= amazon-eks-arm64-node$(OS)-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') 
+ AMI_VARIANT := $(AMI_VARIANT)-arm64 else instance_type ?= m5.large - ami_name ?= amazon-eks-node$(OS)-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') endif +ifeq ($(enable_fips), true) + AMI_VARIANT := $(AMI_VARIANT)-fips +endif +ami_name ?= $(AMI_VARIANT)-node-$(K8S_VERSION_MINOR)-$(AMI_VERSION) ifeq ($(aws_region), cn-northwest-1) source_ami_owners ?= 141808717104 diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index a46d58df2..24b2575dc 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -41,6 +41,7 @@ Users have the following options for specifying their own values: | `creator` | ```{{env `USER`}}``` | | | `docker_version` | ```20.10.23-1.amzn2.0.1``` | | | `encrypted` | ```false``` | | +| `enable_fips` | ```false``` | Install openssl and enable fips related kernel parameters | | `instance_type` | *None* | | | `kernel_version` | `""` | | | `kms_key_id` | `""` | | diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 267fd6c82..2ff4df904 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -16,6 +16,7 @@ "containerd_version": "1.6.*", "creator": "{{env `USER`}}", "docker_version": "20.10.23-1.amzn2.0.1", + "enable_fips": "false", "encrypted": "false", "kernel_version": "", "kms_key_id": "", diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 3111f7dd5..91d99b07b 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -21,6 +21,7 @@ "creator": null, "docker_version": null, "encrypted": null, + "enable_fips": null, "instance_type": null, "kernel_version": null, "kms_key_id": null, @@ -152,14 +153,27 @@ { "type": "shell", "remote_folder": "{{ user `remote_folder`}}", - "expect_disconnect": true, - "pause_after": "90s", "script": "{{template_dir}}/scripts/upgrade_kernel.sh", "environment_vars": [ "KUBERNETES_VERSION={{user `kubernetes_version`}}", "KERNEL_VERSION={{user `kernel_version`}}" ] }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "script": 
"{{template_dir}}/scripts/enable-fips.sh", + "environment_vars": [ + "ENABLE_FIPS={{user `enable_fips`}}" + ] + }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "inline": ["sudo reboot"], + "expect_disconnect": true, + "pause_after": "90s" + }, { "type": "shell", "remote_folder": "{{ user `remote_folder`}}", diff --git a/files/get-ecr-uri.sh b/files/get-ecr-uri.sh index ba719ac06..56278ab8d 100755 --- a/files/get-ecr-uri.sh +++ b/files/get-ecr-uri.sh @@ -69,4 +69,10 @@ else esac fi -echo "${acct}.dkr.ecr.${region}.${aws_domain}" +AWS_ECR_SUBDOMAIN="ecr" +# if FIPS is enabled on the machine, use the FIPS endpoint. +if [[ "$(sysctl -n crypto.fips_enabled)" == 1 ]]; then + AWS_ECR_SUBDOMAIN="ecr-fips" +fi + +echo "${acct}.dkr.${AWS_ECR_SUBDOMAIN}.${region}.${aws_domain}" diff --git a/scripts/enable-fips.sh b/scripts/enable-fips.sh new file mode 100755 index 000000000..399ab6b26 --- /dev/null +++ b/scripts/enable-fips.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# https://aws.amazon.com/blogs/publicsector/enabling-fips-mode-amazon-linux-2/ +if [[ "$ENABLE_FIPS" == "true" ]]; then + # install and enable fips modules + sudo yum install -y dracut-fips openssl + sudo dracut -f + + # enable fips in the boot command + sudo /sbin/grubby --update-kernel=ALL --args="fips=1" +fi diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 4242aad4f..24071ea96 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -31,5 +31,3 @@ sudo grubby \ sudo grubby \ --update-kernel=ALL \ --args="clocksource=tsc tsc=reliable" - -sudo reboot From a9898217774c0fa2365c910562bb473b98224eca Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Wed, 11 Oct 2023 08:02:13 +0200 Subject: [PATCH 549/621] Set remote_folder on all shell provisioners (#1462) --- eks-worker-al2.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 91d99b07b..306e31dbe 100644 --- a/eks-worker-al2.json +++ 
b/eks-worker-al2.json @@ -120,6 +120,7 @@ "provisioners": [ { "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", "inline": [ "mkdir -p {{user `working_dir`}}", "mkdir -p {{user `working_dir`}}/log-collector-script" @@ -145,6 +146,7 @@ }, { "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", "inline": [ "sudo chmod -R a+x {{user `working_dir`}}/bin/", "sudo mv {{user `working_dir`}}/bin/* /usr/bin/" @@ -235,6 +237,7 @@ }, { "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", "inline": [ "rm -rf {{user `working_dir`}}" ] From 4835c67b996fc362fe698033abfe1736382e7b37 Mon Sep 17 00:00:00 2001 From: Nick Baker Date: Tue, 10 Oct 2023 23:51:54 -0700 Subject: [PATCH 550/621] Pull eksctl supported versions for CI (#1465) * remove kubernetes versions file and use eksctl supported version list * recognize compression Co-authored-by: Carter --------- Co-authored-by: Carter --- .github/workflows/ci-manual.yaml | 7 +++++-- kubernetes-versions.json | 8 -------- 2 files changed, 5 insertions(+), 10 deletions(-) delete mode 100644 kubernetes-versions.json diff --git a/.github/workflows/ci-manual.yaml b/.github/workflows/ci-manual.yaml index 6bffb2f8a..9d840747e 100644 --- a/.github/workflows/ci-manual.yaml +++ b/.github/workflows/ci-manual.yaml @@ -36,14 +36,17 @@ jobs: build_id: ${{ steps.variables.outputs.build_id }} ci_step_name_prefix: ${{ steps.variables.outputs.ci_step_name_prefix }} steps: - - uses: actions/checkout@v3 - id: variables run: | echo "git_sha_short=$(echo ${{ inputs.git_sha }} | rev | cut -c-7 | rev)" >> $GITHUB_OUTPUT echo "workflow_run_url=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" >> $GITHUB_OUTPUT - echo "kubernetes_versions=$(cat kubernetes-versions.json | jq -c .)" >> $GITHUB_OUTPUT + # grab supported versions directly from eksctl + wget --no-verbose -O eksctl.tar.gz "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz" + tar xzf eksctl.tar.gz && 
chmod +x ./eksctl + echo "kubernetes_versions=$(./eksctl version --output json | jq -c .EKSServerSupportedVersions)" >> $GITHUB_OUTPUT echo "build_id=ci-${{ inputs.pr_number }}-${{ needs.setup.outputs.git_sha_short }}-${{ inputs.uuid }}" >> $GITHUB_OUTPUT echo 'ci_step_name_prefix=CI:' >> $GITHUB_OUTPUT + notify-start: runs-on: ubuntu-latest needs: diff --git a/kubernetes-versions.json b/kubernetes-versions.json deleted file mode 100644 index 97526a4ba..000000000 --- a/kubernetes-versions.json +++ /dev/null @@ -1,8 +0,0 @@ -[ - "1.23", - "1.24", - "1.25", - "1.26", - "1.27", - "1.28" -] From 5b54e37cde1e8a4c67a3ddaff27c953fc7b7b37f Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 11 Oct 2023 13:27:53 -0700 Subject: [PATCH 551/621] Add CHANGELOG entry placeholder (#1466) --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8efc787d..d36228ebf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # Changelog + + ### AMI Release v20231002 * amazon-eks-gpu-node-1.28-v20231002 * amazon-eks-gpu-node-1.27-v20231002 From 915ce2222e692a4d7b5904ab020d3c28e1e3e511 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 12 Oct 2023 10:02:40 -0700 Subject: [PATCH 552/621] Add named arguments to bot commands (#1463) --- .github/actions/bot/README.md | 11 ++++- .github/actions/bot/index.js | 74 ++++++++++++++++++++++++---- .github/actions/ci/build/action.yaml | 5 +- .github/workflows/ci-manual.yaml | 6 ++- 4 files changed, 84 insertions(+), 12 deletions(-) diff --git a/.github/actions/bot/README.md b/.github/actions/bot/README.md index 7b90fb7bd..526846d91 100644 --- a/.github/actions/bot/README.md +++ b/.github/actions/bot/README.md @@ -4,9 +4,18 @@ This GitHub Action parses commands from pull request comments and executes them. Only authorized users (members and owners of this repository) are able to execute commands. 
-Commands look like: +Commands look like `/COMMAND ARGS`, for example: ``` /echo hello world ``` Multiple commands can be included in a comment, one per line; but each command must be unique. + +Some commands accept additional, named arguments specified on subsequent lines. +Named arguments look like `+NAME ARGS`, for example: +``` +/ci launch ++build cache_container_images=true +``` + +Multiple named arguments can be specified. \ No newline at end of file diff --git a/.github/actions/bot/index.js b/.github/actions/bot/index.js index 76134bd9b..c24398f6d 100644 --- a/.github/actions/bot/index.js +++ b/.github/actions/bot/index.js @@ -20,14 +20,22 @@ async function bot(core, github, context, uuid) { } console.log(`Comment author is authorized: ${author}`); - const commands = parseCommands(uuid, payload, payload.comment.body); + let commands; + try { + commands = parseCommands(uuid, payload, payload.comment.body); + } catch (error) { + console.log(error); + const reply = `@${author} I didn't understand [that](${payload.comment.html_url})! 🤔\n\nTake a look at my [logs](${getBotWorkflowURL(payload, context)}).` + replyToCommand(github, payload, reply); + return; + } if (commands.length === 0) { console.log("No commands found in comment body"); return; } const uniqueCommands = [...new Set(commands.map(command => typeof command))]; if (uniqueCommands.length != commands.length) { - console.log("Duplicate commands found in comment body"); + replyToCommand(github, payload, `@${author} you can't use the same command more than once! 
🙅`); return; } console.log(commands.length + " command(s) found in comment body"); @@ -35,12 +43,7 @@ async function bot(core, github, context, uuid) { for (const command of commands) { const reply = await command.run(author, github); if (typeof reply === 'string') { - github.rest.issues.createComment({ - owner: payload.repository.owner.login, - repo: payload.repository.name, - issue_number: payload.issue.number, - body: reply - }); + replyToCommand(github, payload, reply); } else if (reply) { console.log(`Command returned: ${reply}`); } else { @@ -49,7 +52,22 @@ async function bot(core, github, context, uuid) { } } -// parseCommands splits the comment body into lines and parses each line as a command. +// replyToCommand creates a comment on the same PR that triggered this workflow +function replyToCommand(github, payload, reply) { + github.rest.issues.createComment({ + owner: payload.repository.owner.login, + repo: payload.repository.name, + issue_number: payload.issue.number, + body: reply + }); +} + +// getBotWorkflowURL returns an HTML URL for this workflow execution of the bot +function getBotWorkflowURL(payload, context) { + return `https://github.com/${payload.repository.owner.login}/${payload.repository.name}/actions/runs/${context.runId}`; +} + +// parseCommands splits the comment body into lines and parses each line as a command or named arguments to the previous command. 
function parseCommands(uuid, payload, commentBody) { const commands = []; if (!commentBody) { @@ -57,9 +75,25 @@ function parseCommands(uuid, payload, commentBody) { } const lines = commentBody.split(/\r?\n/); for (const line of lines) { + console.log(`Parsing line: ${line}`); const command = parseCommand(uuid, payload, line); if (command) { commands.push(command); + } else { + const namedArguments = parseNamedArguments(line); + if (namedArguments) { + const previousCommand = commands.at(-1); + if (previousCommand) { + if (typeof previousCommand.addNamedArguments === 'function') { + previousCommand.addNamedArguments(namedArguments.name, namedArguments.args); + } else { + throw new Error(`Parsed named arguments but previous command (${previousCommand.constructor.name}) does not support arguments: ${JSON.stringify(namedArguments)}`); + } + } else { + // don't treat this as an error, because the named argument syntax might just be someone '+1'-ing. + console.log(`Parsed named arguments with no previous command: ${JSON.stringify(namedArguments)}`); + } + } } } return commands @@ -89,6 +123,20 @@ function buildCommand(uuid, payload, name, args) { } } +// parseNamedArgument parses a line as named arguments. +// The format of a command is `+NAME ARGS...`. +// Leading and trailing spaces are ignored. +function parseNamedArguments(line) { + const parsed = line.trim().match(/^\+([a-z\-]+)(?:\s+(.+))?$/); + if (parsed) { + return { + name: parsed[1], + args: parsed[2] + } + } + return null; +} + class EchoCommand { constructor(uuid, payload, args) { this.phrase = args ? 
args : "echo"; @@ -111,6 +159,11 @@ class CICommand { if (args != null && args != "") { this.goal = args; } + this.goal_args = {}; + } + + addNamedArguments(goal, args) { + this.goal_args[goal] = args; } async run(author, github) { @@ -137,6 +190,9 @@ class CICommand { requester: author, comment_url: this.comment_url }; + for (const [goal, args] of Object.entries(this.goal_args)) { + inputs[`${goal}_arguments`] = args; + } console.log(`Dispatching workflow with inputs: ${JSON.stringify(inputs)}`); await github.rest.actions.createWorkflowDispatch({ owner: this.repository_owner, diff --git a/.github/actions/ci/build/action.yaml b/.github/actions/ci/build/action.yaml index f7ad76035..4797df437 100644 --- a/.github/actions/ci/build/action.yaml +++ b/.github/actions/ci/build/action.yaml @@ -9,6 +9,9 @@ inputs: k8s_version: required: true type: string + additional_arguments: + required: false + type: string outputs: ami_id: value: ${{ steps.build.outputs.ami_id }} @@ -22,5 +25,5 @@ runs: shell: bash run: | AMI_NAME="amazon-eks-node-${{ inputs.k8s_version }}-${{ inputs.build_id }}" - make ${{ inputs.k8s_version }} ami_name=${AMI_NAME} + make ${{ inputs.k8s_version }} ami_name=${AMI_NAME} ${{ inputs.additional_arguments }} echo "ami_id=$(jq -r .builds[0].artifact_id "${AMI_NAME}-manifest.json" | cut -d ':' -f 2)" >> $GITHUB_OUTPUT diff --git a/.github/workflows/ci-manual.yaml b/.github/workflows/ci-manual.yaml index 9d840747e..b880875b5 100644 --- a/.github/workflows/ci-manual.yaml +++ b/.github/workflows/ci-manual.yaml @@ -26,6 +26,9 @@ on: - "build" - "launch" - "test" + build_arguments: + required: false + type: string jobs: setup: runs-on: ubuntu-latest @@ -92,6 +95,7 @@ jobs: git_sha: ${{ inputs.git_sha }} k8s_version: ${{ matrix.k8s_version }} build_id: ${{ needs.setup.outputs.build_id }} + additional_arguments: ${{ inputs.build_arguments }} - if: ${{ inputs.goal == 'launch' || inputs.goal == 'test' }} name: "${{ needs.setup.outputs.ci_step_name_prefix }} Launch" id: 
launch @@ -179,4 +183,4 @@ jobs: repo: context.repo.repo, issue_number: ${{ inputs.pr_number }}, body: commentBody - }); \ No newline at end of file + }); From 14fb76d02475a5d67a1276a067949c15a0edc16b Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 13 Oct 2023 13:58:28 -0700 Subject: [PATCH 553/621] get-ecr-uri.sh falls back to use another region in partition if region unconfigured (#1468) --- files/get-ecr-uri.sh | 49 ++++++++++++++++++++-- test/cases/get-ecr-uri.sh | 85 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 4 deletions(-) create mode 100755 test/cases/get-ecr-uri.sh diff --git a/files/get-ecr-uri.sh b/files/get-ecr-uri.sh index 56278ab8d..a160cebcb 100755 --- a/files/get-ecr-uri.sh +++ b/files/get-ecr-uri.sh @@ -39,15 +39,15 @@ else af-south-1) acct="877085696533" ;; - eu-south-1) - acct="590381155156" - ;; ap-southeast-3) acct="296578399912" ;; me-central-1) acct="759879836304" ;; + eu-south-1) + acct="590381155156" + ;; eu-south-2) acct="455263428931" ;; @@ -63,10 +63,51 @@ else il-central-1) acct="066635153087" ;; + # This sections includes all commercial non-opt-in regions, which use + # the same account for ECR pause container images, but still have in-region + # registries. + ap-northeast-1 | \ + ap-northeast-2 | \ + ap-northeast-3 | \ + ap-south-1 | \ + ap-southeast-1 | \ + ap-southeast-2 | \ + ca-central-1 | \ + eu-central-1 | \ + eu-north-1 | \ + eu-west-1 | \ + eu-west-2 | \ + eu-west-3 | \ + sa-east-1 | \ + us-east-1 | \ + us-east-2 | \ + us-west-1 | \ + us-west-2) + acct="602401143452" + ;; + # If the region is not mapped to an account, let's try to choose another region + # in that partition. 
+ us-gov-*) + acct="013241004608" + region="us-gov-west-1" + ;; + cn-*) + acct="961992271922" + region="cn-northwest-1" + ;; + us-iso-*) + acct="725322719131" + region="us-iso-east-1" + ;; + us-isob-*) + acct="187977181151" + region="us-isob-east-1" + ;; *) acct="602401143452" + region="us-west-2" ;; - esac + esac # end region check fi AWS_ECR_SUBDOMAIN="ecr" diff --git a/test/cases/get-ecr-uri.sh b/test/cases/get-ecr-uri.sh new file mode 100755 index 000000000..5b4dd3209 --- /dev/null +++ b/test/cases/get-ecr-uri.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +echo "--> Should use specified account when passed in" +EXPECTED_ECR_URI="999999999999.dkr.ecr.mars-west-1.amazonaws.com.mars" +REGION="mars-west-1" +DOMAIN="amazonaws.com.mars" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}" "999999999999") +if [ ! "$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi + +echo "--> Should use account mapped to the region when set" +EXPECTED_ECR_URI="590381155156.dkr.ecr.eu-south-1.amazonaws.com" +REGION="eu-south-1" +DOMAIN="amazonaws.com" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}") +if [ ! "$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi + +echo "--> Should use non-opt-in account when not opt-in-region" +EXPECTED_ECR_URI="602401143452.dkr.ecr.us-east-2.amazonaws.com" +REGION="us-east-2" +DOMAIN="amazonaws.com" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}") +if [ ! 
"$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi + +echo "--> Should use us-west-2 account and region when opt-in-region" +EXPECTED_ECR_URI="602401143452.dkr.ecr.us-west-2.amazonaws.com" +REGION="eu-south-100" +DOMAIN="amazonaws.com" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}") +if [ ! "$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi + +echo "--> Should default us-gov-west-1 when unknown amazonaws.com.us-gov region" +EXPECTED_ECR_URI="013241004608.dkr.ecr.us-gov-west-1.amazonaws.com.us-gov" +REGION="us-gov-east-100" +DOMAIN="amazonaws.com.us-gov" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}") +if [ ! "$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi + +echo "--> Should default cn-northwest-1 when unknown amazonaws.com.cn region" +EXPECTED_ECR_URI="961992271922.dkr.ecr.cn-northwest-1.amazonaws.com.cn" +REGION="cn-north-100" +DOMAIN="amazonaws.com.cn" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}") +if [ ! "$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi + +echo "--> Should default us-iso-east-1 when unknown amazonaws.com.iso region" +EXPECTED_ECR_URI="725322719131.dkr.ecr.us-iso-east-1.amazonaws.com.iso" +REGION="us-iso-west-100" +DOMAIN="amazonaws.com.iso" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}") +if [ ! 
"$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi + +echo "--> Should default us-isob-east-1 when unknown amazonaws.com.isob region" +EXPECTED_ECR_URI="187977181151.dkr.ecr.us-isob-east-1.amazonaws.com.isob" +REGION="us-isob-west-100" +DOMAIN="amazonaws.com.isob" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}") +if [ ! "$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi From 54795f7cd4863e61b3e75a1129697993b3a50f9f Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 13 Oct 2023 14:25:56 -0700 Subject: [PATCH 554/621] Force delete CI clusters, don't wait for pod eviction (#1472) --- .../actions/janitor/cluster-sweeper/script.sh | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/.github/actions/janitor/cluster-sweeper/script.sh b/.github/actions/janitor/cluster-sweeper/script.sh index 97c041eec..57a20759d 100755 --- a/.github/actions/janitor/cluster-sweeper/script.sh +++ b/.github/actions/janitor/cluster-sweeper/script.sh @@ -27,24 +27,11 @@ function cluster_is_eligible_for_deletion() { local CREATED_AT_ISO8601=$(aws eks describe-cluster --name $CLUSTER_NAME --query 'cluster.createdAt' --output text) iso8601_is_eligible_for_deletion "$CREATED_AT_ISO8601" } -function nodegroup_is_eligible_for_deletion() { - local CLUSTER_NAME="$1" - local NODEGROUP_NAME="$2" - local CREATED_AT_ISO8601=$(aws eks describe-nodegroup --cluster-name "$CLUSTER_NAME" --nodegroup-name $NODEGROUP_NAME --query 'nodegroup.createdAt' --output text) - iso8601_is_eligible_for_deletion "$CREATED_AT_ISO8601" -} wget --no-verbose -O eksctl.tar.gz "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz" tar xf eksctl.tar.gz && chmod +x ./eksctl for CLUSTER in $(aws eks list-clusters --query 'clusters[]' --output text); do - for NODEGROUP in $(aws eks list-nodegroups 
--cluster-name $CLUSTER --query 'nodegroups[]' --output text); do - if nodegroup_is_eligible_for_deletion $CLUSTER $NODEGROUP; then - ./eksctl delete nodegroup --cluster $CLUSTER --name $NODEGROUP - fi - done - if [ "$(aws eks list-nodegroups --cluster-name $CLUSTER --output json | jq '.nodegroups | length')" -gt 0 ]; then - echo "Skipping cluster $CLUSTER" - elif cluster_is_eligible_for_deletion $CLUSTER; then + if cluster_is_eligible_for_deletion $CLUSTER; then echo "Deleting cluster $CLUSTER" - ./eksctl delete cluster --name "$CLUSTER" + ./eksctl delete cluster --name "$CLUSTER" --force --disable-nodegroup-eviction fi done From 5b8b129af4a0705c6db0004e85a792445676cf94 Mon Sep 17 00:00:00 2001 From: Carter Date: Sat, 14 Oct 2023 14:15:35 -0700 Subject: [PATCH 555/621] Add CHANGELOG workflow for new releases (#1467) --- .github/workflows/update-changelog.yaml | 60 +++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 .github/workflows/update-changelog.yaml diff --git a/.github/workflows/update-changelog.yaml b/.github/workflows/update-changelog.yaml new file mode 100644 index 000000000..bc0dcd11b --- /dev/null +++ b/.github/workflows/update-changelog.yaml @@ -0,0 +1,60 @@ +name: "[Release] Update CHANGELOG.md" +on: + release: + types: [released] +permissions: + contents: write + pull-requests: write +jobs: + setup: + # this workflow will always fail in forks; bail if this isn't running in the upstream + if: github.repository == 'awslabs/amazon-eks-ami' + runs-on: ubuntu-latest + outputs: + tag_name: ${{ steps.variables.outputs.tag_name }} + steps: + - id: variables + run: | + echo "tag_name=$(echo ${{ github.ref }} | cut -d/ -f3)" >> $GITHUB_OUTPUT + update-changelog: + runs-on: ubuntu-latest + needs: + - setup + steps: + - uses: actions/checkout@v3 + with: + repository: awslabs/amazon-eks-ami + ref: refs/heads/master + path: amazon-eks-ami/ + - uses: actions/github-script@v6 + with: + script: | + const fs = require('fs'); + const 
changelogPath = './amazon-eks-ami/CHANGELOG.md'; + const placeholder = ''; + const tagName = '${{ needs.setup.outputs.tag_name }}'; + const release = await github.rest.repos.getReleaseByTag({ + tag: tagName, + owner: context.repo.owner, + repo: context.repo.repo, + }); + const changelog = fs.readFileSync(changelogPath, 'utf8'); + if (changelog.includes(release.data.name)) { + throw new Error(`changelog already includes ${release.data.name}`); + } + const newEntry = `### ${release.data.name}\n${release.data.body}`; + const updatedChangelog = changelog.replace(placeholder, placeholder + '\n\n' + newEntry); + fs.writeFileSync(changelogPath, updatedChangelog); + - uses: peter-evans/create-pull-request@v4 + with: + branch: update-changelog + path: amazon-eks-ami/ + add-paths: CHANGELOG.md + commit-message: "Update CHANGELOG.md for release ${{ needs.setup.outputs.tag_name }}" + committer: "GitHub " + author: "GitHub " + title: "Update CHANGELOG.md" + labels: | + changelog/exclude + body: | + Adds CHANGELOG.md entry for release [${{ needs.setup.outputs.tag_name }}](https://github.com/awslabs/amazon-eks-ami/releases/tag/${{ needs.setup.outputs.tag_name }}). 
From b5138d2a45caf0a051516985c8ca9f705319e3a3 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 16 Oct 2023 14:42:53 -0700 Subject: [PATCH 556/621] Allow more flexible kernel_version (#1469) --- scripts/install-worker.sh | 11 +++++++---- scripts/upgrade_kernel.sh | 10 ++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index d239b9859..fdf241426 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -52,6 +52,13 @@ fi ### Packages ################################################################### ################################################################################ +sudo yum install -y \ + yum-utils \ + yum-plugin-versionlock + +# lock the version of the kernel and associated packages before we yum update +sudo yum versionlock kernel-$(uname -r) kernel-headers-$(uname -r) kernel-devel-$(uname -r) + # Update the OS to begin with to catch up to the latest packages. sudo yum update -y @@ -68,8 +75,6 @@ sudo yum install -y \ socat \ unzip \ wget \ - yum-utils \ - yum-plugin-versionlock \ mdadm \ pigz @@ -88,8 +93,6 @@ else sudo yum install -y curl fi -sudo yum versionlock kernel-$(uname -r) kernel-headers-$(uname -r) kernel-devel-$(uname -r) - # Remove the ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 24071ea96..9b13a18bb 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -13,13 +13,15 @@ if [[ -z "$KERNEL_VERSION" ]]; then echo "kernel_version is unset. Setting to $KERNEL_VERSION based on Kubernetes version $KUBERNETES_VERSION." 
fi -if [[ $KERNEL_VERSION == "4.14" ]]; then - sudo yum update -y kernel +if [[ $KERNEL_VERSION == 4.14* ]]; then + sudo yum install -y "kernel-${KERNEL_VERSION}*" else - sudo amazon-linux-extras install -y "kernel-${KERNEL_VERSION}" + KERNEL_MINOR_VERSION=$(echo ${KERNEL_VERSION} | cut -d. -f-2) + sudo amazon-linux-extras enable "kernel-${KERNEL_MINOR_VERSION}" + sudo yum install -y "kernel-${KERNEL_VERSION}*" fi -sudo yum install -y kernel-headers kernel-devel +sudo yum install -y "kernel-headers-${KERNEL_VERSION}*" "kernel-devel-${KERNEL_VERSION}*" # enable pressure stall information sudo grubby \ From bf1203ce7ac164098a629ff7bdd1cca392497e02 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 17 Oct 2023 13:31:06 -0700 Subject: [PATCH 557/621] Add r7i to eni-max-pods.txt (#1473) Co-authored-by: GitHub --- files/eni-max-pods.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index e31f53c1d..fbde64aaa 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -700,6 +700,15 @@ r7gd.8xlarge 234 r7gd.large 29 r7gd.medium 8 r7gd.xlarge 58 +r7i.12xlarge 234 +r7i.16xlarge 737 +r7i.24xlarge 737 +r7i.2xlarge 58 +r7i.48xlarge 737 +r7i.4xlarge 234 +r7i.8xlarge 234 +r7i.large 29 +r7i.xlarge 58 r7iz.12xlarge 234 r7iz.16xlarge 737 r7iz.2xlarge 58 From 7b1ff2283a135e51fa6874bf6eea137971ff50f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20Reegn?= Date: Wed, 18 Oct 2023 17:08:41 +0200 Subject: [PATCH 558/621] Fix containerd slice configuration (#1437) --- files/bootstrap.sh | 3 --- scripts/install-worker.sh | 9 ++++++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index dcd69ee5d..36f47d9c3 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -557,9 +557,6 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then sudo mkdir -p /etc/containerd sudo mkdir -p /etc/cni/net.d - 
sudo mkdir -p /etc/systemd/system/containerd.service.d - printf '[Service]\nSlice=runtime.slice\n' | sudo tee /etc/systemd/system/containerd.service.d/00-runtime-slice.conf - if [[ -n "${CONTAINERD_CONFIG_FILE}" ]]; then sudo cp -v "${CONTAINERD_CONFIG_FILE}" /etc/eks/containerd/containerd-config.toml fi diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index fdf241426..515f3bccc 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -174,8 +174,15 @@ sudo mv $WORKING_DIR/pull-sandbox-image.sh /etc/eks/containerd/pull-sandbox-imag sudo mv $WORKING_DIR/pull-image.sh /etc/eks/containerd/pull-image.sh sudo chmod +x /etc/eks/containerd/pull-sandbox-image.sh sudo chmod +x /etc/eks/containerd/pull-image.sh - sudo mkdir -p /etc/systemd/system/containerd.service.d +CONFIGURE_CONTAINERD_SLICE=$(vercmp "$KUBERNETES_VERSION" gteq "1.24.0" || true) +if [ "$CONFIGURE_CONTAINERD_SLICE" == "true" ]; then + cat << EOF | sudo tee /etc/systemd/system/containerd.service.d/00-runtime-slice.conf +[Service] +Slice=runtime.slice +EOF +fi + cat << EOF | sudo tee /etc/systemd/system/containerd.service.d/10-compat-symlink.conf [Service] ExecStartPre=/bin/ln -sf /run/containerd/containerd.sock /run/dockershim.sock From dc273ea90b67aced48aea34227e71705b720e7ce Mon Sep 17 00:00:00 2001 From: donovanrost Date: Wed, 18 Oct 2023 15:55:46 -0600 Subject: [PATCH 559/621] Correctly tag cached images for us-gov-west-1 FIPS endpoint (#1476) --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 515f3bccc..a5e30c755 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -482,7 +482,7 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! 
[[ ${ISOLATED_REGIONS} =~ $BIN regional_img="${img/$ECR_URI/$region_uri}" sudo ctr -n k8s.io image tag "${img}" "${regional_img}" || : ## Tag ECR fips endpoint for supported regions - if [[ "${region}" =~ (us-east-1|us-east-2|us-west-1|us-west-2|us-gov-east-1|us-gov-east-2) ]]; then + if [[ "${region}" =~ (us-east-1|us-east-2|us-west-1|us-west-2|us-gov-east-1|us-gov-west-1) ]]; then regional_fips_img="${regional_img/.ecr./.ecr-fips.}" sudo ctr -n k8s.io image tag "${img}" "${regional_fips_img}" || : sudo ctr -n k8s.io image tag "${img}" "${regional_fips_img/-eksbuild.1/}" || : From bde408b340d992aad39e13de1aaf929f358f4338 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 19 Oct 2023 09:44:26 -0700 Subject: [PATCH 560/621] Lint space errors (#1121) --- .git-blame-ignore-revs | 1 - .github/workflows/ci-manual.yaml | 6 ++-- .github/workflows/sync-to-codecommit.yaml | 2 +- CHANGELOG.md | 22 ++++++------ Makefile | 1 + doc/CODE_OF_CONDUCT.md | 4 +-- doc/CONTRIBUTING.md | 14 ++++---- doc/USER_GUIDE.md | 6 ++-- eks-worker-al2.json | 6 ++-- hack/lint-space-errors.sh | 8 +++++ .../windows/eks-log-collector.ps1 | 34 +++++++++---------- scripts/cleanup_additional_repos.sh | 10 +++--- scripts/install_additional_repos.sh | 10 +++--- test/README.md | 8 ++--- 14 files changed, 70 insertions(+), 62 deletions(-) create mode 100755 hack/lint-space-errors.sh diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index b78d5db21..283144acd 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,3 +1,2 @@ # Applied code style rules to shell files 6014c4e6872a23f82ca295afa93b033207042876 - diff --git a/.github/workflows/ci-manual.yaml b/.github/workflows/ci-manual.yaml index b880875b5..2860b75c7 100644 --- a/.github/workflows/ci-manual.yaml +++ b/.github/workflows/ci-manual.yaml @@ -33,8 +33,8 @@ jobs: setup: runs-on: ubuntu-latest outputs: - git_sha_short: ${{ steps.variables.outputs.git_sha_short }} - workflow_run_url: ${{ steps.variables.outputs.workflow_run_url 
}} + git_sha_short: ${{ steps.variables.outputs.git_sha_short }} + workflow_run_url: ${{ steps.variables.outputs.workflow_run_url }} kubernetes_versions: ${{ steps.variables.outputs.kubernetes_versions }} build_id: ${{ steps.variables.outputs.build_id }} ci_step_name_prefix: ${{ steps.variables.outputs.ci_step_name_prefix }} @@ -150,7 +150,7 @@ jobs: uniqueStepNames.add(stepName); } } - } + } } const headers = [{ data: 'Kubernetes version', diff --git a/.github/workflows/sync-to-codecommit.yaml b/.github/workflows/sync-to-codecommit.yaml index a5d03d0e4..ebed3203c 100644 --- a/.github/workflows/sync-to-codecommit.yaml +++ b/.github/workflows/sync-to-codecommit.yaml @@ -7,7 +7,7 @@ on: jobs: mirror: - if: github.repository == 'awslabs/amazon-eks-ami' + if: github.repository == 'awslabs/amazon-eks-ami' runs-on: ubuntu-latest # These permissions are needed to interact with GitHub's OIDC Token endpoint. permissions: diff --git a/CHANGELOG.md b/CHANGELOG.md index d36228ebf..fc5a4dce1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -107,10 +107,10 @@ AMI details: Notable changes: - kernel-5.10 updated to address: - [ALAS2KERNEL-5.10-2023-039](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-039.html) - - Add support for Kubernetes 1.28 ([#1431](https://github.com/awslabs/amazon-eks-ami/pull/1431)) + - Add support for Kubernetes 1.28 ([#1431](https://github.com/awslabs/amazon-eks-ami/pull/1431)) - GPU AMI: - Released with [Neuron version 2.14.0](https://awsdocs-neuron.readthedocs-hosted.com/en/latest/release-notes/index.html#neuron-2-14-0-09-15-2023) - - GPU AMIs on Kubernetes 1.28 and above: + - GPU AMIs on Kubernetes 1.28 and above: - Upgraded `kernel` to 5.10 - Upgraded `cuda` version to 12.2 - Upgraded Nvidia driver to 535.54.03-1 @@ -265,7 +265,7 @@ AMI details: * `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 Notable changes: -- Kernel fix for `CVE-2023-3117` and `CVE-2023-35001` with new versions: [5.10 
kernel](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-037.html) and [5.4 kernel](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-049.html) +- Kernel fix for `CVE-2023-3117` and `CVE-2023-35001` with new versions: [5.10 kernel](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-037.html) and [5.4 kernel](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-049.html) - Mount bpffs on all supported Kubernetes versions. ([#1349](https://github.com/awslabs/amazon-eks-ami/pull/1349)) - Enable discard_unpacked_layers by default to clean up compressed image layers in containerd's content store.([#1360](https://github.com/awslabs/amazon-eks-ami/pull/1360)) @@ -1264,8 +1264,8 @@ AMI details: Notable changes: * Pin Kernel 5.4 to 5.4.209-116.367 to prevent nodes from going into Unready [#1072](https://github.com/awslabs/amazon-eks-ami/pull/1072) -* Increase the kube-api-server QPS from 5/10 to 10/20 [#1030](https://github.com/awslabs/amazon-eks-ami/pull/1030) -* Update docker and containerd for [ALASDOCKER-2022-021](https://alas.aws.amazon.com/AL2/ALASDOCKER-2022-021.html) [#1056](https://github.com/awslabs/amazon-eks-ami/pull/1056) +* Increase the kube-api-server QPS from 5/10 to 10/20 [#1030](https://github.com/awslabs/amazon-eks-ami/pull/1030) +* Update docker and containerd for [ALASDOCKER-2022-021](https://alas.aws.amazon.com/AL2/ALASDOCKER-2022-021.html) [#1056](https://github.com/awslabs/amazon-eks-ami/pull/1056) * runc version is updated to 1.1.3-1.amzn2.0.2 to include ALAS2DOCKER-2022-020 [#1055](https://github.com/awslabs/amazon-eks-ami/pull/1055) * Release AMI in me-central-1 with version 1.21, 1.22, 1.23. 1.20 is not supported in this region since it will be deprecated soon. * Fixes an issue with Docker daemon configuration on the GPU AMI (#351). 
@@ -1425,9 +1425,9 @@ Binaries used to build these AMIs are published: AMI details: * kernel: 5.4.209-116.363.amzn2 -* dockerd: 20.10.17-1.amzn2 -* containerd: 1.6.6-1.amzn2 -* runc: 1.1.3-1.amzn2-1.amzn2 +* dockerd: 20.10.17-1.amzn2 +* containerd: 1.6.6-1.amzn2 +* runc: 1.1.3-1.amzn2-1.amzn2 * cuda: 470.57.02-1 * nvidia-container-runtime-hook: 1.4.0-1.amzn2 * SSM agent: 3.1.1575.0-1.amzn2 @@ -1609,7 +1609,7 @@ AMI details: Notable changes: * Update kubelet binaries for 1.20 * Support packer's ami_regions feature -* Increase /var/log/messages limit to 100M +* Increase /var/log/messages limit to 100M * Support local cluster in Outposts * Adding c6id, m6id, r6id to eni-max-pods.txt @@ -3116,7 +3116,7 @@ Notable changes: - Fix Makefile indentation for 1.19 (#616) - Increase fs.inotify.max_user_instances to 8192 from the default of 128 (#614) - use dynamic lookup of docker gid (#622) -- bump docker version to 19.03.13ce-1 (#624) +- bump docker version to 19.03.13ce-1 (#624) ### AMI Release v20210208 * amazon-eks-gpu-node-1.19-v20210208 @@ -3167,7 +3167,7 @@ Binaries used to build these AMIs are published : * s3://amazon-eks/1.15.12/2020-11-02/ Notable changes : -* ARM AMIs built with m6g.large instance type (#601) +* ARM AMIs built with m6g.large instance type (#601) * Add Support for c6gn instance type (#597) * Patch for CVE-2021-3156 (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-3156) diff --git a/Makefile b/Makefile index f7c64c618..1a10456bb 100644 --- a/Makefile +++ b/Makefile @@ -96,6 +96,7 @@ transform-al2-to-al2023: lint: lint-docs ## Check the source files for syntax and format issues $(SHFMT_COMMAND) $(SHFMT_FLAGS) --diff $(MAKEFILE_DIR) $(SHELLCHECK_COMMAND) --format gcc --severity error $(SHELL_FILES) + hack/lint-space-errors.sh .PHONY: test test: ## run the test-harness diff --git a/doc/CODE_OF_CONDUCT.md b/doc/CODE_OF_CONDUCT.md index 3b6446687..5b627cfa6 100644 --- a/doc/CODE_OF_CONDUCT.md +++ b/doc/CODE_OF_CONDUCT.md @@ -1,4 +1,4 @@ ## Code 
of Conduct -This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). -For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact +This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). +For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact opensource-codeofconduct@amazon.com with any additional questions or comments. diff --git a/doc/CONTRIBUTING.md b/doc/CONTRIBUTING.md index 5f030d149..b7cdc25ab 100644 --- a/doc/CONTRIBUTING.md +++ b/doc/CONTRIBUTING.md @@ -1,9 +1,9 @@ # Contributing Guidelines -Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional +Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional documentation, we greatly value feedback and contributions from our community. -Please read through this document before submitting any issues or pull requests to ensure we have all the necessary +Please read through this document before submitting any issues or pull requests to ensure we have all the necessary information to effectively respond to your bug report or contribution. @@ -11,7 +11,7 @@ information to effectively respond to your bug report or contribution. We welcome you to use the GitHub issue tracker to report bugs or suggest features. 
-When filing an issue, please check [existing open](https://github.com/aws-samples/amazon-eks-ami/issues), or [recently closed](https://github.com/aws-samples/amazon-eks-ami/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already +When filing an issue, please check [existing open](https://github.com/aws-samples/amazon-eks-ami/issues), or [recently closed](https://github.com/aws-samples/amazon-eks-ami/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: * A reproducible test case or series of steps @@ -37,7 +37,7 @@ To send us a pull request, please: 6. Send us a pull request, answering any default questions in the pull request interface. 7. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. -GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and +GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). ### Testing Changes @@ -131,12 +131,12 @@ The issue is discussed in [this StackExchange post](https://unix.stackexchange.c On OSX, running `brew install coreutils` resolves the issue. ## Finding contributions to work on -Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels ((enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/amazon-eks-ami/labels/help%20wanted) issues is a great place to start. +Looking at the existing issues is a great way to find something to contribute on. 
As our projects, by default, use the default GitHub issue labels ((enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/amazon-eks-ami/labels/help%20wanted) issues is a great place to start. ## Code of Conduct -This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). -For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact +This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). +For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact opensource-codeofconduct@amazon.com with any additional questions or comments. diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index 24b2575dc..d365b7e29 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -142,7 +142,7 @@ make k8s \ ## Container Image Caching -Optionally, some container images can be cached during the AMI build process in order to reduce the latency of the node getting to a `Ready` state when launched. +Optionally, some container images can be cached during the AMI build process in order to reduce the latency of the node getting to a `Ready` state when launched. To turn on container image caching: @@ -159,7 +159,7 @@ When container image caching is enabled, the following images are cached: The account ID can be different depending on the region and partition you are building the AMI in. See [here](https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html) for more details. -Since the VPC CNI is not versioned with K8s itself, the latest version of the VPC CNI and the default version, based on the response from the EKS DescribeAddonVersions at the time of the AMI build, will be cached. 
+Since the VPC CNI is not versioned with K8s itself, the latest version of the VPC CNI and the default version, based on the response from the EKS DescribeAddonVersions at the time of the AMI build, will be cached. The images listed above are also tagged with each region in the partition the AMI is built in, since images are often built in one region and copied to others within the same partition. Images that are available to pull from an ECR FIPS endpoint are also tagged as such (i.e. `602401143452.dkr.ecr-fips.us-east-1.amazonaws.com/eks/pause:3.5`). @@ -377,7 +377,7 @@ For more information about image credential provider plugins, refer to the [Kube Some instance types launch with ephemeral NVMe instance storage (i3, i4i, c5d, c6id, etc). There are two main ways of utilizing this storage within Kubernetes: a single RAID-0 array for use by kubelet and containerd or mounting the individual disks for pod usage. -The EKS Optimized AMI includes a utility script to configure ephemeral storage. The script can be invoked by passing the `--local-disks ` flag to the `/etc/eks/bootstrap.sh` script or the script can be invoked directly at `/bin/setup-local-disks`. All disks are formatted with an XFS file system. +The EKS Optimized AMI includes a utility script to configure ephemeral storage. The script can be invoked by passing the `--local-disks ` flag to the `/etc/eks/bootstrap.sh` script or the script can be invoked directly at `/bin/setup-local-disks`. All disks are formatted with an XFS file system. 
Below are details on the two disk setup options: diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 306e31dbe..c301c1eca 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -2,7 +2,7 @@ "_comment": "All template variables are enumerated here; and most variables have a default value defined in eks-worker-al2-variables.json", "variables": { "additional_yum_repos": null, - "ami_component_description": null, + "ami_component_description": null, "ami_description": null, "ami_name": null, "ami_regions": null, @@ -15,7 +15,7 @@ "aws_session_token": null, "binary_bucket_name": null, "binary_bucket_region": null, - "cache_container_images": null, + "cache_container_images": null, "cni_plugin_version": null, "containerd_version": null, "creator": null, @@ -28,7 +28,7 @@ "kubernetes_build_date": null, "kubernetes_version": null, "launch_block_device_mappings_volume_size": null, - "pause_container_version": null, + "pause_container_version": null, "pull_cni_from_github": null, "remote_folder": null, "runc_version": null, diff --git a/hack/lint-space-errors.sh b/hack/lint-space-errors.sh new file mode 100755 index 000000000..6c0f84a73 --- /dev/null +++ b/hack/lint-space-errors.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +cd $(dirname $0)/.. + +# `git apply|diff` can check for space errors, with the core implementation being `git diff-tree` +# this tool compares two trees, generally used to find errors in proposed changes +# we want to check the entire existing tree, so we compare HEAD against an empty tree +git diff-tree --check $(git hash-object -t tree /dev/null) HEAD diff --git a/log-collector-script/windows/eks-log-collector.ps1 b/log-collector-script/windows/eks-log-collector.ps1 index 31fa84ba2..4bb1e454e 100644 --- a/log-collector-script/windows/eks-log-collector.ps1 +++ b/log-collector-script/windows/eks-log-collector.ps1 @@ -1,27 +1,27 @@ -<# +<# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ - or in the "license" file accompanying this file. + or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -.SYNOPSIS +.SYNOPSIS Collects EKS Logs -.DESCRIPTION - Run the script to gather basic operating system, Docker daemon, and kubelet logs. +.DESCRIPTION + Run the script to gather basic operating system, Docker daemon, and kubelet logs. .NOTES You need to run this script with Elevated permissions to allow for the collection of the installed applications list -.EXAMPLE +.EXAMPLE eks-log-collector.ps1 - Gather basic operating system, Docker daemon, and kubelet logs. + Gather basic operating system, Docker daemon, and kubelet logs. 
#> param( - [Parameter(Mandatory=$False)][string]$RunMode = "Collect" + [Parameter(Mandatory=$False)][string]$RunMode = "Collect" ) # Common options @@ -111,10 +111,10 @@ Function get_sysinfo{ Write-Host "OK" -ForegroundColor "green" } catch { - Write-Error "Unable to collect system information" + Write-Error "Unable to collect system information" Break - } - + } + } Function is_diskfull{ @@ -127,11 +127,11 @@ Function is_diskfull{ Write-Host "OK" -ForegroundColor "green" } catch { - Write-Error "Unable to Determine Free Disk Space" + Write-Error "Unable to Determine Free Disk Space" Break } if ($percent -lt $threshold){ - Write-Error "C: drive only has $percent% free space, please ensure there is at least $threshold% free disk space to collect and store the log files" + Write-Error "C: drive only has $percent% free space, please ensure there is at least $threshold% free disk space to collect and store the log files" Break } } @@ -328,7 +328,7 @@ Function get_containerd_logs{ Function get_network_info{ try { - Write-Host "Collecting network Information" + Write-Host "Collecting network Information" Get-HnsNetwork | Select Name, Type, Id, AddressPrefix > $info_system\network\hns\network.txt Get-hnsnetwork | Convertto-json -Depth 20 >> $info_system\network\hns\network.txt Get-hnsnetwork | % { Get-HnsNetwork -Id $_.ID -Detailed } | Convertto-json -Depth 20 >> $info_system\network\hns\networkdetailed.txt @@ -373,7 +373,7 @@ Function init{ create_working_dir get_sysinfo } - + Function collect{ init is_diskfull @@ -395,11 +395,11 @@ Function collect{ #-------------------------- #Main-function -Function main { +Function main { Write-Host "Running Default(Collect) Mode" -foregroundcolor "blue" cleanup collect - pack + pack } #Entry point diff --git a/scripts/cleanup_additional_repos.sh b/scripts/cleanup_additional_repos.sh index 79179d674..c9cb20f07 100644 --- a/scripts/cleanup_additional_repos.sh +++ b/scripts/cleanup_additional_repos.sh @@ -12,13 +12,13 @@ fi AWK_CMD=' 
BEGIN {RS=";";FS=","} { - delete vars; - for(i = 1; i <= NF; ++i) { - n = index($i, "="); - if(n) { + delete vars; + for(i = 1; i <= NF; ++i) { + n = index($i, "="); + if(n) { vars[substr($i, 1, n-1)] = substr($i, n + 1) } - } + } Repo = "/etc/yum.repos.d/"vars["repo"]".repo" } {cmd="rm -f " Repo; system(cmd)} diff --git a/scripts/install_additional_repos.sh b/scripts/install_additional_repos.sh index caabbca4d..dd1862743 100644 --- a/scripts/install_additional_repos.sh +++ b/scripts/install_additional_repos.sh @@ -19,13 +19,13 @@ fi AWK_CMD=' BEGIN {RS=";";FS=","} { - delete vars; - for(i = 1; i <= NF; ++i) { - n = index($i, "="); - if(n) { + delete vars; + for(i = 1; i <= NF; ++i) { + n = index($i, "="); + if(n) { vars[substr($i, 1, n-1)] = substr($i, n + 1) } - } + } Repo = "/etc/yum.repos.d/"vars["repo"]".repo" } {print "["vars["repo"]"]" > Repo} diff --git a/test/README.md b/test/README.md index e688ca945..6d9f58a2f 100644 --- a/test/README.md +++ b/test/README.md @@ -1,10 +1,10 @@ ## Tests -This directory contains a Dockerfile that is able to be used locally to test the `/etc/eks/boostrap.sh` script without having to use a real AL2 EC2 instance for a quick dev-loop. It is still necessary to test the bootstrap script on a real instance since the Docker image is not a fully accurate representation. +This directory contains a Dockerfile that is able to be used locally to test the `/etc/eks/boostrap.sh` script without having to use a real AL2 EC2 instance for a quick dev-loop. It is still necessary to test the bootstrap script on a real instance since the Docker image is not a fully accurate representation. ## AL2 EKS Optimized AMI Docker Image -The image is built using the official AL2 image `public.ecr.aws/amazonlinux/amazonlinux:2`. It has several mocks installed including the [ec2-metadata-mock](https://github.com/aws/amazon-ec2-metadata-mock). 
Mocks are installed into `/sbin`, so adding addditional ones as necessary should be as simple as dropping a bash script in the `mocks` dir named as the command you would like to mock out. +The image is built using the official AL2 image `public.ecr.aws/amazonlinux/amazonlinux:2`. It has several mocks installed including the [ec2-metadata-mock](https://github.com/aws/amazon-ec2-metadata-mock). Mocks are installed into `/sbin`, so adding addditional ones as necessary should be as simple as dropping a bash script in the `mocks` dir named as the command you would like to mock out. ## Usage @@ -16,7 +16,7 @@ docker build -t eks-optimized-ami -f Dockerfile ../ docker run -it eks-optimized-ami /etc/eks/bootstrap.sh --b64-cluster-ca dGVzdA== --apiserver-endpoint http://my-api-endpoint test ``` -The `test-harness.sh` script wraps a build and runs test script in the `cases` dir. Tests scripts within the `cases` dir are invoked by the `test-harness.sh` script and have access to the `run` function. The `run` function accepts a temporary directory as an argument in order to mount as a volume in the container so that test scripts can check files within the `/etc/kubernetes/` directory after a bootstrap run. The remaining arguments to the `run` function are a path to a script within the AL2 EKS Optimized AMI Docker Container. +The `test-harness.sh` script wraps a build and runs test script in the `cases` dir. Tests scripts within the `cases` dir are invoked by the `test-harness.sh` script and have access to the `run` function. The `run` function accepts a temporary directory as an argument in order to mount as a volume in the container so that test scripts can check files within the `/etc/kubernetes/` directory after a bootstrap run. The remaining arguments to the `run` function are a path to a script within the AL2 EKS Optimized AMI Docker Container. 
Here's an example `run` call: @@ -31,7 +31,7 @@ run ${TEMP_DIR} /etc/eks/bootstrap.sh \ ## ECR Public -You may need to logout of ECR public or reauthenticate if your credentials are expired: +You may need to logout of ECR public or reauthenticate if your credentials are expired: ```bash docker logout public.ecr.aws From bd6844d1f0aba7e0c78d50b9f03eae958a81b22c Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 19 Oct 2023 10:00:43 -0700 Subject: [PATCH 561/621] Ignore commit to address space errors (#1478) --- .git-blame-ignore-revs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 283144acd..d04cc330c 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,2 +1,4 @@ # Applied code style rules to shell files 6014c4e6872a23f82ca295afa93b033207042876 +# Addressed space errors +bde408b340d992aad39e13de1aaf929f358f4338 From dbf0d15c09e6b4a695b1a45ba8c9ecf677fa0155 Mon Sep 17 00:00:00 2001 From: guessi Date: Sat, 21 Oct 2023 02:03:54 +0800 Subject: [PATCH 562/621] Collect more info about Amazon VPC CNI (#1245) --- .../linux/eks-log-collector.sh | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index cbd454fcb..ee03b46ac 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -278,6 +278,7 @@ collect() { get_sysctls_info get_networking_info get_cni_config + get_cni_configuration_variables get_docker_logs get_sandboxImage_info get_cpu_throttled_processes @@ -560,6 +561,35 @@ get_cni_config() { ok } +get_cni_configuration_variables() { + # To get cni configuration variables, gather from the main container "amazon-k8s-cni" + # - https://github.com/aws/amazon-vpc-cni-k8s#cni-configuration-variables + try "collect CNI Configuration Variables from Docker" + + # "docker container list" will only show "RUNNING" containers. 
+ # "docker container inspect" will generate plain text output. + if [[ "$(pgrep -o dockerd)" -ne 0 ]]; then + timeout 75 docker container list | awk '/amazon-k8s-cni/{print$NF}' | xargs -n 1 docker container inspect > "${COLLECT_DIR}"/cni/cni-configuration-variables-dockerd.txt 2>&1 || echo -e "\tTimed out, ignoring \"cni configuration variables output \" " + else + warning "The Docker daemon is not running." + fi + + try "collect CNI Configuration Variables from Containerd" + + # "ctr container list" will list down all containers, including stopped ones. + # "ctr container info" will generate JSON format output. + if ! command -v ctr > /dev/null 2>&1; then + warning "ctr not installed" + else + # "ctr --namespace k8s.io container list" will return two containers + # - amazon-k8s-cni:v1.xx.yy + # - amazon-k8s-cni-init:v1.xx.yy + timeout 75 ctr --namespace k8s.io container list | awk '/amazon-k8s-cni:v/{print$1}' | xargs -n 1 ctr --namespace k8s.io container info > "${COLLECT_DIR}"/cni/cni-configuration-variables-containerd.json 2>&1 || echo -e "\tTimed out, ignoring \"cni configuration variables output \" " + fi + + ok +} + get_pkgtype() { if [[ "$(command -v rpm)" ]]; then PACKAGE_TYPE=rpm From a181b022f16fe26081ef020f1cff6a687ad82775 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 25 Oct 2023 17:38:28 -0700 Subject: [PATCH 563/621] Update eni-max-pods.txt (#1485) Co-authored-by: GitHub --- files/eni-max-pods.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index fbde64aaa..de12d7b64 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -518,6 +518,7 @@ m7i.8xlarge 234 m7i.large 29 m7i.xlarge 58 mac1.metal 234 +mac2-m2.metal 234 mac2-m2pro.metal 234 mac2.metal 234 p2.16xlarge 234 From 967fb36528818af12a9cf2dcfc4097dec2e1e2b3 Mon Sep 17 00:00:00 2001 From: Davanum Srinivas Date: Thu, 26 Oct 2023 13:22:42 -0400 Subject: [PATCH 
564/621] Fail fast if we cannot determine kubelet version (#1484) kubelet is likely to fail when there is a mismatch with GLIBC that is in the image vs the one golang uses to build the kubelet. So fail the image right away when this happens as this specific kubelet binary will NOT work in any instance started with this image. ``` 2023-10-25T10:11:38-04:00: amazon-ebs: kubelet: /lib64/libc.so.6: version `GLIBC_2.32' not found (required by kubelet) 2023-10-25T10:11:38-04:00: amazon-ebs: kubelet: /lib64/libc.so.6: version `GLIBC_2.34' not found (required by kubelet) ``` Signed-off-by: Davanum Srinivas --- scripts/generate-version-info.sh | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/generate-version-info.sh b/scripts/generate-version-info.sh index 3f75cc01d..94ded309c 100644 --- a/scripts/generate-version-info.sh +++ b/scripts/generate-version-info.sh @@ -16,8 +16,19 @@ OUTPUT_FILE="$1" sudo rpm --query --all --queryformat '\{"%{NAME}": "%{VERSION}-%{RELEASE}"\}\n' | jq --slurp --sort-keys 'add | {packages:(.)}' > "$OUTPUT_FILE" # binaries -echo $(jq ".binaries.kubelet = \"$(kubelet --version | awk '{print $2}')\"" $OUTPUT_FILE) > $OUTPUT_FILE -echo $(jq ".binaries.awscli = \"$(aws --version | awk '{print $1}' | cut -d '/' -f 2)\"" $OUTPUT_FILE) > $OUTPUT_FILE +KUBELET_VERSION=$(kubelet --version | awk '{print $2}') +if [ "$?" != 0 ]; then + echo "unable to get kubelet version" + exit 1 +fi +echo $(jq ".binaries.kubelet = \"$KUBELET_VERSION\"" $OUTPUT_FILE) > $OUTPUT_FILE + +CLI_VERSION=$(aws --version | awk '{print $1}' | cut -d '/' -f 2) +if [ "$?" 
!= 0 ]; then + echo "unable to get aws cli version" + exit 1 +fi +echo $(jq ".binaries.awscli = \"$CLI_VERSION\"" $OUTPUT_FILE) > $OUTPUT_FILE # cached images if systemctl is-active --quiet containerd; then From 094c79792367ad4501f0fd2774f1fc99dd0b1044 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 26 Oct 2023 13:46:40 -0700 Subject: [PATCH 565/621] Persist CI version-info.json as artifact (#1493) --- .github/actions/ci/build/action.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/actions/ci/build/action.yaml b/.github/actions/ci/build/action.yaml index 4797df437..822617abb 100644 --- a/.github/actions/ci/build/action.yaml +++ b/.github/actions/ci/build/action.yaml @@ -27,3 +27,7 @@ runs: AMI_NAME="amazon-eks-node-${{ inputs.k8s_version }}-${{ inputs.build_id }}" make ${{ inputs.k8s_version }} ami_name=${AMI_NAME} ${{ inputs.additional_arguments }} echo "ami_id=$(jq -r .builds[0].artifact_id "${AMI_NAME}-manifest.json" | cut -d ':' -f 2)" >> $GITHUB_OUTPUT + - uses: actions/upload-artifact@v3 + with: + name: version-info + path: "*-version-info.json" From 78983e5d0d9be9b52cf7b1d87090391c1453a160 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 17:24:58 -0700 Subject: [PATCH 566/621] Add new i4i sizes to eni-max-pods.txt (#1495) Co-authored-by: GitHub --- files/eni-max-pods.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index de12d7b64..4119a522f 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -301,7 +301,9 @@ i4g.4xlarge 234 i4g.8xlarge 234 i4g.large 29 i4g.xlarge 58 +i4i.12xlarge 234 i4i.16xlarge 737 +i4i.24xlarge 437 i4i.2xlarge 58 i4i.32xlarge 737 i4i.4xlarge 234 From b3207e08f4f4433f18ee3061c710158c08760102 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 30 Oct 2023 11:31:36 -0700 Subject: [PATCH 567/621] Update 
eni-max-pods.txt (#1497) Co-authored-by: GitHub --- files/eni-max-pods.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 4119a522f..0d5e473f0 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -212,6 +212,8 @@ c7i.48xlarge 737 c7i.4xlarge 234 c7i.8xlarge 234 c7i.large 29 +c7i.metal-24xl 737 +c7i.metal-48xl 737 c7i.xlarge 58 cr1.8xlarge 234 d2.2xlarge 58 @@ -518,6 +520,8 @@ m7i.48xlarge 737 m7i.4xlarge 234 m7i.8xlarge 234 m7i.large 29 +m7i.metal-24xl 737 +m7i.metal-48xl 737 m7i.xlarge 58 mac1.metal 234 mac2-m2.metal 234 @@ -711,6 +715,8 @@ r7i.48xlarge 737 r7i.4xlarge 234 r7i.8xlarge 234 r7i.large 29 +r7i.metal-24xl 737 +r7i.metal-48xl 737 r7i.xlarge 58 r7iz.12xlarge 234 r7iz.16xlarge 737 @@ -719,6 +725,8 @@ r7iz.32xlarge 737 r7iz.4xlarge 234 r7iz.8xlarge 234 r7iz.large 29 +r7iz.metal-16xl 737 +r7iz.metal-32xl 737 r7iz.xlarge 58 t1.micro 4 t2.2xlarge 44 From d61b307cc94bf66ff7bfb6d4e50bdf0a6fa710ed Mon Sep 17 00:00:00 2001 From: Davanum Srinivas Date: Tue, 31 Oct 2023 08:36:13 -0400 Subject: [PATCH 568/621] Drop the FIPS related provisioners for al2023 (#1499) Signed-off-by: Davanum Srinivas --- hack/transform-al2-to-al2023.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hack/transform-al2-to-al2023.sh b/hack/transform-al2-to-al2023.sh index e15be44d9..7a5c0bb69 100755 --- a/hack/transform-al2-to-al2023.sh +++ b/hack/transform-al2-to-al2023.sh @@ -18,6 +18,8 @@ fi cat "${PACKER_TEMPLATE_FILE}" \ | jq '._comment = "All template variables are enumerated here; and most variables have a default value defined in eks-worker-al2023-variables.json"' \ | jq '.variables.temporary_key_pair_type = "ed25519"' \ + | jq 'del(.provisioners[5])' \ + | jq 'del(.provisioners[5])' \ | jq 'del(.provisioners[5])' \ > "${PACKER_TEMPLATE_FILE/al2/al2023}" From 7e6c213f14f1dd38551a7589c332c034161ad43f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20Reegn?= Date: Wed, 1 Nov 2023 19:08:55 +0100 
Subject: [PATCH 569/621] Set nerdctl default namespace to k8s.io (#1488) --- scripts/install-worker.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index a5e30c755..00256ee80 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -204,6 +204,10 @@ EOF ############################################################################### sudo yum install -y nerdctl +sudo mkdir /etc/nerdctl +cat << EOF | sudo tee -a /etc/nerdctl/nerdctl.toml +namespace = "k8s.io" +EOF ################################################################################ ### Docker ##################################################################### From 2980eaf98294f669bd5f9f87dcc2a8d95261a4e7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 1 Nov 2023 17:15:27 -0700 Subject: [PATCH 570/621] Update CHANGELOG.md for release v20231027 (#1502) Co-authored-by: GitHub --- CHANGELOG.md | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc5a4dce1..3a2bb30e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,68 @@ +### AMI Release v20231027 +* amazon-eks-gpu-node-1.28-v20231027 +* amazon-eks-gpu-node-1.27-v20231027 +* amazon-eks-gpu-node-1.26-v20231027 +* amazon-eks-gpu-node-1.25-v20231027 +* amazon-eks-gpu-node-1.24-v20231027 +* amazon-eks-gpu-node-1.23-v20231027 +* amazon-eks-arm64-node-1.28-v20231027 +* amazon-eks-arm64-node-1.27-v20231027 +* amazon-eks-arm64-node-1.26-v20231027 +* amazon-eks-arm64-node-1.25-v20231027 +* amazon-eks-arm64-node-1.24-v20231027 +* amazon-eks-arm64-node-1.23-v20231027 +* amazon-eks-node-1.28-v20231027 +* amazon-eks-node-1.27-v20231027 +* amazon-eks-node-1.26-v20231027 +* amazon-eks-node-1.25-v20231027 +* amazon-eks-node-1.24-v20231027 +* amazon-eks-node-1.23-v20231027 + +[Release 
versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.28.2-20231027` +* `1.27.6-20231027` +* `1.26.9-20231027` +* `1.25.14-20231027` +* `1.24.17-20231027` +* `1.23.17-20231027` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.28.2/2023-10-17/ +* s3://amazon-eks/1.27.6/2023-10-17/ +* s3://amazon-eks/1.26.9/2023-10-17/ +* s3://amazon-eks/1.25.14/2023-10-17/ +* s3://amazon-eks/1.24.17/2023-10-17/ +* s3://amazon-eks/1.23.17/2023-10-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.257-170.359.amzn2 + * Kubernetes 1.24 and above: 5.10.197-186.748.amzn2 + * ⚠️ **Note: A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible.** More information is available in https://github.com/awslabs/amazon-eks-ami/issues/1494. To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: + * Kubernetes 1.27 and below: 5.4.254-170.358.amzn2 + * Kubernetes 1.28 and above: 5.10.192-183.736.amzn2 +* `dockerd`: 20.10.25-1.amzn2.0.3 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
+* `containerd`: 1.6.19-1.amzn2.0.5 +* `runc`: 1.1.7-4.amzn2 +* `cuda`: 12.2.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.2.1705.0-1 + +Notable changes: +- Add optional FIPS support ([#1458](https://github.com/awslabs/amazon-eks-ami/pull/1458)) +- Fix region in cached image names ([#1461](https://github.com/awslabs/amazon-eks-ami/pull/1461)) +- Update curl for [ALAS-2023-2287](https://alas.aws.amazon.com/AL2/ALAS-2023-2287.html) +- Update kernel for [ALASKERNEL-5.10-2023-039](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-039.html) + +Minor changes: +- Add r7i to eni-max-pods.txt ([#1473](https://github.com/awslabs/amazon-eks-ami/pull/1473)) +- Correctly tag cached images for us-gov-west-1 FIPS endpoint ([#1476](https://github.com/awslabs/amazon-eks-ami/pull/1476)) +- Add new i4i sizes to eni-max-pods.txt ([#1495](https://github.com/awslabs/amazon-eks-ami/pull/1495)) + ### AMI Release v20231002 * amazon-eks-gpu-node-1.28-v20231002 * amazon-eks-gpu-node-1.27-v20231002 From ea2e73b7d5f270b4bcbd3c112e969457d1245f4c Mon Sep 17 00:00:00 2001 From: pjaudiomv <34245618+pjaudiomv@users.noreply.github.com> Date: Wed, 1 Nov 2023 20:40:12 -0400 Subject: [PATCH 571/621] Skip installing amazon-ssm-agent if already present (#1501) --- scripts/install-worker.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 00256ee80..05c1b3f05 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -503,11 +503,15 @@ fi ### SSM Agent ################################################################## ################################################################################ -echo "Installing amazon-ssm-agent" -if ! 
[[ ${ISOLATED_REGIONS} =~ $BINARY_BUCKET_REGION ]]; then - sudo yum install -y https://s3.${BINARY_BUCKET_REGION}.${S3_DOMAIN}/amazon-ssm-${BINARY_BUCKET_REGION}/${SSM_AGENT_VERSION}/linux_${ARCH}/amazon-ssm-agent.rpm +if yum list installed | grep amazon-ssm-agent; then + echo "amazon-ssm-agent already present - skipping install" else - sudo yum install -y amazon-ssm-agent + echo "Installing amazon-ssm-agent" + if ! [[ ${ISOLATED_REGIONS} =~ $BINARY_BUCKET_REGION ]]; then + sudo yum install -y https://s3.${BINARY_BUCKET_REGION}.${S3_DOMAIN}/amazon-ssm-${BINARY_BUCKET_REGION}/${SSM_AGENT_VERSION}/linux_${ARCH}/amazon-ssm-agent.rpm + else + sudo yum install -y amazon-ssm-agent + fi fi ################################################################################ From 698e53e5613252e1358d375b7fcaff54ee5ffe9b Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 6 Nov 2023 12:32:30 -0800 Subject: [PATCH 572/621] Exclude automated eni-max-pods.txt PR's from release notes (#1498) --- .github/workflows/sync-eni-max-pods.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/sync-eni-max-pods.yaml b/.github/workflows/sync-eni-max-pods.yaml index 929ceb2a5..2affd7873 100644 --- a/.github/workflows/sync-eni-max-pods.yaml +++ b/.github/workflows/sync-eni-max-pods.yaml @@ -42,6 +42,8 @@ jobs: commit-message: "Update eni-max-pods.txt" committer: "GitHub " author: "GitHub " + labels: | + changelog/exclude title: "Update eni-max-pods.txt" body: | Generated by [aws/amazon-vpc-cni-k8s](https://github.com/aws/amazon-vpc-cni-k8s): From 958d48e1b5436fc187a0ca1bf7dde5b218ca61a1 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 6 Nov 2023 12:41:41 -0800 Subject: [PATCH 573/621] Remove extraneous space character (#1505) --- .github/workflows/update-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/update-changelog.yaml b/.github/workflows/update-changelog.yaml index bc0dcd11b..32f5e457f 100644 --- 
a/.github/workflows/update-changelog.yaml +++ b/.github/workflows/update-changelog.yaml @@ -42,7 +42,7 @@ jobs: if (changelog.includes(release.data.name)) { throw new Error(`changelog already includes ${release.data.name}`); } - const newEntry = `### ${release.data.name}\n${release.data.body}`; + const newEntry = `### ${release.data.name}\n${release.data.body}`; const updatedChangelog = changelog.replace(placeholder, placeholder + '\n\n' + newEntry); fs.writeFileSync(changelogPath, updatedChangelog); - uses: peter-evans/create-pull-request@v4 From c73b27340e311353ddf98078956c8b94c75007e5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 8 Nov 2023 20:49:54 -0800 Subject: [PATCH 574/621] Update CHANGELOG.md (#1507) --- CHANGELOG.md | 345 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 345 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a2bb30e8..bd771a3c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,351 @@ +# AMI Release v20231106 +## What's Changed +* Add new i4i sizes to eni-max-pods.txt by @github-actions in https://github.com/awslabs/amazon-eks-ami/pull/1495 +* Set nerdctl default namespace to k8s.io by @reegnz in https://github.com/awslabs/amazon-eks-ami/pull/1488 +* Skip installing amazon-ssm-agent if already present by @pjaudiomv in https://github.com/awslabs/amazon-eks-ami/pull/1501 + +## New Contributors +* @pjaudiomv made their first contribution in https://github.com/awslabs/amazon-eks-ami/pull/1501 + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20231027...v20231106 + +--- + +

AMI Details

+ + +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202311061.28.3-20231106s3://amazon-eks/1.28.3/2023-11-02/
amazon-eks-gpu-node-1.28-v20231106
amazon-eks-arm64-node-1.28-v20231106
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1
containerd1.6.19-1.amzn2.0.5
cuda12.2.0-1
kernel5.10.198-187.748.amzn2
nvidia-driver-latest-dkms535.54.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202311061.27.7-20231106s3://amazon-eks/1.27.7/2023-11-02/
amazon-eks-gpu-node-1.27-v20231106
amazon-eks-arm64-node-1.27-v20231106
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1
containerd1.6.19-1.amzn2.0.5
cuda11.4.0-1
kernel5.10.198-187.748.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202311061.26.10-20231106s3://amazon-eks/1.26.10/2023-11-02/
amazon-eks-gpu-node-1.26-v20231106
amazon-eks-arm64-node-1.26-v20231106
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1
containerd1.6.19-1.amzn2.0.5
cuda11.4.0-1
kernel5.10.198-187.748.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202311061.25.15-20231106s3://amazon-eks/1.25.15/2023-11-02/
amazon-eks-gpu-node-1.25-v20231106
amazon-eks-arm64-node-1.25-v20231106
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1
containerd1.6.19-1.amzn2.0.5
cuda11.4.0-1
kernel5.10.198-187.748.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202311061.24.17-20231106s3://amazon-eks/1.24.17/2023-11-02/
amazon-eks-gpu-node-1.24-v20231106
amazon-eks-arm64-node-1.24-v20231106
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1
containerd1.6.19-1.amzn2.0.5
cuda11.4.0-1
docker20.10.23-1.amzn2.0.1
kernel5.10.198-187.748.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202311061.23.17-20231106s3://amazon-eks/1.23.17/2023-11-02/
amazon-eks-gpu-node-1.23-v20231106
amazon-eks-arm64-node-1.23-v20231106
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1
containerd1.6.19-1.amzn2.0.5
cuda11.4.0-1
docker20.10.23-1.amzn2.0.1
kernel5.4.258-171.360.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.27 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.28 and above: `5.10.192-183.736.amzn2` + +--- + ### AMI Release v20231027 * amazon-eks-gpu-node-1.28-v20231027 * amazon-eks-gpu-node-1.27-v20231027 From 4a6b51efc4d94cb652f280b6cd80c0dea6998385 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 9 Nov 2023 10:17:14 -0800 Subject: [PATCH 575/621] Update CHANGELOG.md to fix docker version (#1511) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd771a3c6..a3047068a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -390,7 +390,7 @@ AMI details: * ⚠️ **Note: A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible.** More information is available in https://github.com/awslabs/amazon-eks-ami/issues/1494. To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: * Kubernetes 1.27 and below: 5.4.254-170.358.amzn2 * Kubernetes 1.28 and above: 5.10.192-183.736.amzn2 -* `dockerd`: 20.10.25-1.amzn2.0.3 +* `dockerd`: 20.10.23-1.amzn2.0.1 * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
* `containerd`: 1.6.19-1.amzn2.0.5 * `runc`: 1.1.7-4.amzn2 From e7f95af66f7f68c72b386bb71b6c66abcc6039ea Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 9 Nov 2023 11:26:54 -0800 Subject: [PATCH 576/621] Update docker to the latest 20.10 version (#1510) --- doc/USER_GUIDE.md | 2 +- eks-worker-al2-variables.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index d365b7e29..f96c046ea 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -39,7 +39,7 @@ Users have the following options for specifying their own values: | `cni_plugin_version` | ```v1.2.0``` | | | `containerd_version` | ```1.6.*``` | | | `creator` | ```{{env `USER`}}``` | | -| `docker_version` | ```20.10.23-1.amzn2.0.1``` | | +| `docker_version` | ```20.10.*``` | | | `encrypted` | ```false``` | | | `enable_fips` | ```false``` | Install openssl and enable fips related kernel parameters | | `instance_type` | *None* | | diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 2ff4df904..43b60748c 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -15,7 +15,7 @@ "cni_plugin_version": "v1.2.0", "containerd_version": "1.6.*", "creator": "{{env `USER`}}", - "docker_version": "20.10.23-1.amzn2.0.1", + "docker_version": "20.10.*", "enable_fips": "false", "encrypted": "false", "kernel_version": "", From e99201099a3c4752af465c03bd5dedda0a729ca8 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 9 Nov 2023 11:28:16 -0800 Subject: [PATCH 577/621] Changelog entry format tweaks (#1508) --- .github/workflows/update-changelog.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/update-changelog.yaml b/.github/workflows/update-changelog.yaml index 32f5e457f..aaffcc5d8 100644 --- a/.github/workflows/update-changelog.yaml +++ b/.github/workflows/update-changelog.yaml @@ -42,8 +42,8 @@ jobs: if (changelog.includes(release.data.name)) { throw new Error(`changelog already 
includes ${release.data.name}`); } - const newEntry = `### ${release.data.name}\n${release.data.body}`; - const updatedChangelog = changelog.replace(placeholder, placeholder + '\n\n' + newEntry); + const newEntry = `# ${release.data.name}\n${release.data.body}`; + const updatedChangelog = changelog.replace(placeholder, placeholder + '\n\n' + newEntry + '\n---\n'); fs.writeFileSync(changelogPath, updatedChangelog); - uses: peter-evans/create-pull-request@v4 with: From 872f5505e1de493694e74141985091b2c6f8354d Mon Sep 17 00:00:00 2001 From: Edmond Ceausu Date: Wed, 15 Nov 2023 14:45:29 -0500 Subject: [PATCH 578/621] Document how to collect UserData (#1504) --- log-collector-script/linux/README.md | 9 +++++++++ log-collector-script/windows/README.md | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/log-collector-script/linux/README.md b/log-collector-script/linux/README.md index 4119e4410..9bdad98bd 100644 --- a/log-collector-script/linux/README.md +++ b/log-collector-script/linux/README.md @@ -129,3 +129,12 @@ aws ssm get-command-invocation \ ``` 4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. + +### Collect User Data + +If collecting user data is required as apart of troubleshooting please use the commands below to retrieve data via IMDSv2: + +``` +TOKEN=`curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600"` \ +&& curl -H "X-aws-ec2-metadata-token: $TOKEN" -v http://169.254.169.254/latest/user-data +``` diff --git a/log-collector-script/windows/README.md b/log-collector-script/windows/README.md index 945211c14..1bff2287b 100644 --- a/log-collector-script/windows/README.md +++ b/log-collector-script/windows/README.md @@ -121,3 +121,12 @@ aws ssm get-command-invocation \ ``` 4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. 
+ +### Collect User Data + +If collecting use rdata is required as apart of troubleshooting please use the commands below to retrieve data via IMDSv2: + +``` +[string]$token = Invoke-RestMethod -Headers @{"X-aws-ec2-metadata-token-ttl-seconds" = "21600"} -Method PUT -Uri http://169.254.169.254/latest/api/token +Invoke-RestMethod -Headers @{"X-aws-ec2-metadata-token" = $token} -Method GET -Uri http://169.254.169.254/latest/user-data +``` From e566fe08ea09186120bdaa582d8ccf6461ca66d6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 15 Nov 2023 16:20:29 -0800 Subject: [PATCH 579/621] Update eni-max-pods.txt (#1518) Co-authored-by: GitHub --- files/eni-max-pods.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 0d5e473f0..70f9a59bc 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -231,6 +231,7 @@ d3en.6xlarge 58 d3en.8xlarge 78 d3en.xlarge 10 dl1.24xlarge 737 +dl2q.24xlarge 737 f1.16xlarge 394 f1.2xlarge 58 f1.4xlarge 234 From 8c5b9f60d0db705367f9ff6f952a7879891aa0cd Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 17 Nov 2023 10:38:24 -0800 Subject: [PATCH 580/621] Update CHANGELOG.md for release v20231116 (#1521) Co-authored-by: GitHub --- CHANGELOG.md | 346 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 346 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3047068a..aa64c8fa2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,352 @@ +# AMI Release v20231116 + + +## What's Changed +* Sets docker to the latest 20.10 version by @mmerkes in https://github.com/awslabs/amazon-eks-ami/pull/1510 + +## New Contributors +* @edmondceausu made their first contribution in https://github.com/awslabs/amazon-eks-ami/pull/1504 + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20231106...v20231116 + +--- + +

AMI Details

+ + +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202311161.28.3-20231116s3://amazon-eks/1.28.3/2023-11-14/
amazon-eks-gpu-node-1.28-v20231116
amazon-eks-arm64-node-1.28-v20231116
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1798.0-1
containerd1.6.19-1.amzn2.0.5
cuda12.2.0-1
kernel5.10.198-187.748.amzn2
nvidia-driver-latest-dkms535.54.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202311161.27.7-20231116s3://amazon-eks/1.27.7/2023-11-14/
amazon-eks-gpu-node-1.27-v20231116
amazon-eks-arm64-node-1.27-v20231116
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1798.0-1
containerd1.6.19-1.amzn2.0.5
cuda12.2.0-1
kernel5.10.198-187.748.amzn2
nvidia-driver-latest-dkms535.54.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202311161.26.10-20231116s3://amazon-eks/1.26.10/2023-11-14/
amazon-eks-gpu-node-1.26-v20231116
amazon-eks-arm64-node-1.26-v20231116
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1798.0-1
containerd1.6.19-1.amzn2.0.5
cuda12.2.0-1
kernel5.10.198-187.748.amzn2
nvidia-driver-latest-dkms535.54.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202311161.25.15-20231116s3://amazon-eks/1.25.15/2023-11-14/
amazon-eks-gpu-node-1.25-v20231116
amazon-eks-arm64-node-1.25-v20231116
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1798.0-1
containerd1.6.19-1.amzn2.0.5
cuda12.2.0-1
kernel5.10.198-187.748.amzn2
nvidia-driver-latest-dkms535.54.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202311161.24.17-20231116s3://amazon-eks/1.24.17/2023-11-14/
amazon-eks-gpu-node-1.24-v20231116
amazon-eks-arm64-node-1.24-v20231116
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1798.0-1
containerd1.6.19-1.amzn2.0.5
cuda11.4.0-1
docker20.10.25-1.amzn2.0.3
kernel5.10.198-187.748.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202311161.23.17-20231116s3://amazon-eks/1.23.17/2023-11-14/
amazon-eks-gpu-node-1.23-v20231116
amazon-eks-arm64-node-1.23-v20231116
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1798.0-1
containerd1.6.19-1.amzn2.0.5
cuda11.4.0-1
docker20.10.25-1.amzn2.0.3
kernel5.4.258-171.360.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + # AMI Release v20231106 ## What's Changed * Add new i4i sizes to eni-max-pods.txt by @github-actions in https://github.com/awslabs/amazon-eks-ami/pull/1495 From 1c779bff755ea1424f8d646a1fab1863fdd44e98 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 20 Nov 2023 17:08:42 -0800 Subject: [PATCH 581/621] Add check for ecr-fips endpoint availability (#1524) --- files/get-ecr-uri.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/files/get-ecr-uri.sh b/files/get-ecr-uri.sh index a160cebcb..3a5bad0b0 100755 --- a/files/get-ecr-uri.sh +++ b/files/get-ecr-uri.sh @@ -110,10 +110,15 @@ else esac # end region check fi -AWS_ECR_SUBDOMAIN="ecr" -# if FIPS is enabled on the machine, use the FIPS endpoint. 
+ECR_DOMAIN="${acct}.dkr.ecr.${region}.${aws_domain}" + +# if FIPS is enabled on the machine, use the FIPS endpoint if it's available if [[ "$(sysctl -n crypto.fips_enabled)" == 1 ]]; then - AWS_ECR_SUBDOMAIN="ecr-fips" + ECR_FIPS_DOMAIN="${acct}.dkr.ecr-fips.${region}.${aws_domain}" + if [ $(getent hosts "$ECR_FIPS_DOMAIN" | wc -l) -gt 0 ]; then + echo "$ECR_FIPS_DOMAIN" + exit 0 + fi fi -echo "${acct}.dkr.${AWS_ECR_SUBDOMAIN}.${region}.${aws_domain}" +echo "$ECR_DOMAIN" From fc8815c9fcad84e64cb25f0d34d3b8333c7b904f Mon Sep 17 00:00:00 2001 From: Davanum Srinivas Date: Fri, 24 Nov 2023 13:48:24 -0500 Subject: [PATCH 582/621] Miscellaneous fixes from AL2023 testing (#1528) Signed-off-by: Davanum Srinivas --- files/bootstrap.sh | 2 +- scripts/install-worker.sh | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 36f47d9c3..f7327868d 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -407,7 +407,7 @@ fi log "INFO: Using IP family: ${IP_FAMILY}" -echo $B64_CLUSTER_CA | base64 -d > $CA_CERTIFICATE_FILE_PATH +echo "$B64_CLUSTER_CA" | base64 -d > $CA_CERTIFICATE_FILE_PATH sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig sed -i s,AWS_REGION,$AWS_DEFAULT_REGION,g /var/lib/kubelet/kubeconfig diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 05c1b3f05..b2a01b7d1 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -88,13 +88,19 @@ fi if cat /etc/*release | grep "al2023" > /dev/null 2>&1; then # exists in al2023 only (needed by kubelet) sudo yum install -y iptables-legacy + + # Remove the amazon-ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. 
+ if yum list installed | grep amazon-ec2-net-utils; then sudo yum remove amazon-ec2-net-utils -y -q; fi + + # Temporary fix for https://github.com/aws/amazon-vpc-cni-k8s/pull/2118 + sed -i "s/^MACAddressPolicy=.*/MACAddressPolicy=none/" /usr/lib/systemd/network/99-default.link else # curl-minimal already exists in al2023 so install curl only on al2 sudo yum install -y curl -fi -# Remove the ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. -if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi + # Remove the ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. + if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi +fi sudo mkdir -p /etc/eks/ From 8b87d03be07533865298f97b73e96a63508a43d5 Mon Sep 17 00:00:00 2001 From: Davanum Srinivas Date: Sat, 25 Nov 2023 04:25:00 -0500 Subject: [PATCH 583/621] fix Permission denied for 99-default.link (#1529) Signed-off-by: Davanum Srinivas --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index b2a01b7d1..fe319b628 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -93,7 +93,7 @@ if cat /etc/*release | grep "al2023" > /dev/null 2>&1; then if yum list installed | grep amazon-ec2-net-utils; then sudo yum remove amazon-ec2-net-utils -y -q; fi # Temporary fix for https://github.com/aws/amazon-vpc-cni-k8s/pull/2118 - sed -i "s/^MACAddressPolicy=.*/MACAddressPolicy=none/" /usr/lib/systemd/network/99-default.link + sudo sed -i "s/^MACAddressPolicy=.*/MACAddressPolicy=none/" /usr/lib/systemd/network/99-default.link || true else # curl-minimal already exists in al2023 so install curl only on al2 sudo yum install -y curl From bfb9f10fc202dc0111df59a3e38fec2786a9c4ea Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 29 Nov 2023 15:40:10 -0800 
Subject: [PATCH 584/621] Install SSM agent from AL core repo by default (#1531) --- doc/USER_GUIDE.md | 2 +- eks-worker-al2-variables.json | 2 +- scripts/install-worker.sh | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index f96c046ea..b0a704127 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -58,7 +58,7 @@ Users have the following options for specifying their own values: | `source_ami_owners` | ```137112412989``` | | | `ssh_interface` | `""` | | | `ssh_username` | ```ec2-user``` | | -| `ssm_agent_version` | ```latest``` | | +| `ssm_agent_version` | `""` | Version of the SSM agent to install from the S3 bucket provided by the SSM agent project, such as ```latest```. If empty, the latest version of the SSM agent available in the Amazon Linux core repositories will be installed. | | `subnet_id` | `""` | | | `temporary_security_group_source_cidrs` | `""` | | | `volume_type` | ```gp2``` | | diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 43b60748c..0e02f145c 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -31,7 +31,7 @@ "source_ami_owners": "137112412989", "ssh_interface": "", "ssh_username": "ec2-user", - "ssm_agent_version": "latest", + "ssm_agent_version": "", "subnet_id": "", "temporary_security_group_source_cidrs": "", "volume_type": "gp2", diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index fe319b628..7cee78cd3 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -32,7 +32,6 @@ validate_env_set PULL_CNI_FROM_GITHUB validate_env_set PAUSE_CONTAINER_VERSION validate_env_set CACHE_CONTAINER_IMAGES validate_env_set WORKING_DIR -validate_env_set SSM_AGENT_VERSION ################################################################################ ### Machine Architecture ####################################################### @@ -512,10 +511,11 @@ fi if yum list installed | grep 
amazon-ssm-agent; then echo "amazon-ssm-agent already present - skipping install" else - echo "Installing amazon-ssm-agent" - if ! [[ ${ISOLATED_REGIONS} =~ $BINARY_BUCKET_REGION ]]; then + if ! [[ -z "${SSM_AGENT_VERSION}" ]]; then + echo "Installing amazon-ssm-agent@${SSM_AGENT_VERSION} from S3" sudo yum install -y https://s3.${BINARY_BUCKET_REGION}.${S3_DOMAIN}/amazon-ssm-${BINARY_BUCKET_REGION}/${SSM_AGENT_VERSION}/linux_${ARCH}/amazon-ssm-agent.rpm else + echo "Installing amazon-ssm-agent from AL core repository" sudo yum install -y amazon-ssm-agent fi fi From 90c57ff13bc45a7c3812f450cce7595b293cee56 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 30 Nov 2023 10:36:48 -0800 Subject: [PATCH 585/621] Update to `containerd` 1.7 (#1516) --- doc/USER_GUIDE.md | 2 +- eks-worker-al2-variables.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index b0a704127..a546ab034 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -37,7 +37,7 @@ Users have the following options for specifying their own values: | `binary_bucket_region` | ```us-west-2``` | | | `cache_container_images` | ```false``` | | | `cni_plugin_version` | ```v1.2.0``` | | -| `containerd_version` | ```1.6.*``` | | +| `containerd_version` | ```1.7.*``` | | | `creator` | ```{{env `USER`}}``` | | | `docker_version` | ```20.10.*``` | | | `encrypted` | ```false``` | | diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 0e02f145c..45756e51e 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -13,7 +13,7 @@ "binary_bucket_region": "us-west-2", "cache_container_images": "false", "cni_plugin_version": "v1.2.0", - "containerd_version": "1.6.*", + "containerd_version": "1.7.*", "creator": "{{env `USER`}}", "docker_version": "20.10.*", "enable_fips": "false", From fb87d587ff27b3098176792859115ec0ee7a6429 Mon Sep 17 00:00:00 2001 From: Joe North Date: Thu, 30 Nov 2023 14:58:14 -0500 Subject: 
[PATCH 586/621] Capture logs for EKS Pod Identity Agent (#1533) --- log-collector-script/linux/eks-log-collector.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index ee03b46ac..6c4c03528 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -359,6 +359,7 @@ get_common_logs() { cp --force --dereference --recursive /var/log/containers/fsx-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null cp --force --dereference --recursive /var/log/containers/fsx-openzfs-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null cp --force --dereference --recursive /var/log/containers/file-cache-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/eks-pod-identity-agent* "${COLLECT_DIR}"/var_log/ 2> /dev/null continue fi if [[ "${entry}" == "pods" ]]; then @@ -371,6 +372,7 @@ get_common_logs() { cp --force --dereference --recursive /var/log/pods/kube-system_fsx-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null cp --force --dereference --recursive /var/log/pods/kube-system_fsx-openzfs-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null cp --force --dereference --recursive /var/log/pods/kube-system_file-cache-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_eks-pod-identity-agent* "${COLLECT_DIR}"/var_log/ 2> /dev/null continue fi cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/var_log/ 2> /dev/null From 6866d4c3f29b28d93adab796fa37ed8ad1252331 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 6 Dec 2023 18:59:09 -0800 Subject: [PATCH 587/621] Update CHANGELOG.md for release v20231201 (#1538) Co-authored-by: GitHub --- CHANGELOG.md | 348 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 348 insertions(+) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index aa64c8fa2..b98d9be74 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,354 @@ +# AMI Release v20231201 + + +## What's Changed +* Check for ecr-fips endpoint availability by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1524 +* Install SSM agent from AL core repo by default by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1531 +* Update to `containerd` 1.7 by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1516 + +## New Contributors +* @JoeNorth made their first contribution in https://github.com/awslabs/amazon-eks-ami/pull/1533 + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20231116...v20231201 + +--- + +

AMI Details

+ + +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202312011.28.3-20231201s3://amazon-eks/1.28.3/2023-11-14/
amazon-eks-gpu-node-1.28-v20231201
amazon-eks-arm64-node-1.28-v20231201
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.0-1
kernel5.10.199-190.747.amzn2
nvidia-driver-latest-dkms535.54.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202312011.27.7-20231201s3://amazon-eks/1.27.7/2023-11-14/
amazon-eks-gpu-node-1.27-v20231201
amazon-eks-arm64-node-1.27-v20231201
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.0-1
kernel5.10.199-190.747.amzn2
nvidia-driver-latest-dkms535.54.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202312011.26.10-20231201s3://amazon-eks/1.26.10/2023-11-14/
amazon-eks-gpu-node-1.26-v20231201
amazon-eks-arm64-node-1.26-v20231201
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.0-1
kernel5.10.199-190.747.amzn2
nvidia-driver-latest-dkms535.54.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202312011.25.15-20231201s3://amazon-eks/1.25.15/2023-11-14/
amazon-eks-gpu-node-1.25-v20231201
amazon-eks-arm64-node-1.25-v20231201
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.0-1
kernel5.10.199-190.747.amzn2
nvidia-driver-latest-dkms535.54.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202312011.24.17-20231201s3://amazon-eks/1.24.17/2023-11-14/
amazon-eks-gpu-node-1.24-v20231201
amazon-eks-arm64-node-1.24-v20231201
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.3
kernel5.10.199-190.747.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202312011.23.17-20231201s3://amazon-eks/1.23.17/2023-11-14/
amazon-eks-gpu-node-1.23-v20231201
amazon-eks-arm64-node-1.23-v20231201
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.3
kernel5.4.259-173.361.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + # AMI Release v20231116 From aaf1aa727648832f1f7b7a5f627f21314d389ba4 Mon Sep 17 00:00:00 2001 From: Jeffrey Nelson Date: Thu, 7 Dec 2023 14:03:28 -0600 Subject: [PATCH 588/621] AL2023 networking changes for VPC CNI compatibility (#1539) --- scripts/install-worker.sh | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 7cee78cd3..8fc2094d0 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -86,10 +86,19 @@ fi # packages that need special handling if cat /etc/*release | grep "al2023" > /dev/null 2>&1; then # exists in al2023 only (needed by kubelet) - sudo yum install -y iptables-legacy - - # Remove the amazon-ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. - if yum list installed | grep amazon-ec2-net-utils; then sudo yum remove amazon-ec2-net-utils -y -q; fi + sudo yum install -y iptables-nft + + # Mask udev triggers installed by amazon-ec2-net-utils package + sudo touch /etc/udev/rules.d/99-cni-empty.rules + + # Make networkd ignore foreign settings, else it may unexpectedly delete IP rules and routes added by CNI + sudo mkdir -p /etc/systemd/networkd.conf.d/ + cat << EOF | sudo tee /etc/systemd/networkd.conf.d/80-release.conf +# Do not clobber any routes or rules added by CNI. 
+[Network] +ManageForeignRoutes=no +ManageForeignRoutingPolicyRules=no +EOF # Temporary fix for https://github.com/aws/amazon-vpc-cni-k8s/pull/2118 sudo sed -i "s/^MACAddressPolicy=.*/MACAddressPolicy=none/" /usr/lib/systemd/network/99-default.link || true From e0989539612fd6c56a32c129a185c988d790d16e Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 7 Dec 2023 12:11:15 -0800 Subject: [PATCH 589/621] Set containerd LimitNOFILE to recommended value (#1535) --- scripts/install-worker.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 8fc2094d0..2b57a2914 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -202,6 +202,12 @@ cat << EOF | sudo tee /etc/systemd/system/containerd.service.d/10-compat-symlink ExecStartPre=/bin/ln -sf /run/containerd/containerd.sock /run/dockershim.sock EOF +cat << EOF | sudo tee /etc/systemd/system/containerd.service.d/20-limitnofile.conf +[Service] +# https://github.com/containerd/containerd/pull/8924 +LimitNOFILE=1024:524288 +EOF + cat << EOF | sudo tee -a /etc/modules-load.d/containerd.conf overlay br_netfilter From b5cdeb62e9f7c7cef6b9e6ddab593649c7faf69b Mon Sep 17 00:00:00 2001 From: Jeffrey Nelson Date: Thu, 7 Dec 2023 14:41:50 -0600 Subject: [PATCH 590/621] fix networkd settings (#1540) --- scripts/install-worker.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 2b57a2914..23a06e9da 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -89,11 +89,11 @@ if cat /etc/*release | grep "al2023" > /dev/null 2>&1; then sudo yum install -y iptables-nft # Mask udev triggers installed by amazon-ec2-net-utils package - sudo touch /etc/udev/rules.d/99-cni-empty.rules + sudo touch /etc/udev/rules.d/99-vpc-policy-routes.rules # Make networkd ignore foreign settings, else it may unexpectedly delete IP rules and routes added by CNI - sudo mkdir -p 
/etc/systemd/networkd.conf.d/ - cat << EOF | sudo tee /etc/systemd/networkd.conf.d/80-release.conf + sudo mkdir -p /usr/lib/systemd/networkd.conf.d/ + cat << EOF | sudo tee /usr/lib/systemd/networkd.conf.d/80-release.conf # Do not clobber any routes or rules added by CNI. [Network] ManageForeignRoutes=no From e65bc141b753c98c6171058f5c5f4a1f9ea957bb Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 10 Dec 2023 17:55:08 -0800 Subject: [PATCH 591/621] Update get-ecr-uri.sh with ca-west-1 account (#1542) --- files/get-ecr-uri.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/files/get-ecr-uri.sh b/files/get-ecr-uri.sh index 3a5bad0b0..3dc56523a 100755 --- a/files/get-ecr-uri.sh +++ b/files/get-ecr-uri.sh @@ -63,6 +63,9 @@ else il-central-1) acct="066635153087" ;; + ca-west-1) + acct="761377655185" + ;; # This sections includes all commercial non-opt-in regions, which use # the same account for ECR pause container images, but still have in-region # registries. From e9f135ed7a1ec25c57dcd0e2aac8604f2c0eefbe Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 13 Dec 2023 10:06:50 -0800 Subject: [PATCH 592/621] Install amazon packer plugin for CI (#1545) --- .github/actions/ci/build/action.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/actions/ci/build/action.yaml b/.github/actions/ci/build/action.yaml index 822617abb..5b91c3d0d 100644 --- a/.github/actions/ci/build/action.yaml +++ b/.github/actions/ci/build/action.yaml @@ -24,6 +24,7 @@ runs: - id: build shell: bash run: | + packer plugins install github.com/hashicorp/amazon AMI_NAME="amazon-eks-node-${{ inputs.k8s_version }}-${{ inputs.build_id }}" make ${{ inputs.k8s_version }} ami_name=${AMI_NAME} ${{ inputs.additional_arguments }} echo "ami_id=$(jq -r .builds[0].artifact_id "${AMI_NAME}-manifest.json" | cut -d ':' -f 2)" >> $GITHUB_OUTPUT From 72aa58b200a628d9ec316765aef6a46ee58ce296 Mon Sep 17 00:00:00 2001 From: "Keto D. 
Zhang" Date: Sat, 16 Dec 2023 16:24:33 -0800 Subject: [PATCH 593/621] Fix flag typo in logging (#1547) --- files/bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index f7327868d..42567a495 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -345,7 +345,7 @@ CA_CERTIFICATE_DIRECTORY=/etc/kubernetes/pki CA_CERTIFICATE_FILE_PATH=$CA_CERTIFICATE_DIRECTORY/ca.crt mkdir -p $CA_CERTIFICATE_DIRECTORY if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then - log "INFO: --cluster-ca or --api-server-endpoint is not defined, describing cluster..." + log "INFO: --b64-cluster-ca or --apiserver-endpoint is not defined, describing cluster..." DESCRIBE_CLUSTER_RESULT="/tmp/describe_cluster_result.txt" # Retry the DescribeCluster API for API_RETRY_ATTEMPTS From c5ff1ca882f03e6f12e1472b8d5f26fe03d2bebb Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 21 Dec 2023 20:11:11 -0800 Subject: [PATCH 594/621] Update CHANGELOG.md for release v20231220 (#1550) Co-authored-by: GitHub --- CHANGELOG.md | 364 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 364 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b98d9be74..de8de2923 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,370 @@ +# AMI Release v20231220 + + +## What's Changed +* Set containerd LimitNOFILE to recommended value by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1535 +* Update get-ecr-uri.sh with ca-west-1 account by @mmerkes in https://github.com/awslabs/amazon-eks-ami/pull/1542 +* Fix typo opt names in `bootstrap.sh` logging by @ketozhang in https://github.com/awslabs/amazon-eks-ami/pull/1547 + +## New Contributors +* @ketozhang made their first contribution in https://github.com/awslabs/amazon-eks-ami/pull/1547 + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20231201...v20231220 + 
+--- + +

AMI Details

+ + +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202312201.28.3-20231220s3://amazon-eks/1.28.3/2023-11-14/
amazon-eks-gpu-node-1.28-v20231220
amazon-eks-arm64-node-1.28-v20231220
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202312201.27.7-20231220s3://amazon-eks/1.27.7/2023-11-14/
amazon-eks-gpu-node-1.27-v20231220
amazon-eks-arm64-node-1.27-v20231220
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202312201.26.10-20231220s3://amazon-eks/1.26.10/2023-11-14/
amazon-eks-gpu-node-1.26-v20231220
amazon-eks-arm64-node-1.26-v20231220
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202312201.25.15-20231220s3://amazon-eks/1.25.15/2023-11-14/
amazon-eks-gpu-node-1.25-v20231220
amazon-eks-arm64-node-1.25-v20231220
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202312201.24.17-20231220s3://amazon-eks/1.24.17/2023-11-14/
amazon-eks-gpu-node-1.24-v20231220
amazon-eks-arm64-node-1.24-v20231220
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.3
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202312201.23.17-20231220s3://amazon-eks/1.23.17/2023-11-14/
amazon-eks-gpu-node-1.23-v20231220
amazon-eks-arm64-node-1.23-v20231220
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.3
kernel5.4.261-174.360.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + # AMI Release v20231201 From 569c6b146cbf28af71485a9f30171241643174c3 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 22 Dec 2023 07:08:26 -0800 Subject: [PATCH 595/621] Revert "Set containerd LimitNOFILE to recommended value (#1535)" (#1552) This reverts commit e0989539612fd6c56a32c129a185c988d790d16e. --- scripts/install-worker.sh | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 23a06e9da..e61ca9d21 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -202,12 +202,6 @@ cat << EOF | sudo tee /etc/systemd/system/containerd.service.d/10-compat-symlink ExecStartPre=/bin/ln -sf /run/containerd/containerd.sock /run/dockershim.sock EOF -cat << EOF | sudo tee /etc/systemd/system/containerd.service.d/20-limitnofile.conf -[Service] -# https://github.com/containerd/containerd/pull/8924 -LimitNOFILE=1024:524288 -EOF - cat << EOF | sudo tee -a /etc/modules-load.d/containerd.conf overlay br_netfilter From e4e596d0d410083f7c49b63e0cbdd48cd24a8bcc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 3 Jan 2024 08:27:38 -0800 Subject: [PATCH 596/621] Update CHANGELOG.md for release v20231230 (#1555) Co-authored-by: GitHub --- CHANGELOG.md | 360 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 360 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index de8de2923..6835709c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,366 @@ +# AMI Release v20231230 + + +## What's Changed +* Revert "Set 
containerd LimitNOFILE to recommended value (#1535)" by @mmerkes in https://github.com/awslabs/amazon-eks-ami/pull/1552 + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20231220...v20231230 + +--- + +

AMI Details

+ + +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202312301.28.3-20231230s3://amazon-eks/1.28.3/2023-11-14/
amazon-eks-gpu-node-1.28-v20231230
amazon-eks-arm64-node-1.28-v20231230
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202312301.27.7-20231230s3://amazon-eks/1.27.7/2023-11-14/
amazon-eks-gpu-node-1.27-v20231230
amazon-eks-arm64-node-1.27-v20231230
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202312301.26.10-20231230s3://amazon-eks/1.26.10/2023-11-14/
amazon-eks-gpu-node-1.26-v20231230
amazon-eks-arm64-node-1.26-v20231230
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202312301.25.15-20231230s3://amazon-eks/1.25.15/2023-11-14/
amazon-eks-gpu-node-1.25-v20231230
amazon-eks-arm64-node-1.25-v20231230
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202312301.24.17-20231230s3://amazon-eks/1.24.17/2023-11-14/
amazon-eks-gpu-node-1.24-v20231230
amazon-eks-arm64-node-1.24-v20231230
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.3
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202312301.23.17-20231230s3://amazon-eks/1.23.17/2023-11-14/
amazon-eks-gpu-node-1.23-v20231230
amazon-eks-arm64-node-1.23-v20231230
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.3
kernel5.4.261-174.360.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + # AMI Release v20231220 From bda21094d958c5f305fe4e70ff5457af8f7538cd Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 11 Jan 2024 23:50:02 -0800 Subject: [PATCH 597/621] Update CHANGELOG.md for release v20240110 (#1567) Co-authored-by: GitHub --- CHANGELOG.md | 358 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 358 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6835709c2..e8b109451 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,364 @@ +# AMI Release v20240110 + + + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20231230...v20240110 + +--- + +

AMI Details

+ + +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202401101.28.5-20240110s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-gpu-node-1.28-v20240110
amazon-eks-arm64-node-1.28-v20240110
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202401101.27.9-20240110s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-gpu-node-1.27-v20240110
amazon-eks-arm64-node-1.27-v20240110
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202401101.26.12-20240110s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-gpu-node-1.26-v20240110
amazon-eks-arm64-node-1.26-v20240110
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202401101.25.16-20240110s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-gpu-node-1.25-v20240110
amazon-eks-arm64-node-1.25-v20240110
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202401101.24.17-20240110s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-gpu-node-1.24-v20240110
amazon-eks-arm64-node-1.24-v20240110
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202401101.23.17-20240110s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-gpu-node-1.23-v20240110
amazon-eks-arm64-node-1.23-v20240110
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.4.265-176.364.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + # AMI Release v20231230 From 632a6ddb2e5b9fedd1f6cd21bd3ce7d274153f61 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 12 Jan 2024 14:59:33 -0800 Subject: [PATCH 598/621] Sync `al2023` branch to CodeCommit (#1571) --- .github/workflows/sync-to-codecommit.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/sync-to-codecommit.yaml b/.github/workflows/sync-to-codecommit.yaml index ebed3203c..fda159369 100644 --- a/.github/workflows/sync-to-codecommit.yaml +++ b/.github/workflows/sync-to-codecommit.yaml @@ -27,3 +27,5 @@ jobs: - run: git remote add codecommit ${{ secrets.AWS_CODECOMMIT_REPO_URL }} - run: git checkout master - run: git push codecommit master + - run: git checkout al2023 + - run: git push codecommit al2023 From 76fac7f983ada07552b31ce63ea166feca8f54cc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 18 Jan 2024 20:56:15 -0800 Subject: [PATCH 599/621] Update CHANGELOG.md for release v20240117 (#1585) Co-authored-by: GitHub --- CHANGELOG.md | 416 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 416 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8b109451..4b3a1998f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,422 @@ +# AMI Release v20240117 + + +## What's Changed +* Sync `al2023` branch to CodeCommit by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1571 + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20240110...v20240117 + +--- + +

AMI Details

+ + +
+Kubernetes 1.29 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.29-v202401171.29.0-20240117s3://amazon-eks/1.29.0/2024-01-04/
amazon-eks-gpu-node-1.29-v20240117
amazon-eks-arm64-node-1.29-v20240117
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202401171.28.5-20240117s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-gpu-node-1.28-v20240117
amazon-eks-arm64-node-1.28-v20240117
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202401171.27.9-20240117s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-gpu-node-1.27-v20240117
amazon-eks-arm64-node-1.27-v20240117
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202401171.26.12-20240117s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-gpu-node-1.26-v20240117
amazon-eks-arm64-node-1.26-v20240117
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202401171.25.16-20240117s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-gpu-node-1.25-v20240117
amazon-eks-arm64-node-1.25-v20240117
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202401171.24.17-20240117s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-gpu-node-1.24-v20240117
amazon-eks-arm64-node-1.24-v20240117
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202401171.23.17-20240117s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-gpu-node-1.23-v20240117
amazon-eks-arm64-node-1.23-v20240117
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.4.265-176.364.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + # AMI Release v20240110 From 616bdda5695ecd9be06ce435ef80daeaf23621ea Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 29 Jan 2024 13:45:25 -0800 Subject: [PATCH 600/621] Update example build command (#1598) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 49eb62c26..350d47f35 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ following command in the root of this repository: make # build an AMI with a specific Kubernetes version -make 1.25 +make k8s=1.29 ``` The Makefile chooses a particular kubelet binary to use per Kubernetes version which you can [view here](Makefile). 
From ea542d142e97cc2b6ebdd9b1170cd95424cb8e76 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 30 Jan 2024 12:55:01 -0800 Subject: [PATCH 601/621] Update CI to use k8s arg to make command (#1602) --- .github/actions/ci/build/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/ci/build/action.yaml b/.github/actions/ci/build/action.yaml index 5b91c3d0d..befdc6f7e 100644 --- a/.github/actions/ci/build/action.yaml +++ b/.github/actions/ci/build/action.yaml @@ -26,7 +26,7 @@ runs: run: | packer plugins install github.com/hashicorp/amazon AMI_NAME="amazon-eks-node-${{ inputs.k8s_version }}-${{ inputs.build_id }}" - make ${{ inputs.k8s_version }} ami_name=${AMI_NAME} ${{ inputs.additional_arguments }} + make k8s=${{ inputs.k8s_version }} ami_name=${AMI_NAME} ${{ inputs.additional_arguments }} echo "ami_id=$(jq -r .builds[0].artifact_id "${AMI_NAME}-manifest.json" | cut -d ':' -f 2)" >> $GITHUB_OUTPUT - uses: actions/upload-artifact@v3 with: From d9abb3cdb3630baed9e0805c44be8126fce60a59 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 31 Jan 2024 13:34:47 -0800 Subject: [PATCH 602/621] Update CHANGELOG.md for release v20240129 (#1607) Co-authored-by: GitHub --- CHANGELOG.md | 414 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 414 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b3a1998f..bbf3dde84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,420 @@ +# AMI Release v20240129 + + + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20240117...v20240129 + +--- + +

AMI Details

+ + +
+Kubernetes 1.29 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.29-v202401291.29.0-20240129s3://amazon-eks/1.29.0/2024-01-04/
amazon-eks-gpu-node-1.29-v20240129
amazon-eks-arm64-node-1.29-v20240129
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202401291.28.5-20240129s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-gpu-node-1.28-v20240129
amazon-eks-arm64-node-1.28-v20240129
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202401291.27.9-20240129s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-gpu-node-1.27-v20240129
amazon-eks-arm64-node-1.27-v20240129
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202401291.26.12-20240129s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-gpu-node-1.26-v20240129
amazon-eks-arm64-node-1.26-v20240129
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202401291.25.16-20240129s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-gpu-node-1.25-v20240129
amazon-eks-arm64-node-1.25-v20240129
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202401291.24.17-20240129s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-gpu-node-1.24-v20240129
amazon-eks-arm64-node-1.24-v20240129
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202401291.23.17-20240129s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-gpu-node-1.23-v20240129
amazon-eks-arm64-node-1.23-v20240129
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.4.266-178.365.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + # AMI Release v20240117 From 824c55ec0cb6165ae1ba6df0195ba5ea259539a8 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 31 Jan 2024 13:45:35 -0800 Subject: [PATCH 603/621] Pull sandbox image periodically (#1601) --- files/bootstrap.sh | 3 +++ files/sandbox-image.timer | 9 +++++++++ scripts/install-worker.sh | 5 ++++- test/Dockerfile | 2 +- 4 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 files/sandbox-image.timer diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 42567a495..b133b55d9 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -575,10 +575,13 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then if ! 
cmp -s /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml; then sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service + sudo cp -v /etc/eks/containerd/sandbox-image.timer /etc/systemd/system/sandbox-image.timer sudo chown root:root /etc/systemd/system/sandbox-image.service + sudo chown root:root /etc/systemd/system/sandbox-image.timer systemctl daemon-reload systemctl enable containerd sandbox-image systemctl restart sandbox-image containerd + systemctl enable --now sandbox-image.timer fi sudo cp -v /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service diff --git a/files/sandbox-image.timer b/files/sandbox-image.timer new file mode 100644 index 000000000..7b4514f4f --- /dev/null +++ b/files/sandbox-image.timer @@ -0,0 +1,9 @@ +[Unit] +Description=Pulls the containerd sandbox image periodically + +[Timer] +OnUnitActiveSec=60 +Persistent=true + +[Install] +WantedBy=timers.target diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index e61ca9d21..15211211f 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -184,6 +184,7 @@ fi sudo mv $WORKING_DIR/kubelet-containerd.service /etc/eks/containerd/kubelet-containerd.service sudo mv $WORKING_DIR/sandbox-image.service /etc/eks/containerd/sandbox-image.service +sudo mv $WORKING_DIR/sandbox-image.timer /etc/eks/containerd/sandbox-image.timer sudo mv $WORKING_DIR/pull-sandbox-image.sh /etc/eks/containerd/pull-sandbox-image.sh sudo mv $WORKING_DIR/pull-image.sh /etc/eks/containerd/pull-image.sh sudo chmod +x /etc/eks/containerd/pull-sandbox-image.sh @@ -413,10 +414,12 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! 
[[ ${ISOLATED_REGIONS} =~ $BIN cat /etc/eks/containerd/containerd-config.toml | sed s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g | sudo tee /etc/eks/containerd/containerd-cached-pause-config.toml sudo cp -v /etc/eks/containerd/containerd-cached-pause-config.toml /etc/containerd/config.toml sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service + sudo cp -v /etc/eks/containerd/sandbox-image.timer /etc/systemd/system/sandbox-image.timer sudo chown root:root /etc/systemd/system/sandbox-image.service + sudo chown root:root /etc/systemd/system/sandbox-image.timer sudo systemctl daemon-reload sudo systemctl start containerd - sudo systemctl enable containerd sandbox-image + sudo systemctl enable containerd sandbox-image sandbox-image.timer K8S_MINOR_VERSION=$(echo "${KUBERNETES_VERSION}" | cut -d'.' -f1-2) diff --git a/test/Dockerfile b/test/Dockerfile index d00837c3e..b4ba499c6 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -10,7 +10,7 @@ COPY --from=aemm /ec2-metadata-mock /sbin/ec2-metadata-mock RUN mkdir -p /etc/systemd/system RUN mkdir -p /etc/eks/containerd COPY files/ /etc/eks/ -COPY files/containerd-config.toml files/kubelet-containerd.service files/pull-sandbox-image.sh files/sandbox-image.service /etc/eks/containerd/ +COPY files/containerd-config.toml files/kubelet-containerd.service files/pull-sandbox-image.sh files/sandbox-image.service files/sandbox-image.timer /etc/eks/containerd/ COPY files/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json COPY files/kubelet-kubeconfig /var/lib/kubelet/kubeconfig COPY files/ecr-credential-provider-config.json /etc/eks/image-credential-provider/config.json From 6044c59bea1971754ec0ce8c002a9a01241787d8 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 1 Feb 2024 10:29:31 -0800 Subject: [PATCH 604/621] Revert "Pull sandbox image periodically (#1601)" (#1611) --- files/bootstrap.sh | 3 --- files/sandbox-image.timer | 9 --------- scripts/install-worker.sh | 5 +---- 
test/Dockerfile | 2 +- 4 files changed, 2 insertions(+), 17 deletions(-) delete mode 100644 files/sandbox-image.timer diff --git a/files/bootstrap.sh b/files/bootstrap.sh index b133b55d9..42567a495 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -575,13 +575,10 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then if ! cmp -s /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml; then sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service - sudo cp -v /etc/eks/containerd/sandbox-image.timer /etc/systemd/system/sandbox-image.timer sudo chown root:root /etc/systemd/system/sandbox-image.service - sudo chown root:root /etc/systemd/system/sandbox-image.timer systemctl daemon-reload systemctl enable containerd sandbox-image systemctl restart sandbox-image containerd - systemctl enable --now sandbox-image.timer fi sudo cp -v /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service diff --git a/files/sandbox-image.timer b/files/sandbox-image.timer deleted file mode 100644 index 7b4514f4f..000000000 --- a/files/sandbox-image.timer +++ /dev/null @@ -1,9 +0,0 @@ -[Unit] -Description=Pulls the containerd sandbox image periodically - -[Timer] -OnUnitActiveSec=60 -Persistent=true - -[Install] -WantedBy=timers.target diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 15211211f..e61ca9d21 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -184,7 +184,6 @@ fi sudo mv $WORKING_DIR/kubelet-containerd.service /etc/eks/containerd/kubelet-containerd.service sudo mv $WORKING_DIR/sandbox-image.service /etc/eks/containerd/sandbox-image.service -sudo mv $WORKING_DIR/sandbox-image.timer /etc/eks/containerd/sandbox-image.timer sudo mv $WORKING_DIR/pull-sandbox-image.sh /etc/eks/containerd/pull-sandbox-image.sh sudo mv 
$WORKING_DIR/pull-image.sh /etc/eks/containerd/pull-image.sh sudo chmod +x /etc/eks/containerd/pull-sandbox-image.sh @@ -414,12 +413,10 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! [[ ${ISOLATED_REGIONS} =~ $BIN cat /etc/eks/containerd/containerd-config.toml | sed s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g | sudo tee /etc/eks/containerd/containerd-cached-pause-config.toml sudo cp -v /etc/eks/containerd/containerd-cached-pause-config.toml /etc/containerd/config.toml sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service - sudo cp -v /etc/eks/containerd/sandbox-image.timer /etc/systemd/system/sandbox-image.timer sudo chown root:root /etc/systemd/system/sandbox-image.service - sudo chown root:root /etc/systemd/system/sandbox-image.timer sudo systemctl daemon-reload sudo systemctl start containerd - sudo systemctl enable containerd sandbox-image sandbox-image.timer + sudo systemctl enable containerd sandbox-image K8S_MINOR_VERSION=$(echo "${KUBERNETES_VERSION}" | cut -d'.' 
-f1-2) diff --git a/test/Dockerfile b/test/Dockerfile index b4ba499c6..d00837c3e 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -10,7 +10,7 @@ COPY --from=aemm /ec2-metadata-mock /sbin/ec2-metadata-mock RUN mkdir -p /etc/systemd/system RUN mkdir -p /etc/eks/containerd COPY files/ /etc/eks/ -COPY files/containerd-config.toml files/kubelet-containerd.service files/pull-sandbox-image.sh files/sandbox-image.service files/sandbox-image.timer /etc/eks/containerd/ +COPY files/containerd-config.toml files/kubelet-containerd.service files/pull-sandbox-image.sh files/sandbox-image.service /etc/eks/containerd/ COPY files/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json COPY files/kubelet-kubeconfig /var/lib/kubelet/kubeconfig COPY files/ecr-credential-provider-config.json /etc/eks/image-credential-provider/config.json From 7fa037a5a3b39ffb1c050a85a29c9b3a64299ab5 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 1 Feb 2024 11:32:44 -0800 Subject: [PATCH 605/621] Use crictl to pull sandbox image (#1605) --- files/pull-sandbox-image.sh | 24 +++++++++++++++++++++++- scripts/install-worker.sh | 3 +++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/files/pull-sandbox-image.sh b/files/pull-sandbox-image.sh index e6484a962..b123b50aa 100644 --- a/files/pull-sandbox-image.sh +++ b/files/pull-sandbox-image.sh @@ -8,4 +8,26 @@ if [[ "$(sudo ctr --namespace k8s.io image ls | grep $sandbox_image)" != "" ]]; exit 0 fi -/etc/eks/containerd/pull-image.sh "${sandbox_image}" +MAX_RETRIES=3 + +function retry() { + local rc=0 + for attempt in $(seq 0 $MAX_RETRIES); do + rc=0 + [[ $attempt -gt 0 ]] && echo "Attempt $attempt of $MAX_RETRIES" 1>&2 + "$@" + rc=$? 
+ [[ $rc -eq 0 ]] && break + [[ $attempt -eq $MAX_RETRIES ]] && exit $rc + local jitter=$((1 + RANDOM % 10)) + local sleep_sec="$(($((5 << $((1 + $attempt)))) + $jitter))" + sleep $sleep_sec + done +} + +ecr_password=$(retry aws ecr get-login-password) +if [[ -z ${ecr_password} ]]; then + echo >&2 "Unable to retrieve the ECR password." + exit 1 +fi +retry sudo crictl pull --creds "AWS:${ecr_password}" "${sandbox_image}" diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index e61ca9d21..8d93409de 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -174,6 +174,9 @@ sudo yum versionlock runc-* sudo yum install -y containerd-${CONTAINERD_VERSION} sudo yum versionlock containerd-* +# install cri-tools for crictl, needed to interact with containerd's CRI server +sudo yum install -y cri-tools + sudo mkdir -p /etc/eks/containerd if [ -f "/etc/eks/containerd/containerd-config.toml" ]; then ## this means we are building a gpu ami and have already placed a containerd configuration file in /etc/eks From 41dfa2217582a624a3cd582e5f0a93c25f951cad Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 2 Feb 2024 09:13:14 -0700 Subject: [PATCH 606/621] Remove sandbox image from build-time cache (#1615) --- scripts/install-worker.sh | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 8d93409de..fd58fa292 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -412,14 +412,9 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! 
[[ ${ISOLATED_REGIONS} =~ $BIN AWS_DOMAIN=$(imds 'latest/meta-data/services/domain') ECR_URI=$(/etc/eks/get-ecr-uri.sh "${BINARY_BUCKET_REGION}" "${AWS_DOMAIN}") - PAUSE_CONTAINER="${ECR_URI}/eks/pause:${PAUSE_CONTAINER_VERSION}" - cat /etc/eks/containerd/containerd-config.toml | sed s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g | sudo tee /etc/eks/containerd/containerd-cached-pause-config.toml - sudo cp -v /etc/eks/containerd/containerd-cached-pause-config.toml /etc/containerd/config.toml - sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service - sudo chown root:root /etc/systemd/system/sandbox-image.service sudo systemctl daemon-reload sudo systemctl start containerd - sudo systemctl enable containerd sandbox-image + sudo systemctl enable containerd K8S_MINOR_VERSION=$(echo "${KUBERNETES_VERSION}" | cut -d'.' -f1-2) @@ -467,7 +462,6 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! [[ ${ISOLATED_REGIONS} =~ $BIN fi CACHE_IMGS=( - "${PAUSE_CONTAINER}" ${KUBE_PROXY_IMGS[@]+"${KUBE_PROXY_IMGS[@]}"} ${VPC_CNI_IMGS[@]+"${VPC_CNI_IMGS[@]}"} ) From 7d6230e0228901000e306b4083726212253bdae7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 2 Feb 2024 21:44:22 -0700 Subject: [PATCH 607/621] Update CHANGELOG.md for release v20240202 (#1622) Co-authored-by: GitHub --- CHANGELOG.md | 420 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 420 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bbf3dde84..887317e7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,426 @@ +# AMI Release v20240202 + + +> [!NOTE] +> This release addresses an issue with Kubernetes 1.29 that allowed the sandbox container image used by `containerd` to be garbage-collected by `kubelet`. More information is available in #1597. 
+ +## What's Changed +* Use crictl to pull sandbox image by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1605 +* Remove sandbox image from build-time cache by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1615 + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20240129...v20240202 + +--- + +

AMI Details

+ + +
+Kubernetes 1.29 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.29-v202402021.29.0-20240202s3://amazon-eks/1.29.0/2024-01-04/
amazon-eks-gpu-node-1.29-v20240202
amazon-eks-arm64-node-1.29-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202402021.28.5-20240202s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-gpu-node-1.28-v20240202
amazon-eks-arm64-node-1.28-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202402021.27.9-20240202s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-gpu-node-1.27-v20240202
amazon-eks-arm64-node-1.27-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202402021.26.12-20240202s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-gpu-node-1.26-v20240202
amazon-eks-arm64-node-1.26-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202402021.25.16-20240202s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-gpu-node-1.25-v20240202
amazon-eks-arm64-node-1.25-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202402021.24.17-20240202s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-gpu-node-1.24-v20240202
amazon-eks-arm64-node-1.24-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202402021.23.17-20240202s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-gpu-node-1.23-v20240202
amazon-eks-arm64-node-1.23-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.4.266-178.365.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + # AMI Release v20240129 From 107df3fb9264e47cfd1e269ac5cc69ab4c4a8e3c Mon Sep 17 00:00:00 2001 From: Nick Baker Date: Mon, 5 Feb 2024 13:54:52 -0800 Subject: [PATCH 608/621] Specify region for local zones during sandbox image ECR auth (#1626) --- files/pull-sandbox-image.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/files/pull-sandbox-image.sh b/files/pull-sandbox-image.sh index b123b50aa..02b651a9e 100644 --- a/files/pull-sandbox-image.sh +++ b/files/pull-sandbox-image.sh @@ -8,6 +8,10 @@ if [[ "$(sudo ctr --namespace k8s.io image ls | grep $sandbox_image)" != "" ]]; exit 0 fi +# use the region that the sandbox image comes from for the ecr authentication, +# also mitigating the localzone isse: https://github.com/aws/aws-cli/issues/7043 +region=$(echo "${sandbox_image}" | cut -f4 -d ".") + MAX_RETRIES=3 function retry() { @@ -25,7 +29,7 @@ function retry() { done } -ecr_password=$(retry aws ecr get-login-password) +ecr_password=$(retry aws ecr get-login-password --region $region) if [[ -z ${ecr_password} ]]; then echo >&2 "Unable to retrieve the ECR password." 
exit 1 From baef6f0860f60dbec366de30853e47418e3fb430 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 9 Feb 2024 00:47:36 -0800 Subject: [PATCH 609/621] Fix CHANGELOG space errors (#1647) --- .github/workflows/update-changelog.yaml | 4 +- CHANGELOG.md | 830 ++++++++++++------------ 2 files changed, 418 insertions(+), 416 deletions(-) diff --git a/.github/workflows/update-changelog.yaml b/.github/workflows/update-changelog.yaml index aaffcc5d8..1b7c5680e 100644 --- a/.github/workflows/update-changelog.yaml +++ b/.github/workflows/update-changelog.yaml @@ -43,7 +43,9 @@ jobs: throw new Error(`changelog already includes ${release.data.name}`); } const newEntry = `# ${release.data.name}\n${release.data.body}`; - const updatedChangelog = changelog.replace(placeholder, placeholder + '\n\n' + newEntry + '\n---\n'); + let updatedChangelog = changelog.replace(placeholder, placeholder + '\n\n' + newEntry + '\n---\n'); + // if the release notes are modified in the GitHub web editor, trailing spaces can be added accidentally + updatedChangelog = updatedChangelog.replace(/\s+$/, ''); fs.writeFileSync(changelogPath, updatedChangelog); - uses: peter-evans/create-pull-request@v4 with: diff --git a/CHANGELOG.md b/CHANGELOG.md index 887317e7e..91505ef8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,421 +3,421 @@ # AMI Release v20240202 - - -> [!NOTE] -> This release addresses an issue with Kubernetes 1.29 that allowed the sandbox container image used by `containerd` to be garbage-collected by `kubelet`. More information is available in #1597. - -## What's Changed -* Use crictl to pull sandbox image by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1605 -* Remove sandbox image from build-time cache by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1615 - - -**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20240129...v20240202 - ---- - -

AMI Details

- - -
-Kubernetes 1.29 - - - - - - - - - - - - - - - - - -
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.29-v202402021.29.0-20240202s3://amazon-eks/1.29.0/2024-01-04/
amazon-eks-gpu-node-1.29-v20240202
amazon-eks-arm64-node-1.29-v20240202
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
-
- -
-Kubernetes 1.28 - - - - - - - - - - - - - - - - - -
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202402021.28.5-20240202s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-gpu-node-1.28-v20240202
amazon-eks-arm64-node-1.28-v20240202
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
-
- -
-Kubernetes 1.27 - - - - - - - - - - - - - - - - - -
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202402021.27.9-20240202s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-gpu-node-1.27-v20240202
amazon-eks-arm64-node-1.27-v20240202
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
-
- -
-Kubernetes 1.26 - - - - - - - - - - - - - - - - - -
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202402021.26.12-20240202s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-gpu-node-1.26-v20240202
amazon-eks-arm64-node-1.26-v20240202
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
-
- -
-Kubernetes 1.25 - - - - - - - - - - - - - - - - - -
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202402021.25.16-20240202s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-gpu-node-1.25-v20240202
amazon-eks-arm64-node-1.25-v20240202
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
-
- -
-Kubernetes 1.24 - - - - - - - - - - - - - - - - - -
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202402021.24.17-20240202s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-gpu-node-1.24-v20240202
amazon-eks-arm64-node-1.24-v20240202
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
-
- -
-Kubernetes 1.23 - - - - - - - - - - - - - - - - - -
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v20240202 | 1.23.17-20240202 | s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-gpu-node-1.23-v20240202
amazon-eks-arm64-node-1.23-v20240202
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.4.266-178.365.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
-
- - -> **Note** -> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. -> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: -> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` -> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + + +> [!NOTE] +> This release addresses an issue with Kubernetes 1.29 that allowed the sandbox container image used by `containerd` to be garbage-collected by `kubelet`. More information is available in #1597. + +## What's Changed +* Use crictl to pull sandbox image by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1605 +* Remove sandbox image from build-time cache by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1615 + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20240129...v20240202 + +--- + +

AMI Details

+ + +
+Kubernetes 1.29 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.29-v20240202 | 1.29.0-20240202 | s3://amazon-eks/1.29.0/2024-01-04/
amazon-eks-gpu-node-1.29-v20240202
amazon-eks-arm64-node-1.29-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v20240202 | 1.28.5-20240202 | s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-gpu-node-1.28-v20240202
amazon-eks-arm64-node-1.28-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v20240202 | 1.27.9-20240202 | s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-gpu-node-1.27-v20240202
amazon-eks-arm64-node-1.27-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v20240202 | 1.26.12-20240202 | s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-gpu-node-1.26-v20240202
amazon-eks-arm64-node-1.26-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v20240202 | 1.25.16-20240202 | s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-gpu-node-1.25-v20240202
amazon-eks-arm64-node-1.25-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v20240202 | 1.24.17-20240202 | s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-gpu-node-1.24-v20240202
amazon-eks-arm64-node-1.24-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v20240202 | 1.23.17-20240202 | s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-gpu-node-1.23-v20240202
amazon-eks-arm64-node-1.23-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.4.266-178.365.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` --- From 976fe67e4c359737be71d892a8f55015cc1475f5 Mon Sep 17 00:00:00 2001 From: Nick Baker Date: Fri, 9 Feb 2024 16:33:55 -0800 Subject: [PATCH 610/621] harden pull-sandbox-image script (#1649) --- files/pull-sandbox-image.sh | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/files/pull-sandbox-image.sh b/files/pull-sandbox-image.sh index 02b651a9e..5610c09b1 100644 --- a/files/pull-sandbox-image.sh +++ b/files/pull-sandbox-image.sh @@ -1,10 +1,16 @@ #!/usr/bin/env bash -set -euo pipefail source <(grep "sandbox_image" /etc/containerd/config.toml | tr -d ' ') +### skip if we don't have a sandbox_image set in config.toml +if [[ -z ${sandbox_image:-} ]]; then + echo >&2 "Skipping ... missing sandbox_image from /etc/containerd/config.toml" + exit 0 +fi + ### Short-circuit fetching sandbox image if its already present -if [[ "$(sudo ctr --namespace k8s.io image ls | grep $sandbox_image)" != "" ]]; then +if [[ -n $(sudo ctr --namespace k8s.io image ls | grep "${sandbox_image}") ]]; then + echo >&2 "Skipping ... sandbox_image '${sandbox_image}' is already present" exit 0 fi @@ -29,9 +35,9 @@ function retry() { done } -ecr_password=$(retry aws ecr get-login-password --region $region) +# for public, non-ecr repositories even if this fails to get ECR credentials the image will pull +ecr_password=$(retry aws ecr get-login-password --region "${region}") if [[ -z ${ecr_password} ]]; then - echo >&2 "Unable to retrieve the ECR password." - exit 1 + echo >&2 "Unable to retrieve the ECR password. Image pull may not be properly authenticated." 
fi retry sudo crictl pull --creds "AWS:${ecr_password}" "${sandbox_image}" From 68d44aeb60fc001a260ad4d257adcbc40a78e809 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 12 Feb 2024 11:55:19 -0800 Subject: [PATCH 611/621] Sync main branch to CodeCommit (#1654) --- .github/workflows/sync-to-codecommit.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sync-to-codecommit.yaml b/.github/workflows/sync-to-codecommit.yaml index fda159369..a1748c4ca 100644 --- a/.github/workflows/sync-to-codecommit.yaml +++ b/.github/workflows/sync-to-codecommit.yaml @@ -27,5 +27,5 @@ jobs: - run: git remote add codecommit ${{ secrets.AWS_CODECOMMIT_REPO_URL }} - run: git checkout master - run: git push codecommit master - - run: git checkout al2023 - - run: git push codecommit al2023 + - run: git checkout main + - run: git push codecommit main From 5d9020bb1e9aca54901227a98a364deff1ada90d Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 12 Feb 2024 12:30:31 -0800 Subject: [PATCH 612/621] Add warning about default branch change (#1655) --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 350d47f35..e8ed05096 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,13 @@ # Amazon EKS AMI Build Specification +## ⚠️ The default branch of this repository is changing! + +Development will continue on `main`. The default branch of this repository will be changed to `main` on **February 29, 2024**. The `master` branch will be deleted on **March 30, 2024**. + +This change coincides with a reorganization of the project sources. You may continue using the `master` branch as you update your downstream dependencies, but you'll need to explicitly check out the `master` branch after February 29, 2024. + +--- + This repository contains resources and configuration scripts for building a custom Amazon EKS AMI with [HashiCorp Packer](https://www.packer.io/). 
This is the same configuration that Amazon EKS uses to create the official Amazon From e988b53449984d3438306a26ddaac6408ae3a4f1 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 12 Feb 2024 14:25:25 -0800 Subject: [PATCH 613/621] Change docs branch to main (#1657) --- .github/workflows/deploy-docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-docs.yaml b/.github/workflows/deploy-docs.yaml index 30328b76a..33ee10d43 100644 --- a/.github/workflows/deploy-docs.yaml +++ b/.github/workflows/deploy-docs.yaml @@ -3,7 +3,7 @@ on: workflow_dispatch: push: branches: - - 'master' + - 'main' jobs: mkdocs: permissions: From 8bb63f545e74e00eb3a2b919f147ad5cfce51725 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 12 Feb 2024 20:05:12 -0800 Subject: [PATCH 614/621] Update CHANGELOG.md for release v20240209 (#1660) Co-authored-by: GitHub --- CHANGELOG.md | 419 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 418 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 91505ef8a..b4925eb17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,423 @@ +# AMI Release v20240209 + + +## What's Changed +* Specify region for local zones in sandbox image ecr auth by @ndbaker1 in https://github.com/awslabs/amazon-eks-ami/pull/1626 +* Fix CHANGELOG space errors by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1647 + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20240202...v20240209 + +--- + +

AMI Details

+ + +
+Kubernetes 1.29 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.29-v20240209 | 1.29.0-20240209 | s3://amazon-eks/1.29.0/2024-01-04/
amazon-eks-gpu-node-1.29-v20240209
amazon-eks-arm64-node-1.29-v20240209
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.209-198.812.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v20240209 | 1.28.5-20240209 | s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-gpu-node-1.28-v20240209
amazon-eks-arm64-node-1.28-v20240209
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.209-198.812.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v20240209 | 1.27.9-20240209 | s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-gpu-node-1.27-v20240209
amazon-eks-arm64-node-1.27-v20240209
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.209-198.812.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v20240209 | 1.26.12-20240209 | s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-gpu-node-1.26-v20240209
amazon-eks-arm64-node-1.26-v20240209
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.209-198.812.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v20240209 | 1.25.16-20240209 | s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-gpu-node-1.25-v20240209
amazon-eks-arm64-node-1.25-v20240209
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.209-198.812.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v20240209 | 1.24.17-20240209 | s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-gpu-node-1.24-v20240209
amazon-eks-arm64-node-1.24-v20240209
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.10.209-198.812.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v20240209 | 1.23.17-20240209 | s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-gpu-node-1.23-v20240209
amazon-eks-arm64-node-1.23-v20240209
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.4.268-181.368.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + # AMI Release v20240202 @@ -6971,4 +7388,4 @@ Note: CNI >= 1.2.1 is required for t3 and r5 instance support. * EKS Launch AMI - + \ No newline at end of file From 202ea4c304b8ac95fbc2db67be46c8d7e48f754c Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 14 Feb 2024 09:20:29 -0800 Subject: [PATCH 615/621] Remove documentation workflow from master (#1664) --- .github/workflows/deploy-docs.yaml | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 .github/workflows/deploy-docs.yaml diff --git a/.github/workflows/deploy-docs.yaml b/.github/workflows/deploy-docs.yaml deleted file mode 100644 index 33ee10d43..000000000 --- a/.github/workflows/deploy-docs.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: Deploy documentation -on: - workflow_dispatch: - push: - branches: - - 'main' -jobs: - mkdocs: - permissions: - contents: write - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - run: pip install mkdocs mkdocs-material - - run: mkdocs gh-deploy --strict --no-history --force From 5c6d0e05c648e37371e10d904648fd4a4b79d14d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 19 Feb 2024 10:29:35 -0800 Subject: [PATCH 616/621] Update CHANGELOG.md for release v20240213 (#1668) Co-authored-by: GitHub --- CHANGELOG.md | 684 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 684 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4925eb17..ba6c94172 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,690 @@ +# AMI Release v20240213 + + +## What's Changed +* 
harden pull-sandbox-image script by @ndbaker1 in https://github.com/awslabs/amazon-eks-ami/pull/1649 +* Merge `al2023` to `main` by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1653 +* Switch branch for dependency review by @Issacwww in https://github.com/awslabs/amazon-eks-ami/pull/1659 + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20240209...v20240213 + +--- + +> [!NOTE] +> The notes on the [Releases](https://github.com/awslabs/amazon-eks-ami/releases) page may be truncated, and you may not see all supported Kubernetes versions. +> The full release notes can be viewed [here](https://github.com/awslabs/amazon-eks-ami/releases/tag/v20240213). +> More information is in #1666. + +

AMI Details

+ + +
+Kubernetes 1.29 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.29-v20240213 | 1.29.0-20240213 | s3://amazon-eks/1.29.0/2024-01-04/
amazon-eks-node-1.29-v20240213
amazon-eks-node-al2023-arm64-standard-1.29-v20240213
amazon-eks-arm64-node-1.29-v20240213
amazon-eks-gpu-node-1.29-v20240213
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel | 5.10.209-198.812.amzn2 | 5.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.1705.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.75-99.163.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.28-v20240213 | 1.28.5-20240213 | s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-node-1.28-v20240213
amazon-eks-node-al2023-arm64-standard-1.28-v20240213
amazon-eks-arm64-node-1.28-v20240213
amazon-eks-gpu-node-1.28-v20240213
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel | 5.10.209-198.812.amzn2 | 5.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.1705.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.75-99.163.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.27-v20240213 | 1.27.9-20240213 | s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-node-1.27-v20240213
amazon-eks-node-al2023-arm64-standard-1.27-v20240213
amazon-eks-arm64-node-1.27-v20240213
amazon-eks-gpu-node-1.27-v20240213
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel | 5.10.209-198.812.amzn2 | 5.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.1705.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.75-99.163.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.26-v20240213 | 1.26.12-20240213 | s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-node-1.26-v20240213
amazon-eks-node-al2023-arm64-standard-1.26-v20240213
amazon-eks-arm64-node-1.26-v20240213
amazon-eks-gpu-node-1.26-v20240213
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel | 5.10.209-198.812.amzn2 | 5.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.1705.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.75-99.163.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.25-v20240213 | 1.25.16-20240213 | s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-node-1.25-v20240213
amazon-eks-node-al2023-arm64-standard-1.25-v20240213
amazon-eks-arm64-node-1.25-v20240213
amazon-eks-gpu-node-1.25-v20240213
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel | 5.10.209-198.812.amzn2 | 5.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.1705.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.75-99.163.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.24-v20240213 | 1.24.17-20240213 | s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-node-1.24-v20240213
amazon-eks-node-al2023-arm64-standard-1.24-v20240213
amazon-eks-arm64-node-1.24-v20240213
amazon-eks-gpu-node-1.24-v20240213
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel | 5.10.209-198.812.amzn2 | 5.4.254-170.358.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.1705.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.75-99.163.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.23-v20240213 | 1.23.17-20240213 | s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-node-1.23-v20240213
amazon-eks-node-al2023-arm64-standard-1.23-v20240213
amazon-eks-arm64-node-1.23-v20240213
amazon-eks-gpu-node-1.23-v20240213
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel | 5.4.268-181.368.amzn2 | 5.4.254-170.358.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.1705.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.75-99.163.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + # AMI Release v20240209 From 8d7b5f89f511ef018905c8e24a6c1917e3b8bbdb Mon Sep 17 00:00:00 2001 From: Jay Deokar <23660509+jaydeokar@users.noreply.github.com> Date: Thu, 22 Feb 2024 12:46:45 -0800 Subject: [PATCH 617/621] Collect Network Policy ebpf data from log collector script (#1595) --- log-collector-script/linux/eks-log-collector.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 6c4c03528..062468dd1 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -279,6 +279,7 @@ collect() { get_networking_info get_cni_config get_cni_configuration_variables + get_network_policy_ebpf_info get_docker_logs get_sandboxImage_info get_cpu_throttled_processes @@ -509,6 +510,18 @@ get_sysctls_info() { ok } +get_network_policy_ebpf_info() { + try "collect network policy ebpf loaded data" + echo "*** EBPF loaded data ***" >> "${COLLECT_DIR}"/networking/ebpf-data.txt + LOADED_EBPF=$(/opt/cni/bin/aws-eks-na-cli ebpf loaded-ebpfdata | tee -a "${COLLECT_DIR}"/networking/ebpf-data.txt) + + for mapid in $(echo "$LOADED_EBPF" | grep "Map ID:" | sed 's/Map ID: \+//' | sort | uniq); do + echo "*** EBPF Maps Data for Map ID $mapid ***" >> "${COLLECT_DIR}"/networking/ebpf-maps-data.txt + /opt/cni/bin/aws-eks-na-cli ebpf dump-maps $mapid >> "${COLLECT_DIR}"/networking/ebpf-maps-data.txt + done + ok +} + get_networking_info() { try "collect networking infomation" From b72281cb04bb895c63cbc5646cee6f99489c304f 
Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 29 Feb 2024 12:42:40 -0800 Subject: [PATCH 618/621] Update warning about branch migration (#1695) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e8ed05096..403a0d1be 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Amazon EKS AMI Build Specification -## ⚠️ The default branch of this repository is changing! +## This branch will be deleted on **March 30, 2024**! -Development will continue on `main`. The default branch of this repository will be changed to `main` on **February 29, 2024**. The `master` branch will be deleted on **March 30, 2024**. +The default branch of this repository has changed to `main`. This change coincides with a reorganization of the project sources. You may continue using the `master` branch as you update your downstream dependencies, but you'll need to explicitly check out the `master` branch after February 29, 2024. From 69a35707693a56310b23b864ae955b88075abe0e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 29 Feb 2024 18:43:08 -0800 Subject: [PATCH 619/621] Update CHANGELOG.md for release v20240227 (#1694) Co-authored-by: GitHub --- CHANGELOG.md | 698 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 698 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba6c94172..448d3b598 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,704 @@ +# AMI Release v20240227 + + +> [!NOTE] +> This release includes changes in the Kubernetes 1.29 GPU AMI to address a compatibility issue with the EFA and NVIDIA kernel modules. More information is available in https://github.com/awslabs/amazon-eks-ami/issues/1494. 
+ +## What's Changed +* Allow `containerd` config imports by @ndbaker1 in https://github.com/awslabs/amazon-eks-ami/pull/1630 +* cleanup al2023 templates by @ndbaker1 in https://github.com/awslabs/amazon-eks-ami/pull/1682 +* Do not prepare local disks by default by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1686 +* Add InstanceOptions with LocalDiskStrategy by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1688 +* Remove setup-local-disks unit from al2023 template by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1691 + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20240213...v20240227 + +--- + +

AMI Details

+ + +
+Kubernetes 1.29 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.29-v20240227 | 1.29.0-20240227 | s3://amazon-eks/1.29.0/2024-01-04/
amazon-eks-node-1.29-v20240227
amazon-eks-node-al2023-arm64-standard-1.29-v20240227
amazon-eks-arm64-node-1.29-v20240227
amazon-eks-gpu-node-1.29-v20240227
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.2222.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda-12-2 | 12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.209-198.858.amzn2
nvidia-driver-latest-dkms535.161.07-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.2222.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.77-99.164.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.28-v20240227 | 1.28.5-20240227 | s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-node-1.28-v20240227
amazon-eks-node-al2023-arm64-standard-1.28-v20240227
amazon-eks-arm64-node-1.28-v20240227
amazon-eks-gpu-node-1.28-v20240227
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.2222.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
cuda-12-2 | 12.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel | 5.10.209-198.858.amzn2 | 5.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.2222.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.77-99.164.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.27-v20240227 | 1.27.9-20240227 | s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-node-1.27-v20240227
amazon-eks-node-al2023-arm64-standard-1.27-v20240227
amazon-eks-arm64-node-1.27-v20240227
amazon-eks-gpu-node-1.27-v20240227
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.2222.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
cuda-12-2 | 12.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel | 5.10.209-198.858.amzn2 | 5.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.2222.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.77-99.164.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.26-v20240227 | 1.26.12-20240227 | s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-node-1.26-v20240227
amazon-eks-node-al2023-arm64-standard-1.26-v20240227
amazon-eks-arm64-node-1.26-v20240227
amazon-eks-gpu-node-1.26-v20240227
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.2222.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
cuda-12-2 | 12.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel | 5.10.209-198.858.amzn2 | 5.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
Package | AL2023_x86_64_STANDARD | AL2023_ARM_64_STANDARD
amazon-ssm-agent | 3.2.2222.0-1.amzn2023
containerd | 1.7.11-1.amzn2023.0.1
kernel | 6.1.77-99.164.amzn2023
runc | 1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + + + + + + +
AMI Names | Release version | Included artifacts
amazon-eks-node-al2023-x86_64-standard-1.25-v20240227 | 1.25.16-20240227 | s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-node-1.25-v20240227
amazon-eks-node-al2023-arm64-standard-1.25-v20240227
amazon-eks-arm64-node-1.25-v20240227
amazon-eks-gpu-node-1.25-v20240227
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Package | AL2_x86_64 | AL2_ARM_64 | AL2_x86_64_GPU
amazon-ssm-agent | 3.2.2222.0-1.amzn2
containerd | 1.7.11-1.amzn2.0.1
cuda | 12.2.2-1
cuda-12-2 | 12.2.2-1
docker | 20.10.25-1.amzn2.0.4
efa | 2.6.0-1.amzn2
kernel | 5.10.209-198.858.amzn2 | 5.10.192-183.736.amzn2
nvidia-driver-latest-dkms | 535.129.03-1.el7
runc | 1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
Package | AL2023_x86_64_STANDARD | AL2023_ARM_64_STANDARD
amazon-ssm-agent | 3.2.2222.0-1.amzn2023
containerd | 1.7.11-1.amzn2023.0.1
kernel | 6.1.77-99.164.amzn2023
runc | 1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + + + + + + +
AMI Names | Release version | Included artifacts
amazon-eks-node-al2023-x86_64-standard-1.24-v20240227 | 1.24.17-20240227 | s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-node-1.24-v20240227
amazon-eks-node-al2023-arm64-standard-1.24-v20240227
amazon-eks-arm64-node-1.24-v20240227
amazon-eks-gpu-node-1.24-v20240227
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Package | AL2_x86_64 | AL2_ARM_64 | AL2_x86_64_GPU
amazon-ssm-agent | 3.2.2222.0-1.amzn2
containerd | 1.7.11-1.amzn2.0.1
cuda | 11.4.0-1
docker | 20.10.25-1.amzn2.0.4
kernel | 5.10.209-198.858.amzn2 | 5.4.254-170.358.amzn2
nvidia-driver-latest-dkms | 470.182.03-1.el7
runc | 1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
Package | AL2023_x86_64_STANDARD | AL2023_ARM_64_STANDARD
amazon-ssm-agent | 3.2.2222.0-1.amzn2023
containerd | 1.7.11-1.amzn2023.0.1
kernel | 6.1.77-99.164.amzn2023
runc | 1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + + + + + + +
AMI Names | Release version | Included artifacts
amazon-eks-node-al2023-x86_64-standard-1.23-v20240227 | 1.23.17-20240227 | s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-node-1.23-v20240227
amazon-eks-node-al2023-arm64-standard-1.23-v20240227
amazon-eks-arm64-node-1.23-v20240227
amazon-eks-gpu-node-1.23-v20240227
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Package | AL2_x86_64 | AL2_ARM_64 | AL2_x86_64_GPU
amazon-ssm-agent | 3.2.2222.0-1.amzn2
containerd | 1.7.11-1.amzn2.0.1
cuda | 11.4.0-1
docker | 20.10.25-1.amzn2.0.4
kernel | 5.4.268-181.370.amzn2 | 5.4.254-170.358.amzn2
nvidia-driver-latest-dkms | 470.182.03-1.el7
runc | 1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
Package | AL2023_x86_64_STANDARD | AL2023_ARM_64_STANDARD
amazon-ssm-agent | 3.2.2222.0-1.amzn2023
containerd | 1.7.11-1.amzn2023.0.1
kernel | 6.1.77-99.164.amzn2023
runc | 1.1.11-1.amzn2023.0.1
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25-1.28: `5.10.192-183.736.amzn2` + +--- + + # AMI Release v20240213 From af90671048464ba352fa26560cd9e17de154cc0e Mon Sep 17 00:00:00 2001 From: "Jon \"The Nice Guy\" Spriggs" Date: Wed, 6 Mar 2024 18:40:43 +0000 Subject: [PATCH 620/621] Update eks-log-collector.sh to support iptables-legacy (#1427) --- .../linux/eks-log-collector.sh | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 062468dd1..ba2e81d73 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -268,6 +268,7 @@ collect() { get_mounts_info get_selinux_info get_iptables_info + get_iptables_legacy_info get_pkglist get_system_services get_containerd_info @@ -328,7 +329,7 @@ get_selinux_info() { get_iptables_info() { if ! command -v iptables > /dev/null 2>&1; then - echo "IPtables not installed" | tee -a iptables.txt + echo "IPtables not installed" | tee -a "${COLLECT_DIR}"/iptables.txt else try "collect iptables information" iptables --wait 1 --numeric --verbose --list --table mangle | tee "${COLLECT_DIR}"/networking/iptables-mangle.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-mangle.txt @@ -341,6 +342,21 @@ get_iptables_info() { ok } +get_iptables_legacy_info() { + if ! 
command -v iptables-legacy > /dev/null 2>&1; then + echo "IPtables-legacy not installed" | tee -a "${COLLECT_DIR}"/iptables-legacy.txt + else + try "collect iptables-legacy information" + iptables-legacy --wait 1 --numeric --verbose --list --table mangle | tee "${COLLECT_DIR}"/networking/iptables-legacy-mangle.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-legacy-mangle.txt + iptables-legacy --wait 1 --numeric --verbose --list --table filter | tee "${COLLECT_DIR}"/networking/iptables-legacy-filter.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-legacy-filter.txt + iptables-legacy --wait 1 --numeric --verbose --list --table nat | tee "${COLLECT_DIR}"/networking/iptables-legacy-nat.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-legacy-nat.txt + iptables-legacy --wait 1 --numeric --verbose --list | tee "${COLLECT_DIR}"/networking/iptables-legacy.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-legacy.txt + iptables-legacy-save > "${COLLECT_DIR}"/networking/iptables-legacy-save.txt + fi + + ok +} + get_common_logs() { try "collect common operating system logs" From 73c18148437c84a54c9df7d6d66608f5d92bff38 Mon Sep 17 00:00:00 2001 From: Hirotaka Tagawa / wafuwafu13 Date: Wed, 6 Mar 2024 18:46:29 +0000 Subject: [PATCH 621/621] fix(log-collector): add `ok` to `get_modinfo` and `get_ipamd_info` func (#1624) --- log-collector-script/linux/eks-log-collector.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index ba2e81d73..72f6caf22 100644 --- 
a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -416,6 +416,8 @@ get_kernel_info() { get_modinfo() { try "collect modinfo" modinfo lustre > "${COLLECT_DIR}/modinfo/lustre" + + ok } get_docker_logs() { @@ -498,6 +500,8 @@ get_ipamd_info() { echo "Ignoring IPAM introspection stats as mentioned" | tee -a "${COLLECT_DIR}"/ipamd/ipam_introspection_ignore.txt fi + ok + if [[ "${ignore_metrics}" == "false" ]]; then try "collect L-IPAMD prometheus metrics" curl --max-time 3 --silent http://localhost:61678/metrics > "${COLLECT_DIR}"/ipamd/metrics.json 2>&1 @@ -505,6 +509,8 @@ get_ipamd_info() { echo "Ignoring Prometheus Metrics collection as mentioned" | tee -a "${COLLECT_DIR}"/ipamd/ipam_metrics_ignore.txt fi + ok + try "collect L-IPAMD checkpoint" cp /var/run/aws-node/ipam.json "${COLLECT_DIR}"/ipamd/ipam.json