diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..deaf311fc --- /dev/null +++ b/.editorconfig @@ -0,0 +1,11 @@ +# ensure that these rules are equivalent to the flags to shfmt in the Makefile. +# we can't use this file with shfmt directly because there's no way to express +# shebang matching on files without the `sh` extension. +[*.sh] +indent_style = space +indent_size = 2 +binary_next_line = true +switch_case_indent = true +space_redirects = true +keep_padding = false +function_next_line = false diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 000000000..d04cc330c --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,4 @@ +# Applied code style rules to shell files +6014c4e6872a23f82ca295afa93b033207042876 +# Addressed space errors +bde408b340d992aad39e13de1aaf929f358f4338 diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e779d6499..360447696 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,8 +1,14 @@ -*Issue #, if available:* +**Issue #, if available:** -*Description of changes:* +**Description of changes:** By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. + +**Testing Done** + + + +*[See this guide for recommended testing for PRs.](../doc/CONTRIBUTING.md#testing-changes) Some tests may not apply. 
Completing tests and providing additional validation steps are not required, but it is recommended and may reduce review time and time to merge.* diff --git a/.github/actions/bot/.gitignore b/.github/actions/bot/.gitignore new file mode 100644 index 000000000..c2658d7d1 --- /dev/null +++ b/.github/actions/bot/.gitignore @@ -0,0 +1 @@ +node_modules/ diff --git a/.github/actions/bot/README.md b/.github/actions/bot/README.md new file mode 100644 index 000000000..526846d91 --- /dev/null +++ b/.github/actions/bot/README.md @@ -0,0 +1,21 @@ +# bot + +This GitHub Action parses commands from pull request comments and executes them. + +Only authorized users (members and owners of this repository) are able to execute commands. + +Commands look like `/COMMAND ARGS`, for example: +``` +/echo hello world +``` + +Multiple commands can be included in a comment, one per line; but each command must be unique. + +Some commands accept additional, named arguments specified on subsequent lines. +Named arguments look like `+NAME ARGS`, for example: +``` +/ci launch ++build cache_container_images=true +``` + +Multiple named arguments can be specified. 
\ No newline at end of file diff --git a/.github/actions/bot/action.yaml b/.github/actions/bot/action.yaml new file mode 100644 index 000000000..dfb471a30 --- /dev/null +++ b/.github/actions/bot/action.yaml @@ -0,0 +1,13 @@ +name: "Bot" +description: "🤖 beep boop" +runs: + using: "composite" + steps: + - uses: "actions/checkout@v3" + - uses: "actions/github-script@v6" + with: + script: | + const crypto = require('crypto'); + const uuid = crypto.randomUUID(); + const bot = require('./.github/actions/bot/index.js'); + await bot(core, github, context, uuid); \ No newline at end of file diff --git a/.github/actions/bot/index.js b/.github/actions/bot/index.js new file mode 100644 index 000000000..c24398f6d --- /dev/null +++ b/.github/actions/bot/index.js @@ -0,0 +1,213 @@ +// this script cannot require/import, because it's called by actions/github-script. +// any dependencies must be passed in the inline script in action.yaml + +async function bot(core, github, context, uuid) { + const payload = context.payload; + + if (!payload.comment) { + console.log("No comment found in payload"); + return; + } + console.log("Comment found in payload"); + + // user's org membership must be public for the author_association to be MEMBER + // go to the org's member page, find yourself, and set the visibility to public + const author = payload.comment.user.login; + const authorized = ["OWNER", "MEMBER"].includes(payload.comment.author_association); + if (!authorized) { + console.log(`Comment author is not authorized: ${author}`); + return; + } + console.log(`Comment author is authorized: ${author}`); + + let commands; + try { + commands = parseCommands(uuid, payload, payload.comment.body); + } catch (error) { + console.log(error); + const reply = `@${author} I didn't understand [that](${payload.comment.html_url})! 
🤔\n\nTake a look at my [logs](${getBotWorkflowURL(payload, context)}).` + replyToCommand(github, payload, reply); + return; + } + if (commands.length === 0) { + console.log("No commands found in comment body"); + return; + } + const uniqueCommands = [...new Set(commands.map(command => typeof command))]; + if (uniqueCommands.length != commands.length) { + replyToCommand(github, payload, `@${author} you can't use the same command more than once! 🙅`); + return; + } + console.log(commands.length + " command(s) found in comment body"); + + for (const command of commands) { + const reply = await command.run(author, github); + if (typeof reply === 'string') { + replyToCommand(github, payload, reply); + } else if (reply) { + console.log(`Command returned: ${reply}`); + } else { + console.log("Command did not return a reply"); + } + } +} + +// replyToCommand creates a comment on the same PR that triggered this workflow +function replyToCommand(github, payload, reply) { + github.rest.issues.createComment({ + owner: payload.repository.owner.login, + repo: payload.repository.name, + issue_number: payload.issue.number, + body: reply + }); +} + +// getBotWorkflowURL returns an HTML URL for this workflow execution of the bot +function getBotWorkflowURL(payload, context) { + return `https://github.com/${payload.repository.owner.login}/${payload.repository.name}/actions/runs/${context.runId}`; +} + +// parseCommands splits the comment body into lines and parses each line as a command or named arguments to the previous command. 
+function parseCommands(uuid, payload, commentBody) { + const commands = []; + if (!commentBody) { + return commands; + } + const lines = commentBody.split(/\r?\n/); + for (const line of lines) { + console.log(`Parsing line: ${line}`); + const command = parseCommand(uuid, payload, line); + if (command) { + commands.push(command); + } else { + const namedArguments = parseNamedArguments(line); + if (namedArguments) { + const previousCommand = commands.at(-1); + if (previousCommand) { + if (typeof previousCommand.addNamedArguments === 'function') { + previousCommand.addNamedArguments(namedArguments.name, namedArguments.args); + } else { + throw new Error(`Parsed named arguments but previous command (${previousCommand.constructor.name}) does not support arguments: ${JSON.stringify(namedArguments)}`); + } + } else { + // don't treat this as an error, because the named argument syntax might just be someone '+1'-ing. + console.log(`Parsed named arguments with no previous command: ${JSON.stringify(namedArguments)}`); + } + } + } + } + return commands +} + +// parseCommand parses a line as a command. +// The format of a command is `/NAME ARGS...`. +// Leading and trailing spaces are ignored. +function parseCommand(uuid, payload, line) { + const command = line.trim().match(/^\/([a-z\-]+)(?:\s+(.+))?$/); + if (command) { + return buildCommand(uuid, payload, command[1], command[2]); + } + return null; +} + +// buildCommand builds a command from a name and arguments. +function buildCommand(uuid, payload, name, args) { + switch (name) { + case "echo": + return new EchoCommand(uuid, payload, args); + case "ci": + return new CICommand(uuid, payload, args); + default: + console.log(`Unknown command: ${name}`); + return null; + } +} + +// parseNamedArguments parses a line as named arguments. +// The format of a named argument is `+NAME ARGS...`. +// Leading and trailing spaces are ignored. 
+function parseNamedArguments(line) { + const parsed = line.trim().match(/^\+([a-z\-]+)(?:\s+(.+))?$/); + if (parsed) { + return { + name: parsed[1], + args: parsed[2] + } + } + return null; +} + +class EchoCommand { + constructor(uuid, payload, args) { + this.phrase = args ? args : "echo"; + } + + run(author) { + return `@${author} *${this.phrase}*`; + } +} + +class CICommand { + constructor(uuid, payload, args) { + this.repository_owner = payload.repository.owner.login; + this.repository_name = payload.repository.name; + this.pr_number = payload.issue.number; + this.comment_url = payload.comment.html_url; + this.uuid = uuid; + this.goal = "test"; + // "test" goal, which executes all CI stages, is the default when no goal is specified + if (args != null && args != "") { + this.goal = args; + } + this.goal_args = {}; + } + + addNamedArguments(goal, args) { + this.goal_args[goal] = args; + } + + async run(author, github) { + const pr = await github.rest.pulls.get({ + owner: this.repository_owner, + repo: this.repository_name, + pull_number: this.pr_number + }); + const mergeable = pr.data.mergeable; + switch (mergeable) { + case true: + break; + case false: + case null: + return `@${author} this PR is not currently mergeable, you'll need to rebase it first.`; + default: + throw new Error(`Unknown mergeable value: ${mergeable}`); + } + const inputs = { + uuid: this.uuid, + pr_number: this.pr_number.toString(), + git_sha: pr.data.merge_commit_sha, + goal: this.goal, + requester: author, + comment_url: this.comment_url + }; + for (const [goal, args] of Object.entries(this.goal_args)) { + inputs[`${goal}_arguments`] = args; + } + console.log(`Dispatching workflow with inputs: ${JSON.stringify(inputs)}`); + await github.rest.actions.createWorkflowDispatch({ + owner: this.repository_owner, + repo: this.repository_name, + workflow_id: 'ci-manual.yaml', + ref: 'master', + inputs: inputs + }); + return null; + } +} + + +module.exports = async (core, github, context, uuid) => 
{ + bot(core, github, context, uuid).catch((error) => { + core.setFailed(error); + }); +} diff --git a/.github/actions/bot/package-lock.json b/.github/actions/bot/package-lock.json new file mode 100644 index 000000000..333a0db57 --- /dev/null +++ b/.github/actions/bot/package-lock.json @@ -0,0 +1,430 @@ +{ + "name": "bot", + "version": "1.0.0", + "lockfileVersion": 2, + "requires": true, + "packages": { + "": { + "name": "bot", + "version": "1.0.0", + "dependencies": { + "@actions/core": "^1.10.0", + "@actions/github": "^5.1.1" + } + }, + "node_modules/@actions/core": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/@actions/core/-/core-1.10.0.tgz", + "integrity": "sha512-2aZDDa3zrrZbP5ZYg159sNoLRb61nQ7awl5pSvIq5Qpj81vwDzdMRKzkWJGJuwVvWpvZKx7vspJALyvaaIQyug==", + "dependencies": { + "@actions/http-client": "^2.0.1", + "uuid": "^8.3.2" + } + }, + "node_modules/@actions/github": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/@actions/github/-/github-5.1.1.tgz", + "integrity": "sha512-Nk59rMDoJaV+mHCOJPXuvB1zIbomlKS0dmSIqPGxd0enAXBnOfn4VWF+CGtRCwXZG9Epa54tZA7VIRlJDS8A6g==", + "dependencies": { + "@actions/http-client": "^2.0.1", + "@octokit/core": "^3.6.0", + "@octokit/plugin-paginate-rest": "^2.17.0", + "@octokit/plugin-rest-endpoint-methods": "^5.13.0" + } + }, + "node_modules/@actions/http-client": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@actions/http-client/-/http-client-2.1.1.tgz", + "integrity": "sha512-qhrkRMB40bbbLo7gF+0vu+X+UawOvQQqNAA/5Unx774RS8poaOhThDOG6BGmxvAnxhQnDp2BG/ZUm65xZILTpw==", + "dependencies": { + "tunnel": "^0.0.6" + } + }, + "node_modules/@octokit/auth-token": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/@octokit/auth-token/-/auth-token-2.5.0.tgz", + "integrity": "sha512-r5FVUJCOLl19AxiuZD2VRZ/ORjp/4IN98Of6YJoJOkY75CIBuYfmiNHGrDwXr+aLGG55igl9QrxX3hbiXlLb+g==", + "dependencies": { + "@octokit/types": "^6.0.3" + } + }, + "node_modules/@octokit/core": { + 
"version": "3.6.0", + "resolved": "https://registry.npmjs.org/@octokit/core/-/core-3.6.0.tgz", + "integrity": "sha512-7RKRKuA4xTjMhY+eG3jthb3hlZCsOwg3rztWh75Xc+ShDWOfDDATWbeZpAHBNRpm4Tv9WgBMOy1zEJYXG6NJ7Q==", + "dependencies": { + "@octokit/auth-token": "^2.4.4", + "@octokit/graphql": "^4.5.8", + "@octokit/request": "^5.6.3", + "@octokit/request-error": "^2.0.5", + "@octokit/types": "^6.0.3", + "before-after-hook": "^2.2.0", + "universal-user-agent": "^6.0.0" + } + }, + "node_modules/@octokit/endpoint": { + "version": "6.0.12", + "resolved": "https://registry.npmjs.org/@octokit/endpoint/-/endpoint-6.0.12.tgz", + "integrity": "sha512-lF3puPwkQWGfkMClXb4k/eUT/nZKQfxinRWJrdZaJO85Dqwo/G0yOC434Jr2ojwafWJMYqFGFa5ms4jJUgujdA==", + "dependencies": { + "@octokit/types": "^6.0.3", + "is-plain-object": "^5.0.0", + "universal-user-agent": "^6.0.0" + } + }, + "node_modules/@octokit/graphql": { + "version": "4.8.0", + "resolved": "https://registry.npmjs.org/@octokit/graphql/-/graphql-4.8.0.tgz", + "integrity": "sha512-0gv+qLSBLKF0z8TKaSKTsS39scVKF9dbMxJpj3U0vC7wjNWFuIpL/z76Qe2fiuCbDRcJSavkXsVtMS6/dtQQsg==", + "dependencies": { + "@octokit/request": "^5.6.0", + "@octokit/types": "^6.0.3", + "universal-user-agent": "^6.0.0" + } + }, + "node_modules/@octokit/openapi-types": { + "version": "12.11.0", + "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-12.11.0.tgz", + "integrity": "sha512-VsXyi8peyRq9PqIz/tpqiL2w3w80OgVMwBHltTml3LmVvXiphgeqmY9mvBw9Wu7e0QWk/fqD37ux8yP5uVekyQ==" + }, + "node_modules/@octokit/plugin-paginate-rest": { + "version": "2.21.3", + "resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-2.21.3.tgz", + "integrity": "sha512-aCZTEf0y2h3OLbrgKkrfFdjRL6eSOo8komneVQJnYecAxIej7Bafor2xhuDJOIFau4pk0i/P28/XgtbyPF0ZHw==", + "dependencies": { + "@octokit/types": "^6.40.0" + }, + "peerDependencies": { + "@octokit/core": ">=2" + } + }, + "node_modules/@octokit/plugin-rest-endpoint-methods": { + "version": 
"5.16.2", + "resolved": "https://registry.npmjs.org/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-5.16.2.tgz", + "integrity": "sha512-8QFz29Fg5jDuTPXVtey05BLm7OB+M8fnvE64RNegzX7U+5NUXcOcnpTIK0YfSHBg8gYd0oxIq3IZTe9SfPZiRw==", + "dependencies": { + "@octokit/types": "^6.39.0", + "deprecation": "^2.3.1" + }, + "peerDependencies": { + "@octokit/core": ">=3" + } + }, + "node_modules/@octokit/request": { + "version": "5.6.3", + "resolved": "https://registry.npmjs.org/@octokit/request/-/request-5.6.3.tgz", + "integrity": "sha512-bFJl0I1KVc9jYTe9tdGGpAMPy32dLBXXo1dS/YwSCTL/2nd9XeHsY616RE3HPXDVk+a+dBuzyz5YdlXwcDTr2A==", + "dependencies": { + "@octokit/endpoint": "^6.0.1", + "@octokit/request-error": "^2.1.0", + "@octokit/types": "^6.16.1", + "is-plain-object": "^5.0.0", + "node-fetch": "^2.6.7", + "universal-user-agent": "^6.0.0" + } + }, + "node_modules/@octokit/request-error": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/@octokit/request-error/-/request-error-2.1.0.tgz", + "integrity": "sha512-1VIvgXxs9WHSjicsRwq8PlR2LR2x6DwsJAaFgzdi0JfJoGSO8mYI/cHJQ+9FbN21aa+DrgNLnwObmyeSC8Rmpg==", + "dependencies": { + "@octokit/types": "^6.0.3", + "deprecation": "^2.0.0", + "once": "^1.4.0" + } + }, + "node_modules/@octokit/types": { + "version": "6.41.0", + "resolved": "https://registry.npmjs.org/@octokit/types/-/types-6.41.0.tgz", + "integrity": "sha512-eJ2jbzjdijiL3B4PrSQaSjuF2sPEQPVCPzBvTHJD9Nz+9dw2SGH4K4xeQJ77YfTq5bRQ+bD8wT11JbeDPmxmGg==", + "dependencies": { + "@octokit/openapi-types": "^12.11.0" + } + }, + "node_modules/before-after-hook": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/before-after-hook/-/before-after-hook-2.2.3.tgz", + "integrity": "sha512-NzUnlZexiaH/46WDhANlyR2bXRopNg4F/zuSA3OpZnllCUgRaOF2znDioDWrmbNVsuZk6l9pMquQB38cfBZwkQ==" + }, + "node_modules/deprecation": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/deprecation/-/deprecation-2.3.1.tgz", + "integrity": 
"sha512-xmHIy4F3scKVwMsQ4WnVaS8bHOx0DmVwRywosKhaILI0ywMDWPtBSku2HNxRvF7jtwDRsoEwYQSfbxj8b7RlJQ==" + }, + "node_modules/is-plain-object": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-5.0.0.tgz", + "integrity": "sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/node-fetch": { + "version": "2.6.13", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.13.tgz", + "integrity": "sha512-StxNAxh15zr77QvvkmveSQ8uCQ4+v5FkvNTj0OESmiHu+VRi/gXArXtkWMElOsOUNLtUEvI4yS+rdtOHZTwlQA==", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" + }, + "node_modules/tunnel": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/tunnel/-/tunnel-0.0.6.tgz", + "integrity": "sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg==", + "engines": { + "node": ">=0.6.11 <=0.7.0 || >=0.7.3" + } + }, + "node_modules/universal-user-agent": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/universal-user-agent/-/universal-user-agent-6.0.0.tgz", + "integrity": "sha512-isyNax3wXoKaulPDZWHQqbmIx1k2tb9fb3GGDBRxCscfYV2Ch7WxPArBsFEG8s/safwXTT7H4QGhaIkTp9447w==" + }, + "node_modules/uuid": { + "version": 
"8.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", + "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + } + }, + "dependencies": { + "@actions/core": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/@actions/core/-/core-1.10.0.tgz", + "integrity": "sha512-2aZDDa3zrrZbP5ZYg159sNoLRb61nQ7awl5pSvIq5Qpj81vwDzdMRKzkWJGJuwVvWpvZKx7vspJALyvaaIQyug==", + "requires": { + "@actions/http-client": "^2.0.1", + "uuid": "^8.3.2" + } + }, + "@actions/github": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/@actions/github/-/github-5.1.1.tgz", + "integrity": "sha512-Nk59rMDoJaV+mHCOJPXuvB1zIbomlKS0dmSIqPGxd0enAXBnOfn4VWF+CGtRCwXZG9Epa54tZA7VIRlJDS8A6g==", + "requires": { + "@actions/http-client": "^2.0.1", + "@octokit/core": "^3.6.0", + "@octokit/plugin-paginate-rest": "^2.17.0", + "@octokit/plugin-rest-endpoint-methods": "^5.13.0" + } + }, + "@actions/http-client": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@actions/http-client/-/http-client-2.1.1.tgz", + "integrity": 
"sha512-qhrkRMB40bbbLo7gF+0vu+X+UawOvQQqNAA/5Unx774RS8poaOhThDOG6BGmxvAnxhQnDp2BG/ZUm65xZILTpw==", + "requires": { + "tunnel": "^0.0.6" + } + }, + "@octokit/auth-token": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/@octokit/auth-token/-/auth-token-2.5.0.tgz", + "integrity": "sha512-r5FVUJCOLl19AxiuZD2VRZ/ORjp/4IN98Of6YJoJOkY75CIBuYfmiNHGrDwXr+aLGG55igl9QrxX3hbiXlLb+g==", + "requires": { + "@octokit/types": "^6.0.3" + } + }, + "@octokit/core": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@octokit/core/-/core-3.6.0.tgz", + "integrity": "sha512-7RKRKuA4xTjMhY+eG3jthb3hlZCsOwg3rztWh75Xc+ShDWOfDDATWbeZpAHBNRpm4Tv9WgBMOy1zEJYXG6NJ7Q==", + "requires": { + "@octokit/auth-token": "^2.4.4", + "@octokit/graphql": "^4.5.8", + "@octokit/request": "^5.6.3", + "@octokit/request-error": "^2.0.5", + "@octokit/types": "^6.0.3", + "before-after-hook": "^2.2.0", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/endpoint": { + "version": "6.0.12", + "resolved": "https://registry.npmjs.org/@octokit/endpoint/-/endpoint-6.0.12.tgz", + "integrity": "sha512-lF3puPwkQWGfkMClXb4k/eUT/nZKQfxinRWJrdZaJO85Dqwo/G0yOC434Jr2ojwafWJMYqFGFa5ms4jJUgujdA==", + "requires": { + "@octokit/types": "^6.0.3", + "is-plain-object": "^5.0.0", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/graphql": { + "version": "4.8.0", + "resolved": "https://registry.npmjs.org/@octokit/graphql/-/graphql-4.8.0.tgz", + "integrity": "sha512-0gv+qLSBLKF0z8TKaSKTsS39scVKF9dbMxJpj3U0vC7wjNWFuIpL/z76Qe2fiuCbDRcJSavkXsVtMS6/dtQQsg==", + "requires": { + "@octokit/request": "^5.6.0", + "@octokit/types": "^6.0.3", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/openapi-types": { + "version": "12.11.0", + "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-12.11.0.tgz", + "integrity": "sha512-VsXyi8peyRq9PqIz/tpqiL2w3w80OgVMwBHltTml3LmVvXiphgeqmY9mvBw9Wu7e0QWk/fqD37ux8yP5uVekyQ==" + }, + "@octokit/plugin-paginate-rest": { + "version": "2.21.3", + 
"resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-2.21.3.tgz", + "integrity": "sha512-aCZTEf0y2h3OLbrgKkrfFdjRL6eSOo8komneVQJnYecAxIej7Bafor2xhuDJOIFau4pk0i/P28/XgtbyPF0ZHw==", + "requires": { + "@octokit/types": "^6.40.0" + } + }, + "@octokit/plugin-rest-endpoint-methods": { + "version": "5.16.2", + "resolved": "https://registry.npmjs.org/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-5.16.2.tgz", + "integrity": "sha512-8QFz29Fg5jDuTPXVtey05BLm7OB+M8fnvE64RNegzX7U+5NUXcOcnpTIK0YfSHBg8gYd0oxIq3IZTe9SfPZiRw==", + "requires": { + "@octokit/types": "^6.39.0", + "deprecation": "^2.3.1" + } + }, + "@octokit/request": { + "version": "5.6.3", + "resolved": "https://registry.npmjs.org/@octokit/request/-/request-5.6.3.tgz", + "integrity": "sha512-bFJl0I1KVc9jYTe9tdGGpAMPy32dLBXXo1dS/YwSCTL/2nd9XeHsY616RE3HPXDVk+a+dBuzyz5YdlXwcDTr2A==", + "requires": { + "@octokit/endpoint": "^6.0.1", + "@octokit/request-error": "^2.1.0", + "@octokit/types": "^6.16.1", + "is-plain-object": "^5.0.0", + "node-fetch": "^2.6.7", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/request-error": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/@octokit/request-error/-/request-error-2.1.0.tgz", + "integrity": "sha512-1VIvgXxs9WHSjicsRwq8PlR2LR2x6DwsJAaFgzdi0JfJoGSO8mYI/cHJQ+9FbN21aa+DrgNLnwObmyeSC8Rmpg==", + "requires": { + "@octokit/types": "^6.0.3", + "deprecation": "^2.0.0", + "once": "^1.4.0" + } + }, + "@octokit/types": { + "version": "6.41.0", + "resolved": "https://registry.npmjs.org/@octokit/types/-/types-6.41.0.tgz", + "integrity": "sha512-eJ2jbzjdijiL3B4PrSQaSjuF2sPEQPVCPzBvTHJD9Nz+9dw2SGH4K4xeQJ77YfTq5bRQ+bD8wT11JbeDPmxmGg==", + "requires": { + "@octokit/openapi-types": "^12.11.0" + } + }, + "before-after-hook": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/before-after-hook/-/before-after-hook-2.2.3.tgz", + "integrity": 
"sha512-NzUnlZexiaH/46WDhANlyR2bXRopNg4F/zuSA3OpZnllCUgRaOF2znDioDWrmbNVsuZk6l9pMquQB38cfBZwkQ==" + }, + "deprecation": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/deprecation/-/deprecation-2.3.1.tgz", + "integrity": "sha512-xmHIy4F3scKVwMsQ4WnVaS8bHOx0DmVwRywosKhaILI0ywMDWPtBSku2HNxRvF7jtwDRsoEwYQSfbxj8b7RlJQ==" + }, + "is-plain-object": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-5.0.0.tgz", + "integrity": "sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==" + }, + "node-fetch": { + "version": "2.6.13", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.13.tgz", + "integrity": "sha512-StxNAxh15zr77QvvkmveSQ8uCQ4+v5FkvNTj0OESmiHu+VRi/gXArXtkWMElOsOUNLtUEvI4yS+rdtOHZTwlQA==", + "requires": { + "whatwg-url": "^5.0.0" + } + }, + "once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "requires": { + "wrappy": "1" + } + }, + "tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" + }, + "tunnel": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/tunnel/-/tunnel-0.0.6.tgz", + "integrity": "sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg==" + }, + "universal-user-agent": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/universal-user-agent/-/universal-user-agent-6.0.0.tgz", + "integrity": "sha512-isyNax3wXoKaulPDZWHQqbmIx1k2tb9fb3GGDBRxCscfYV2Ch7WxPArBsFEG8s/safwXTT7H4QGhaIkTp9447w==" + }, + "uuid": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", + "integrity": 
"sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==" + }, + "webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" + }, + "whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "requires": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + } + } +} diff --git a/.github/actions/bot/package.json b/.github/actions/bot/package.json new file mode 100644 index 000000000..0c3a320e9 --- /dev/null +++ b/.github/actions/bot/package.json @@ -0,0 +1,13 @@ +{ + "name": "bot", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "command": "./local-harness.js $@" + }, + "dependencies": { + "@actions/core": "^1.10.0", + "@actions/github": "^5.1.1" + } +} diff --git a/.github/actions/ci/build/action.yaml b/.github/actions/ci/build/action.yaml new file mode 100644 index 000000000..befdc6f7e --- /dev/null +++ b/.github/actions/ci/build/action.yaml @@ -0,0 +1,34 @@ +name: "[CI] Build" +inputs: + git_sha: + required: true + type: string + build_id: + required: true + type: string + k8s_version: + required: true + type: string + additional_arguments: + required: false + type: string +outputs: + ami_id: + value: ${{ steps.build.outputs.ami_id }} +runs: + using: "composite" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.git_sha }} + - id: build + shell: bash + run: | + packer plugins install 
github.com/hashicorp/amazon + AMI_NAME="amazon-eks-node-${{ inputs.k8s_version }}-${{ inputs.build_id }}" + make k8s=${{ inputs.k8s_version }} ami_name=${AMI_NAME} ${{ inputs.additional_arguments }} + echo "ami_id=$(jq -r .builds[0].artifact_id "${AMI_NAME}-manifest.json" | cut -d ':' -f 2)" >> $GITHUB_OUTPUT + - uses: actions/upload-artifact@v3 + with: + name: version-info + path: "*-version-info.json" diff --git a/.github/actions/ci/launch/action.yaml b/.github/actions/ci/launch/action.yaml new file mode 100644 index 000000000..c5e6303b8 --- /dev/null +++ b/.github/actions/ci/launch/action.yaml @@ -0,0 +1,52 @@ +name: '[CI] Integration test / Launch' +inputs: + build_id: + required: true + type: string + ami_id: + required: true + type: string + k8s_version: + required: true + type: string + aws_region: + required: true + type: string +outputs: + cluster_name: + value: ${{ steps.launch.outputs.cluster_name }} +runs: + using: "composite" + steps: + - id: launch + shell: bash + run: | + wget --no-verbose -O eksctl.tar.gz "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz" + tar xf eksctl.tar.gz && chmod +x ./eksctl + + SANITIZED_K8S_VERSION=$(echo ${{ inputs.k8s_version }} | tr -d '.') + CLUSTER_NAME="$SANITIZED_K8S_VERSION-${{ inputs.build_id }}" + + echo '--- + apiVersion: eksctl.io/v1alpha5 + kind: ClusterConfig + metadata: + name: "'$CLUSTER_NAME'" + region: "${{ inputs.aws_region }}" + version: "${{ inputs.k8s_version }}" + nodeGroups: + - name: "${{ inputs.build_id }}" + instanceType: m5.large + minSize: 3 + maxSize: 3 + desiredCapacity: 3 + ami: "${{ inputs.ami_id }}" + amiFamily: AmazonLinux2 + overrideBootstrapCommand: | + #!/bin/bash + source /var/lib/cloud/scripts/eksctl/bootstrap.helper.sh + /etc/eks/bootstrap.sh "'$CLUSTER_NAME'" --kubelet-extra-args "--node-labels=${NODE_LABELS}"' >> cluster.yaml + cat cluster.yaml + + ./eksctl create cluster --config-file cluster.yaml + echo "cluster_name=$CLUSTER_NAME" >> 
$GITHUB_OUTPUT diff --git a/.github/actions/ci/sonobuoy/action.yaml b/.github/actions/ci/sonobuoy/action.yaml new file mode 100644 index 000000000..e829719b9 --- /dev/null +++ b/.github/actions/ci/sonobuoy/action.yaml @@ -0,0 +1,15 @@ +name: '[CI] Integration test / Sonobuoy' +inputs: + cluster_name: + required: true + type: string +runs: + using: "composite" + steps: + - shell: bash + run: | + aws eks update-kubeconfig --name ${{ inputs.cluster_name }} + wget --no-verbose -O sonobuoy.tar.gz "https://github.com/vmware-tanzu/sonobuoy/releases/download/v0.56.11/sonobuoy_0.56.11_linux_amd64.tar.gz" + tar xf sonobuoy.tar.gz && chmod +x ./sonobuoy + ./sonobuoy run --wait + ./sonobuoy results $(./sonobuoy retrieve) diff --git a/.github/actions/janitor/ami-sweeper/action.yaml b/.github/actions/janitor/ami-sweeper/action.yaml new file mode 100644 index 000000000..e7735cc32 --- /dev/null +++ b/.github/actions/janitor/ami-sweeper/action.yaml @@ -0,0 +1,13 @@ +name: "[Janitor] AMI sweeper" +description: "🗑️ Deletes CI AMI's when they're no longer needed" +inputs: + max_age_seconds: + description: "Number of seconds after creation when an AMI becomes eligible for deletion" + required: true +runs: + using: "composite" + steps: + - run: ${{ github.action_path }}/script.sh + shell: bash + env: + MAX_AGE_SECONDS: ${{ inputs.max_age_seconds }} diff --git a/.github/actions/janitor/ami-sweeper/script.sh b/.github/actions/janitor/ami-sweeper/script.sh new file mode 100755 index 000000000..f20e6005a --- /dev/null +++ b/.github/actions/janitor/ami-sweeper/script.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail + +MAX_AGE_SECONDS=${MAX_AGE_SECONDS:-$1} +if [ -z "${MAX_AGE_SECONDS}" ]; then + echo "usage: $0 MAX_AGE_SECONDS" + exit 1 +fi + +set -o nounset + +# https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-retries.html +export AWS_RETRY_MODE=standard +export AWS_MAX_ATTEMPTS=5 + +function jqb64() { + if [ "$#" -lt 2 ]; then + echo "usage: jqb64 BASE64_JSON 
JQ_ARGS..." + exit 1 + fi + BASE64_JSON="$1" + shift + echo "$BASE64_JSON" | base64 --decode | jq "$@" +} +for IMAGE_DETAILS in $(aws ec2 describe-images --owners self --output json | jq -r '.Images[] | @base64'); do + NAME=$(jqb64 "$IMAGE_DETAILS" -r '.Name') + IMAGE_ID=$(jqb64 "$IMAGE_DETAILS" -r '.ImageId') + CREATION_DATE=$(jqb64 "$IMAGE_DETAILS" -r '.CreationDate') + CREATION_DATE_SECONDS=$(date -d "$CREATION_DATE" '+%s') + CURRENT_TIME_SECONDS=$(date '+%s') + MIN_CREATION_DATE_SECONDS=$(($CURRENT_TIME_SECONDS - $MAX_AGE_SECONDS)) + if [ "$CREATION_DATE_SECONDS" -lt "$MIN_CREATION_DATE_SECONDS" ]; then + aws ec2 deregister-image --image-id "$IMAGE_ID" + for SNAPSHOT_ID in $(jqb64 "$IMAGE_DETAILS" -r '.BlockDeviceMappings[].Ebs.SnapshotId'); do + aws ec2 delete-snapshot --snapshot-id "$SNAPSHOT_ID" + done + echo "Deleted $IMAGE_ID: $NAME" + fi +done diff --git a/.github/actions/janitor/cluster-sweeper/action.yaml b/.github/actions/janitor/cluster-sweeper/action.yaml new file mode 100644 index 000000000..e53de27d1 --- /dev/null +++ b/.github/actions/janitor/cluster-sweeper/action.yaml @@ -0,0 +1,13 @@ +name: "[Janitor] Cluster sweeper" +description: "🗑️ Deletes CI clusters when they're no longer needed" +inputs: + max_age_seconds: + description: "Number of seconds after creation when a cluster becomes eligible for deletion" + required: true +runs: + using: "composite" + steps: + - run: ${{ github.action_path }}/script.sh + shell: bash + env: + MAX_AGE_SECONDS: ${{ inputs.max_age_seconds }} diff --git a/.github/actions/janitor/cluster-sweeper/script.sh b/.github/actions/janitor/cluster-sweeper/script.sh new file mode 100755 index 000000000..57a20759d --- /dev/null +++ b/.github/actions/janitor/cluster-sweeper/script.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail + +MAX_AGE_SECONDS=${MAX_AGE_SECONDS:-$1} +if [ -z "${MAX_AGE_SECONDS}" ]; then + echo "usage: $0 MAX_AGE_SECONDS" + exit 1 +fi + +set -o nounset + +# 
https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-retries.html +AWS_RETRY_MODE=standard +AWS_MAX_ATTEMPTS=5 + +function iso8601_is_eligible_for_deletion() { + local TIME_IN_ISO8601="$1" + local TIME_IN_SECONDS=$(date -d "$TIME_IN_ISO8601" '+%s') + local CURRENT_TIME_IN_SECONDS=$(date '+%s') + MIN_TIME_SECONDS=$(($CURRENT_TIME_IN_SECONDS - $MAX_AGE_SECONDS)) + [ "$TIME_IN_SECONDS" -lt "$MIN_TIME_SECONDS" ] +} +function cluster_is_eligible_for_deletion() { + local CLUSTER_NAME="$1" + local CREATED_AT_ISO8601=$(aws eks describe-cluster --name $CLUSTER_NAME --query 'cluster.createdAt' --output text) + iso8601_is_eligible_for_deletion "$CREATED_AT_ISO8601" +} +wget --no-verbose -O eksctl.tar.gz "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz" +tar xf eksctl.tar.gz && chmod +x ./eksctl +for CLUSTER in $(aws eks list-clusters --query 'clusters[]' --output text); do + if cluster_is_eligible_for_deletion $CLUSTER; then + echo "Deleting cluster $CLUSTER" + ./eksctl delete cluster --name "$CLUSTER" --force --disable-nodegroup-eviction + fi +done diff --git a/.github/release.yaml b/.github/release.yaml new file mode 100644 index 000000000..5fbdeeba5 --- /dev/null +++ b/.github/release.yaml @@ -0,0 +1,5 @@ +--- +changelog: + exclude: + labels: + - "changelog/exclude" diff --git a/.github/workflows/bot-trigger.yaml b/.github/workflows/bot-trigger.yaml new file mode 100644 index 000000000..d728d4f10 --- /dev/null +++ b/.github/workflows/bot-trigger.yaml @@ -0,0 +1,14 @@ +name: Bot +run-name: 🤖 beep boop +on: + issue_comment: + types: + - created +jobs: + bot: + if: ${{ github.event.issue.pull_request }} + runs-on: ubuntu-latest + permissions: write-all + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/bot diff --git a/.github/workflows/ci-auto.yaml b/.github/workflows/ci-auto.yaml new file mode 100644 index 000000000..879ba2bb3 --- /dev/null +++ b/.github/workflows/ci-auto.yaml @@ -0,0 +1,20 @@ +name: "[CI] 
Auto" +on: + pull_request: + types: + - opened + - reopened + - synchronize +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - run: echo "$(go env GOPATH)/bin" >> $GITHUB_PATH + - run: go install mvdan.cc/sh/v3/cmd/shfmt@latest + - run: make lint + unit-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - run: make test diff --git a/.github/workflows/ci-manual.yaml b/.github/workflows/ci-manual.yaml new file mode 100644 index 000000000..2860b75c7 --- /dev/null +++ b/.github/workflows/ci-manual.yaml @@ -0,0 +1,186 @@ +name: '[CI] Manual' +run-name: "#${{ inputs.pr_number }} - ${{ inputs.uuid }}" +on: + workflow_dispatch: + inputs: + requester: + required: true + type: string + comment_url: + required: true + type: string + uuid: + required: true + type: string + pr_number: + required: true + type: string + git_sha: + required: true + type: string + goal: + required: true + type: choice + default: "test" + options: + - "build" + - "launch" + - "test" + build_arguments: + required: false + type: string +jobs: + setup: + runs-on: ubuntu-latest + outputs: + git_sha_short: ${{ steps.variables.outputs.git_sha_short }} + workflow_run_url: ${{ steps.variables.outputs.workflow_run_url }} + kubernetes_versions: ${{ steps.variables.outputs.kubernetes_versions }} + build_id: ${{ steps.variables.outputs.build_id }} + ci_step_name_prefix: ${{ steps.variables.outputs.ci_step_name_prefix }} + steps: + - id: variables + run: | + echo "git_sha_short=$(echo ${{ inputs.git_sha }} | rev | cut -c-7 | rev)" >> $GITHUB_OUTPUT + echo "workflow_run_url=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" >> $GITHUB_OUTPUT + # grab supported versions directly from eksctl + wget --no-verbose -O eksctl.tar.gz "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz" + tar xzf eksctl.tar.gz && chmod +x ./eksctl + echo "kubernetes_versions=$(./eksctl version --output json | jq -c 
.EKSServerSupportedVersions)" >> $GITHUB_OUTPUT + echo "build_id=ci-${{ inputs.pr_number }}-${{ needs.setup.outputs.git_sha_short }}-${{ inputs.uuid }}" >> $GITHUB_OUTPUT + echo 'ci_step_name_prefix=CI:' >> $GITHUB_OUTPUT + + notify-start: + runs-on: ubuntu-latest + needs: + - setup + steps: + - uses: actions/github-script@v6 + with: + script: | + github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: ${{ inputs.pr_number }}, + body: `@${{ inputs.requester }} roger [that](${{ inputs.comment_url }})! I've dispatched a [workflow](${{ needs.setup.outputs.workflow_run_url }}). 👍` + }); + kubernetes-versions: + runs-on: ubuntu-latest + name: ${{ matrix.k8s_version }} + needs: + - setup + - notify-start + permissions: + id-token: write + contents: read + strategy: + # don't bail out of all sub-tasks if one fails + fail-fast: false + matrix: + k8s_version: ${{ fromJson(needs.setup.outputs.kubernetes_versions) }} + steps: + - uses: actions/checkout@v3 + with: + ref: 'master' + - uses: aws-actions/configure-aws-credentials@v2 + with: + aws-region: ${{ secrets.AWS_REGION }} + role-to-assume: ${{ secrets.AWS_ROLE_ARN_CI }} + # 2.5 hours (job usually completes within 2 hours) + role-duration-seconds: 9000 + - name: "${{ needs.setup.outputs.ci_step_name_prefix }} Build" + id: build + uses: ./.github/actions/ci/build + with: + git_sha: ${{ inputs.git_sha }} + k8s_version: ${{ matrix.k8s_version }} + build_id: ${{ needs.setup.outputs.build_id }} + additional_arguments: ${{ inputs.build_arguments }} + - if: ${{ inputs.goal == 'launch' || inputs.goal == 'test' }} + name: "${{ needs.setup.outputs.ci_step_name_prefix }} Launch" + id: launch + uses: ./.github/actions/ci/launch + with: + ami_id: ${{ steps.build.outputs.ami_id }} + k8s_version: ${{ matrix.k8s_version }} + build_id: ${{ needs.setup.outputs.build_id }} + aws_region: ${{ secrets.AWS_REGION }} + - if: ${{ inputs.goal == 'test' }} + name: "${{ 
needs.setup.outputs.ci_step_name_prefix }} Test" + id: sonobuoy + uses: ./.github/actions/ci/sonobuoy + with: + cluster_name: ${{ steps.launch.outputs.cluster_name }} + notify-outcome: + if: ${{ always() }} + runs-on: ubuntu-latest + needs: + - setup + - kubernetes-versions + steps: + - uses: actions/github-script@v6 + with: + script: | + const { data } = await github.rest.actions.listJobsForWorkflowRun({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: context.runId + }); + const conclusionEmojis = { + "success": "✅", + "skipped": "⏭️", + "failure": "❌", + "cancelled": "🚮" + }; + const uniqueStepNames = new Set(); + const stepConclusionsByK8sVersion = new Map(); + const ciStepNamePrefix = "${{ needs.setup.outputs.ci_step_name_prefix }}"; + for (const job of data.jobs) { + if (/\d+\.\d+/.test(job.name)) { + const k8sVersion = job.name; + for (const step of job.steps) { + if (step.name.startsWith(ciStepNamePrefix)) { + const stepName = step.name.substring(ciStepNamePrefix.length).trim(); + let stepConclusions = stepConclusionsByK8sVersion.get(k8sVersion); + if (!stepConclusions) { + stepConclusions = new Map(); + stepConclusionsByK8sVersion.set(k8sVersion, stepConclusions); + } + stepConclusions.set(stepName, step.conclusion); + uniqueStepNames.add(stepName); + } + } + } + } + const headers = [{ + data: 'Kubernetes version', + header: true + }]; + for (const stepName of uniqueStepNames.values()) { + headers.push({ + data: stepName, + header: true + }); + } + const rows = []; + for (const stepConclusionsForK8sVersion of [...stepConclusionsByK8sVersion.entries()].sort()) { + const k8sVersion = stepConclusionsForK8sVersion[0]; + const row = [k8sVersion]; + for (const step of stepConclusionsForK8sVersion[1].entries()) { + row.push(`${step[1]} ${conclusionEmojis[step[1]]}`); + } + rows.push(row); + } + const commentBody = core.summary + .addRaw("@${{ inputs.requester }} the workflow that you requested has completed. 
🎉") + .addTable([ + headers, + ...rows, + ]) + .stringify(); + github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: ${{ inputs.pr_number }}, + body: commentBody + }); diff --git a/.github/workflows/janitor.yaml b/.github/workflows/janitor.yaml new file mode 100644 index 000000000..47fec1059 --- /dev/null +++ b/.github/workflows/janitor.yaml @@ -0,0 +1,38 @@ +name: "Janitor" +on: + workflow_dispatch: + schedule: + # hourly at the top of the hour + - cron: "0 * * * *" +permissions: + id-token: write + contents: read +jobs: + cluster-sweeper: + # disable in forks + if: github.repository == 'awslabs/amazon-eks-ami' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: aws-actions/configure-aws-credentials@v2 + with: + aws-region: ${{ secrets.AWS_REGION }} + role-to-assume: ${{ secrets.AWS_ROLE_ARN_JANITOR }} + - uses: ./.github/actions/janitor/cluster-sweeper + with: + # 3 hours + max_age_seconds: 10800 + ami-sweeper: + # disable in forks + if: github.repository == 'awslabs/amazon-eks-ami' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: aws-actions/configure-aws-credentials@v2 + with: + aws-region: ${{ secrets.AWS_REGION }} + role-to-assume: ${{ secrets.AWS_ROLE_ARN_JANITOR }} + - uses: ./.github/actions/janitor/ami-sweeper + with: + # 3 days + max_age_seconds: 259200 diff --git a/.github/workflows/sync-eni-max-pods.yaml b/.github/workflows/sync-eni-max-pods.yaml new file mode 100644 index 000000000..2affd7873 --- /dev/null +++ b/.github/workflows/sync-eni-max-pods.yaml @@ -0,0 +1,52 @@ +name: '[Sync] Update eni-max-pods.txt' +on: + workflow_dispatch: + schedule: + # once a day + - cron: '0 0 * * *' +permissions: + id-token: write + contents: write + pull-requests: write +jobs: + update-max-pods: + # this workflow will always fail in forks; bail if this isn't running in the upstream + if: github.repository == 'awslabs/amazon-eks-ami' + runs-on: ubuntu-latest + steps: + - 
uses: aws-actions/configure-aws-credentials@v2 + with: + aws-region: ${{ secrets.AWS_REGION }} + role-to-assume: ${{ secrets.AWS_ROLE_ARN_SYNC_ENI_MAX_PODS }} + - uses: actions/checkout@v3 + with: + repository: awslabs/amazon-eks-ami + ref: refs/heads/master + path: amazon-eks-ami/ + - uses: actions/checkout@v3 + with: + repository: aws/amazon-vpc-cni-k8s + ref: refs/heads/master + path: amazon-vpc-cni-k8s/ + - run: | + #!/usr/bin/env bash + set -o errexit + cd amazon-vpc-cni-k8s/ + make generate-limits + cp misc/eni-max-pods.txt ../amazon-eks-ami/files/eni-max-pods.txt + - uses: peter-evans/create-pull-request@v4 + with: + branch: update-eni-max-pods + path: amazon-eks-ami/ + add-paths: files/eni-max-pods.txt + commit-message: "Update eni-max-pods.txt" + committer: "GitHub " + author: "GitHub " + labels: | + changelog/exclude + title: "Update eni-max-pods.txt" + body: | + Generated by [aws/amazon-vpc-cni-k8s](https://github.com/aws/amazon-vpc-cni-k8s): + ``` + make generate-limits + ``` diff --git a/.github/workflows/sync-to-codecommit.yaml b/.github/workflows/sync-to-codecommit.yaml new file mode 100644 index 000000000..a1748c4ca --- /dev/null +++ b/.github/workflows/sync-to-codecommit.yaml @@ -0,0 +1,31 @@ +name: '[Sync] Push to CodeCommit' + +on: + schedule: + # twice an hour, at :00 and :30 + - cron: '0,30 * * * *' + +jobs: + mirror: + if: github.repository == 'awslabs/amazon-eks-ami' + runs-on: ubuntu-latest + # These permissions are needed to interact with GitHub's OIDC Token endpoint. 
+ permissions: + id-token: write + contents: read + steps: + - uses: actions/checkout@v2 + with: + # fetch complete history + fetch-depth: 0 + - uses: aws-actions/configure-aws-credentials@v1 + with: + aws-region: ${{ secrets.AWS_REGION }} + role-to-assume: ${{ secrets.AWS_ROLE_ARN_SYNC_TO_CODECOMMIT }} + - run: git config credential.helper '!aws codecommit credential-helper $@' + - run: git config credential.UseHttpPath true + - run: git remote add codecommit ${{ secrets.AWS_CODECOMMIT_REPO_URL }} + - run: git checkout master + - run: git push codecommit master + - run: git checkout main + - run: git push codecommit main diff --git a/.github/workflows/update-changelog.yaml b/.github/workflows/update-changelog.yaml new file mode 100644 index 000000000..1b7c5680e --- /dev/null +++ b/.github/workflows/update-changelog.yaml @@ -0,0 +1,62 @@ +name: "[Release] Update CHANGELOG.md" +on: + release: + types: [released] +permissions: + contents: write + pull-requests: write +jobs: + setup: + # this workflow will always fail in forks; bail if this isn't running in the upstream + if: github.repository == 'awslabs/amazon-eks-ami' + runs-on: ubuntu-latest + outputs: + tag_name: ${{ steps.variables.outputs.tag_name }} + steps: + - id: variables + run: | + echo "tag_name=$(echo ${{ github.ref }} | cut -d/ -f3)" >> $GITHUB_OUTPUT + update-changelog: + runs-on: ubuntu-latest + needs: + - setup + steps: + - uses: actions/checkout@v3 + with: + repository: awslabs/amazon-eks-ami + ref: refs/heads/master + path: amazon-eks-ami/ + - uses: actions/github-script@v6 + with: + script: | + const fs = require('fs'); + const changelogPath = './amazon-eks-ami/CHANGELOG.md'; + const placeholder = ''; + const tagName = '${{ needs.setup.outputs.tag_name }}'; + const release = await github.rest.repos.getReleaseByTag({ + tag: tagName, + owner: context.repo.owner, + repo: context.repo.repo, + }); + const changelog = fs.readFileSync(changelogPath, 'utf8'); + if (changelog.includes(release.data.name)) 
{ + throw new Error(`changelog already includes ${release.data.name}`); + } + const newEntry = `# ${release.data.name}\n${release.data.body}`; + let updatedChangelog = changelog.replace(placeholder, placeholder + '\n\n' + newEntry + '\n---\n'); + // if the release notes are modified in the GitHub web editor, trailing spaces can be added accidentally + updatedChangelog = updatedChangelog.replace(/\s+$/, ''); + fs.writeFileSync(changelogPath, updatedChangelog); + - uses: peter-evans/create-pull-request@v4 + with: + branch: update-changelog + path: amazon-eks-ami/ + add-paths: CHANGELOG.md + commit-message: "Update CHANGELOG.md for release ${{ needs.setup.outputs.tag_name }}" + committer: "GitHub " + author: "GitHub " + title: "Update CHANGELOG.md" + labels: | + changelog/exclude + body: | + Adds CHANGELOG.md entry for release [${{ needs.setup.outputs.tag_name }}](https://github.com/awslabs/amazon-eks-ami/releases/tag/${{ needs.setup.outputs.tag_name }}). diff --git a/.gitignore b/.gitignore index 3a369e0c2..12527754f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,7 @@ -manifest.json +*manifest.json *.swp +.idea +*version-info.json +.DS_Store +site/ +.git-commit diff --git a/ArchiveBuildConfig.yaml b/ArchiveBuildConfig.yaml index f3631b356..d7a4de238 100644 --- a/ArchiveBuildConfig.yaml +++ b/ArchiveBuildConfig.yaml @@ -8,11 +8,13 @@ dependencies: source: dirs: - src: files/ + - src: scripts/ + - src: log-collector-script/ files: - src: Makefile - src: eks-worker-al2.json - - src: install-worker.sh - - src: amazon-eks-nodegroup.yaml + - src: eks-worker-al2-variables.json + - src: .git-commit archive: name: amazon-eks-ami.tar.gz - type: tgz \ No newline at end of file + type: tgz diff --git a/CHANGELOG.md b/CHANGELOG.md index 359a4c48e..448d3b598 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8566 @@ # Changelog + + + +# AMI Release v20240227 + + +> [!NOTE] +> This release includes changes in the Kubernetes 1.29 GPU AMI to address a compatibility issue 
with the EFA and NVIDIA kernel modules. More information is available in https://github.com/awslabs/amazon-eks-ami/issues/1494. + +## What's Changed +* Allow `containerd` config imports by @ndbaker1 in https://github.com/awslabs/amazon-eks-ami/pull/1630 +* cleanup al2023 templates by @ndbaker1 in https://github.com/awslabs/amazon-eks-ami/pull/1682 +* Do not prepare local disks by default by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1686 +* Add InstanceOptions with LocalDiskStrategy by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1688 +* Remove setup-local-disks unit from al2023 template by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1691 + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20240213...v20240227 + +--- + +

AMI Details

+ + +
+Kubernetes 1.29 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.29-v202402271.29.0-20240227s3://amazon-eks/1.29.0/2024-01-04/
amazon-eks-node-1.29-v20240227
amazon-eks-node-al2023-arm64-standard-1.29-v20240227
amazon-eks-arm64-node-1.29-v20240227
amazon-eks-gpu-node-1.29-v20240227
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.2222.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda-12-212.2.2-1
efa2.6.0-1.amzn2
kernel5.10.209-198.858.amzn2
nvidia-driver-latest-dkms535.161.07-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.2222.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.77-99.164.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.28-v202402271.28.5-20240227s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-node-1.28-v20240227
amazon-eks-node-al2023-arm64-standard-1.28-v20240227
amazon-eks-arm64-node-1.28-v20240227
amazon-eks-gpu-node-1.28-v20240227
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.2222.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
cuda-12-212.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel5.10.209-198.858.amzn25.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.2222.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.77-99.164.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.27-v202402271.27.9-20240227s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-node-1.27-v20240227
amazon-eks-node-al2023-arm64-standard-1.27-v20240227
amazon-eks-arm64-node-1.27-v20240227
amazon-eks-gpu-node-1.27-v20240227
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.2222.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
cuda-12-212.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel5.10.209-198.858.amzn25.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.2222.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.77-99.164.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.26-v202402271.26.12-20240227s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-node-1.26-v20240227
amazon-eks-node-al2023-arm64-standard-1.26-v20240227
amazon-eks-arm64-node-1.26-v20240227
amazon-eks-gpu-node-1.26-v20240227
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.2222.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
cuda-12-212.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel5.10.209-198.858.amzn25.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.2222.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.77-99.164.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.25-v202402271.25.16-20240227s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-node-1.25-v20240227
amazon-eks-node-al2023-arm64-standard-1.25-v20240227
amazon-eks-arm64-node-1.25-v20240227
amazon-eks-gpu-node-1.25-v20240227
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.2222.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
cuda-12-212.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel5.10.209-198.858.amzn25.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.2222.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.77-99.164.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.24-v202402271.24.17-20240227s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-node-1.24-v20240227
amazon-eks-node-al2023-arm64-standard-1.24-v20240227
amazon-eks-arm64-node-1.24-v20240227
amazon-eks-gpu-node-1.24-v20240227
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.2222.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.10.209-198.858.amzn25.4.254-170.358.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.2222.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.77-99.164.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.23-v202402271.23.17-20240227s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-node-1.23-v20240227
amazon-eks-node-al2023-arm64-standard-1.23-v20240227
amazon-eks-arm64-node-1.23-v20240227
amazon-eks-gpu-node-1.23-v20240227
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.2222.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.4.268-181.370.amzn25.4.254-170.358.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.2222.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.77-99.164.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25-1.28: `5.10.192-183.736.amzn2` + +--- + + +# AMI Release v20240213 + + +## What's Changed +* harden pull-sandbox-image script by @ndbaker1 in https://github.com/awslabs/amazon-eks-ami/pull/1649 +* Merge `al2023` to `main` by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1653 +* Switch branch for dependency review by @Issacwww in https://github.com/awslabs/amazon-eks-ami/pull/1659 + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20240209...v20240213 + +--- + +> [!NOTE] +> The notes on the [Releases](https://github.com/awslabs/amazon-eks-ami/releases) page may be truncated, and you may not see all supported Kubernetes versions. +> The full release notes can be viewed [here](https://github.com/awslabs/amazon-eks-ami/releases/tag/v20240213). +> More information is in #1666. + +

AMI Details

+ + +
+Kubernetes 1.29 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.29-v202402131.29.0-20240213s3://amazon-eks/1.29.0/2024-01-04/
amazon-eks-node-1.29-v20240213
amazon-eks-node-al2023-arm64-standard-1.29-v20240213
amazon-eks-arm64-node-1.29-v20240213
amazon-eks-gpu-node-1.29-v20240213
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel5.10.209-198.812.amzn25.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.1705.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.75-99.163.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.28-v202402131.28.5-20240213s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-node-1.28-v20240213
amazon-eks-node-al2023-arm64-standard-1.28-v20240213
amazon-eks-arm64-node-1.28-v20240213
amazon-eks-gpu-node-1.28-v20240213
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel5.10.209-198.812.amzn25.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.1705.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.75-99.163.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.27-v202402131.27.9-20240213s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-node-1.27-v20240213
amazon-eks-node-al2023-arm64-standard-1.27-v20240213
amazon-eks-arm64-node-1.27-v20240213
amazon-eks-gpu-node-1.27-v20240213
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel5.10.209-198.812.amzn25.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.1705.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.75-99.163.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.26-v202402131.26.12-20240213s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-node-1.26-v20240213
amazon-eks-node-al2023-arm64-standard-1.26-v20240213
amazon-eks-arm64-node-1.26-v20240213
amazon-eks-gpu-node-1.26-v20240213
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel5.10.209-198.812.amzn25.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.1705.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.75-99.163.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.25-v202402131.25.16-20240213s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-node-1.25-v20240213
amazon-eks-node-al2023-arm64-standard-1.25-v20240213
amazon-eks-arm64-node-1.25-v20240213
amazon-eks-gpu-node-1.25-v20240213
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
docker20.10.25-1.amzn2.0.4
efa2.6.0-1.amzn2
kernel5.10.209-198.812.amzn25.10.192-183.736.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.1705.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.75-99.163.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.24-v202402131.24.17-20240213s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-node-1.24-v20240213
amazon-eks-node-al2023-arm64-standard-1.24-v20240213
amazon-eks-arm64-node-1.24-v20240213
amazon-eks-gpu-node-1.24-v20240213
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.10.209-198.812.amzn25.4.254-170.358.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.1705.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.75-99.163.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + + + + + + +
AMI NamesRelease versionIncluded artifacts
amazon-eks-node-al2023-x86_64-standard-1.23-v202402131.23.17-20240213s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-node-1.23-v20240213
amazon-eks-node-al2023-arm64-standard-1.23-v20240213
amazon-eks-arm64-node-1.23-v20240213
amazon-eks-gpu-node-1.23-v20240213
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageAL2_x86_64AL2_ARM_64AL2_x86_64_GPU
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.4.268-181.368.amzn25.4.254-170.358.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+ + + + + + + + + + + + + + + + + + + + + + +
PackageAL2023_x86_64_STANDARDAL2023_ARM_64_STANDARD
amazon-ssm-agent3.2.1705.0-1.amzn2023
containerd1.7.11-1.amzn2023.0.1
kernel6.1.75-99.163.amzn2023
runc1.1.11-1.amzn2023.0.1
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + +# AMI Release v20240209 + + +## What's Changed +* Specify region for local zones in sandbox image ecr auth by @ndbaker1 in https://github.com/awslabs/amazon-eks-ami/pull/1626 +* Fix CHANGELOG space errors by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1647 + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20240202...v20240209 + +--- + +

AMI Details

+ + +
+Kubernetes 1.29 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.29-v202402091.29.0-20240209s3://amazon-eks/1.29.0/2024-01-04/
amazon-eks-gpu-node-1.29-v20240209
amazon-eks-arm64-node-1.29-v20240209
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.209-198.812.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202402091.28.5-20240209s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-gpu-node-1.28-v20240209
amazon-eks-arm64-node-1.28-v20240209
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.209-198.812.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202402091.27.9-20240209s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-gpu-node-1.27-v20240209
amazon-eks-arm64-node-1.27-v20240209
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.209-198.812.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202402091.26.12-20240209s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-gpu-node-1.26-v20240209
amazon-eks-arm64-node-1.26-v20240209
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.209-198.812.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202402091.25.16-20240209s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-gpu-node-1.25-v20240209
amazon-eks-arm64-node-1.25-v20240209
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.209-198.812.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202402091.24.17-20240209s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-gpu-node-1.24-v20240209
amazon-eks-arm64-node-1.24-v20240209
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.10.209-198.812.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202402091.23.17-20240209s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-gpu-node-1.23-v20240209
amazon-eks-arm64-node-1.23-v20240209
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.4.268-181.368.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + +# AMI Release v20240202 + + +> [!NOTE] +> This release addresses an issue with Kubernetes 1.29 that allowed the sandbox container image used by `containerd` to be garbage-collected by `kubelet`. More information is available in #1597. + +## What's Changed +* Use crictl to pull sandbox image by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1605 +* Remove sandbox image from build-time cache by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1615 + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20240129...v20240202 + +--- + +

AMI Details

+ + +
+Kubernetes 1.29 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.29-v202402021.29.0-20240202s3://amazon-eks/1.29.0/2024-01-04/
amazon-eks-gpu-node-1.29-v20240202
amazon-eks-arm64-node-1.29-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202402021.28.5-20240202s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-gpu-node-1.28-v20240202
amazon-eks-arm64-node-1.28-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202402021.27.9-20240202s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-gpu-node-1.27-v20240202
amazon-eks-arm64-node-1.27-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202402021.26.12-20240202s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-gpu-node-1.26-v20240202
amazon-eks-arm64-node-1.26-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202402021.25.16-20240202s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-gpu-node-1.25-v20240202
amazon-eks-arm64-node-1.25-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202402021.24.17-20240202s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-gpu-node-1.24-v20240202
amazon-eks-arm64-node-1.24-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202402021.23.17-20240202s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-gpu-node-1.23-v20240202
amazon-eks-arm64-node-1.23-v20240202
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.11-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.4.266-178.365.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + +# AMI Release v20240129 + + + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20240117...v20240129 + +--- + +

AMI Details

+ + +
+Kubernetes 1.29 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.29-v202401291.29.0-20240129s3://amazon-eks/1.29.0/2024-01-04/
amazon-eks-gpu-node-1.29-v20240129
amazon-eks-arm64-node-1.29-v20240129
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202401291.28.5-20240129s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-gpu-node-1.28-v20240129
amazon-eks-arm64-node-1.28-v20240129
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202401291.27.9-20240129s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-gpu-node-1.27-v20240129
amazon-eks-arm64-node-1.27-v20240129
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202401291.26.12-20240129s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-gpu-node-1.26-v20240129
amazon-eks-arm64-node-1.26-v20240129
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202401291.25.16-20240129s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-gpu-node-1.25-v20240129
amazon-eks-arm64-node-1.25-v20240129
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202401291.24.17-20240129s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-gpu-node-1.24-v20240129
amazon-eks-arm64-node-1.24-v20240129
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.10.205-195.807.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202401291.23.17-20240129s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-gpu-node-1.23-v20240129
amazon-eks-arm64-node-1.23-v20240129
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.4.266-178.365.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.11-1.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + +# AMI Release v20240117 + + +## What's Changed +* Sync `al2023` branch to CodeCommit by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1571 + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20240110...v20240117 + +--- + +

AMI Details

+ + +
+Kubernetes 1.29 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.29-v202401171.29.0-20240117s3://amazon-eks/1.29.0/2024-01-04/
amazon-eks-gpu-node-1.29-v20240117
amazon-eks-arm64-node-1.29-v20240117
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202401171.28.5-20240117s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-gpu-node-1.28-v20240117
amazon-eks-arm64-node-1.28-v20240117
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202401171.27.9-20240117s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-gpu-node-1.27-v20240117
amazon-eks-arm64-node-1.27-v20240117
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202401171.26.12-20240117s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-gpu-node-1.26-v20240117
amazon-eks-arm64-node-1.26-v20240117
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202401171.25.16-20240117s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-gpu-node-1.25-v20240117
amazon-eks-arm64-node-1.25-v20240117
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202401171.24.17-20240117s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-gpu-node-1.24-v20240117
amazon-eks-arm64-node-1.24-v20240117
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202401171.23.17-20240117s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-gpu-node-1.23-v20240117
amazon-eks-arm64-node-1.23-v20240117
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.4.265-176.364.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + +# AMI Release v20240110 + + + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20231230...v20240110 + +--- + +

AMI Details

+ + +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202401101.28.5-20240110s3://amazon-eks/1.28.5/2024-01-04/
amazon-eks-gpu-node-1.28-v20240110
amazon-eks-arm64-node-1.28-v20240110
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202401101.27.9-20240110s3://amazon-eks/1.27.9/2024-01-04/
amazon-eks-gpu-node-1.27-v20240110
amazon-eks-arm64-node-1.27-v20240110
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202401101.26.12-20240110s3://amazon-eks/1.26.12/2024-01-04/
amazon-eks-gpu-node-1.26-v20240110
amazon-eks-arm64-node-1.26-v20240110
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202401101.25.16-20240110s3://amazon-eks/1.25.16/2024-01-04/
amazon-eks-gpu-node-1.25-v20240110
amazon-eks-arm64-node-1.25-v20240110
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202401101.24.17-20240110s3://amazon-eks/1.24.17/2024-01-04/
amazon-eks-gpu-node-1.24-v20240110
amazon-eks-arm64-node-1.24-v20240110
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.10.205-195.804.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202401101.23.17-20240110s3://amazon-eks/1.23.17/2024-01-04/
amazon-eks-gpu-node-1.23-v20240110
amazon-eks-arm64-node-1.23-v20240110
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.4
kernel5.4.265-176.364.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + +# AMI Release v20231230 + + +## What's Changed +* Revert "Set containerd LimitNOFILE to recommended value (#1535)" by @mmerkes in https://github.com/awslabs/amazon-eks-ami/pull/1552 + + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20231220...v20231230 + +--- + +

AMI Details

+ + +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202312301.28.3-20231230s3://amazon-eks/1.28.3/2023-11-14/
amazon-eks-gpu-node-1.28-v20231230
amazon-eks-arm64-node-1.28-v20231230
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202312301.27.7-20231230s3://amazon-eks/1.27.7/2023-11-14/
amazon-eks-gpu-node-1.27-v20231230
amazon-eks-arm64-node-1.27-v20231230
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202312301.26.10-20231230s3://amazon-eks/1.26.10/2023-11-14/
amazon-eks-gpu-node-1.26-v20231230
amazon-eks-arm64-node-1.26-v20231230
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202312301.25.15-20231230s3://amazon-eks/1.25.15/2023-11-14/
amazon-eks-gpu-node-1.25-v20231230
amazon-eks-arm64-node-1.25-v20231230
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202312301.24.17-20231230s3://amazon-eks/1.24.17/2023-11-14/
amazon-eks-gpu-node-1.24-v20231230
amazon-eks-arm64-node-1.24-v20231230
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.3
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202312301.23.17-20231230s3://amazon-eks/1.23.17/2023-11-14/
amazon-eks-gpu-node-1.23-v20231230
amazon-eks-arm64-node-1.23-v20231230
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.3
kernel5.4.261-174.360.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + +# AMI Release v20231220 + + +## What's Changed +* Set containerd LimitNOFILE to recommended value by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1535 +* Update get-ecr-uri.sh with ca-west-1 account by @mmerkes in https://github.com/awslabs/amazon-eks-ami/pull/1542 +* Fix typo opt names in `bootstrap.sh` logging by @ketozhang in https://github.com/awslabs/amazon-eks-ami/pull/1547 + +## New Contributors +* @ketozhang made their first contribution in https://github.com/awslabs/amazon-eks-ami/pull/1547 + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20231201...v20231220 + +--- + +

AMI Details

+ + +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202312201.28.3-20231220s3://amazon-eks/1.28.3/2023-11-14/
amazon-eks-gpu-node-1.28-v20231220
amazon-eks-arm64-node-1.28-v20231220
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202312201.27.7-20231220s3://amazon-eks/1.27.7/2023-11-14/
amazon-eks-gpu-node-1.27-v20231220
amazon-eks-arm64-node-1.27-v20231220
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202312201.26.10-20231220s3://amazon-eks/1.26.10/2023-11-14/
amazon-eks-gpu-node-1.26-v20231220
amazon-eks-arm64-node-1.26-v20231220
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202312201.25.15-20231220s3://amazon-eks/1.25.15/2023-11-14/
amazon-eks-gpu-node-1.25-v20231220
amazon-eks-arm64-node-1.25-v20231220
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.2-1
efa2.6.0-1.amzn2
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms535.129.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202312201.24.17-20231220s3://amazon-eks/1.24.17/2023-11-14/
amazon-eks-gpu-node-1.24-v20231220
amazon-eks-arm64-node-1.24-v20231220
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.3
kernel5.10.201-191.748.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.23-v202312201.23.17-20231220s3://amazon-eks/1.23.17/2023-11-14/
amazon-eks-gpu-node-1.23-v20231220
amazon-eks-arm64-node-1.23-v20231220
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.3
kernel5.4.261-174.360.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + +# AMI Release v20231201 + + +## What's Changed +* Check for ecr-fips endpoint availability by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1524 +* Install SSM agent from AL core repo by default by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1531 +* Update to `containerd` 1.7 by @cartermckinnon in https://github.com/awslabs/amazon-eks-ami/pull/1516 + +## New Contributors +* @JoeNorth made their first contribution in https://github.com/awslabs/amazon-eks-ami/pull/1533 + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20231116...v20231201 + +--- + +

AMI Details

+ + +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.28-v202312011.28.3-20231201s3://amazon-eks/1.28.3/2023-11-14/
amazon-eks-gpu-node-1.28-v20231201
amazon-eks-arm64-node-1.28-v20231201
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.0-1
kernel5.10.199-190.747.amzn2
nvidia-driver-latest-dkms535.54.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.27-v202312011.27.7-20231201s3://amazon-eks/1.27.7/2023-11-14/
amazon-eks-gpu-node-1.27-v20231201
amazon-eks-arm64-node-1.27-v20231201
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.0-1
kernel5.10.199-190.747.amzn2
nvidia-driver-latest-dkms535.54.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.26-v202312011.26.10-20231201s3://amazon-eks/1.26.10/2023-11-14/
amazon-eks-gpu-node-1.26-v20231201
amazon-eks-arm64-node-1.26-v20231201
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.0-1
kernel5.10.199-190.747.amzn2
nvidia-driver-latest-dkms535.54.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.25-v202312011.25.15-20231201s3://amazon-eks/1.25.15/2023-11-14/
amazon-eks-gpu-node-1.25-v20231201
amazon-eks-arm64-node-1.25-v20231201
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda12.2.0-1
kernel5.10.199-190.747.amzn2
nvidia-driver-latest-dkms535.54.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI namesRelease versionIncluded artifacts
amazon-eks-node-1.24-v202312011.24.17-20231201s3://amazon-eks/1.24.17/2023-11-14/
amazon-eks-gpu-node-1.24-v20231201
amazon-eks-arm64-node-1.24-v20231201
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersion
amazon-ssm-agent3.2.1705.0-1.amzn2
containerd1.7.2-1.amzn2.0.1
cuda11.4.0-1
docker20.10.25-1.amzn2.0.3
kernel5.10.199-190.747.amzn2
nvidia-driver-latest-dkms470.182.03-1.el7
runc1.1.7-4.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI names | Release version | Included artifacts
amazon-eks-node-1.23-v20231201 | 1.23.17-20231201 | s3://amazon-eks/1.23.17/2023-11-14/
amazon-eks-gpu-node-1.23-v20231201
amazon-eks-arm64-node-1.23-v20231201

Package | Version
amazon-ssm-agent | 3.2.1705.0-1.amzn2
containerd | 1.7.2-1.amzn2.0.1
cuda | 11.4.0-1
docker | 20.10.25-1.amzn2.0.3
kernel | 5.4.259-173.361.amzn2
nvidia-driver-latest-dkms | 470.182.03-1.el7
runc | 1.1.7-4.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + +# AMI Release v20231116 + + +## What's Changed +* Sets docker to the latest 20.10 version by @mmerkes in https://github.com/awslabs/amazon-eks-ami/pull/1510 + +## New Contributors +* @edmondceausu made their first contribution in https://github.com/awslabs/amazon-eks-ami/pull/1504 + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20231106...v20231116 + +--- + +

AMI Details

+ + +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI names | Release version | Included artifacts
amazon-eks-node-1.28-v20231116 | 1.28.3-20231116 | s3://amazon-eks/1.28.3/2023-11-14/
amazon-eks-gpu-node-1.28-v20231116
amazon-eks-arm64-node-1.28-v20231116

Package | Version
amazon-ssm-agent | 3.2.1798.0-1
containerd | 1.6.19-1.amzn2.0.5
cuda | 12.2.0-1
kernel | 5.10.198-187.748.amzn2
nvidia-driver-latest-dkms | 535.54.03-1.el7
runc | 1.1.7-4.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI names | Release version | Included artifacts
amazon-eks-node-1.27-v20231116 | 1.27.7-20231116 | s3://amazon-eks/1.27.7/2023-11-14/
amazon-eks-gpu-node-1.27-v20231116
amazon-eks-arm64-node-1.27-v20231116

Package | Version
amazon-ssm-agent | 3.2.1798.0-1
containerd | 1.6.19-1.amzn2.0.5
cuda | 12.2.0-1
kernel | 5.10.198-187.748.amzn2
nvidia-driver-latest-dkms | 535.54.03-1.el7
runc | 1.1.7-4.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI names | Release version | Included artifacts
amazon-eks-node-1.26-v20231116 | 1.26.10-20231116 | s3://amazon-eks/1.26.10/2023-11-14/
amazon-eks-gpu-node-1.26-v20231116
amazon-eks-arm64-node-1.26-v20231116

Package | Version
amazon-ssm-agent | 3.2.1798.0-1
containerd | 1.6.19-1.amzn2.0.5
cuda | 12.2.0-1
kernel | 5.10.198-187.748.amzn2
nvidia-driver-latest-dkms | 535.54.03-1.el7
runc | 1.1.7-4.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI names | Release version | Included artifacts
amazon-eks-node-1.25-v20231116 | 1.25.15-20231116 | s3://amazon-eks/1.25.15/2023-11-14/
amazon-eks-gpu-node-1.25-v20231116
amazon-eks-arm64-node-1.25-v20231116

Package | Version
amazon-ssm-agent | 3.2.1798.0-1
containerd | 1.6.19-1.amzn2.0.5
cuda | 12.2.0-1
kernel | 5.10.198-187.748.amzn2
nvidia-driver-latest-dkms | 535.54.03-1.el7
runc | 1.1.7-4.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI names | Release version | Included artifacts
amazon-eks-node-1.24-v20231116 | 1.24.17-20231116 | s3://amazon-eks/1.24.17/2023-11-14/
amazon-eks-gpu-node-1.24-v20231116
amazon-eks-arm64-node-1.24-v20231116

Package | Version
amazon-ssm-agent | 3.2.1798.0-1
containerd | 1.6.19-1.amzn2.0.5
cuda | 11.4.0-1
docker | 20.10.25-1.amzn2.0.3
kernel | 5.10.198-187.748.amzn2
nvidia-driver-latest-dkms | 470.182.03-1.el7
runc | 1.1.7-4.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI names | Release version | Included artifacts
amazon-eks-node-1.23-v20231116 | 1.23.17-20231116 | s3://amazon-eks/1.23.17/2023-11-14/
amazon-eks-gpu-node-1.23-v20231116
amazon-eks-arm64-node-1.23-v20231116

Package | Version
amazon-ssm-agent | 3.2.1798.0-1
containerd | 1.6.19-1.amzn2.0.5
cuda | 11.4.0-1
docker | 20.10.25-1.amzn2.0.3
kernel | 5.4.258-171.360.amzn2
nvidia-driver-latest-dkms | 470.182.03-1.el7
runc | 1.1.7-4.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.24 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.25 and above: `5.10.192-183.736.amzn2` + +--- + + +# AMI Release v20231106 +## What's Changed +* Add new i4i sizes to eni-max-pods.txt by @github-actions in https://github.com/awslabs/amazon-eks-ami/pull/1495 +* Set nerdctl default namespace to k8s.io by @reegnz in https://github.com/awslabs/amazon-eks-ami/pull/1488 +* Skip installing amazon-ssm-agent if already present by @pjaudiomv in https://github.com/awslabs/amazon-eks-ami/pull/1501 + +## New Contributors +* @pjaudiomv made their first contribution in https://github.com/awslabs/amazon-eks-ami/pull/1501 + +**Full Changelog**: https://github.com/awslabs/amazon-eks-ami/compare/v20231027...v20231106 + +--- + +

AMI Details

+ + +
+Kubernetes 1.28 + + + + + + + + + + + + + + + + + +
AMI names | Release version | Included artifacts
amazon-eks-node-1.28-v20231106 | 1.28.3-20231106 | s3://amazon-eks/1.28.3/2023-11-02/
amazon-eks-gpu-node-1.28-v20231106
amazon-eks-arm64-node-1.28-v20231106

Package | Version
amazon-ssm-agent | 3.2.1705.0-1
containerd | 1.6.19-1.amzn2.0.5
cuda | 12.2.0-1
kernel | 5.10.198-187.748.amzn2
nvidia-driver-latest-dkms | 535.54.03-1.el7
runc | 1.1.7-4.amzn2
+
+ +
+Kubernetes 1.27 + + + + + + + + + + + + + + + + + +
AMI names | Release version | Included artifacts
amazon-eks-node-1.27-v20231106 | 1.27.7-20231106 | s3://amazon-eks/1.27.7/2023-11-02/
amazon-eks-gpu-node-1.27-v20231106
amazon-eks-arm64-node-1.27-v20231106

Package | Version
amazon-ssm-agent | 3.2.1705.0-1
containerd | 1.6.19-1.amzn2.0.5
cuda | 11.4.0-1
kernel | 5.10.198-187.748.amzn2
nvidia-driver-latest-dkms | 470.182.03-1.el7
runc | 1.1.7-4.amzn2
+
+ +
+Kubernetes 1.26 + + + + + + + + + + + + + + + + + +
AMI names | Release version | Included artifacts
amazon-eks-node-1.26-v20231106 | 1.26.10-20231106 | s3://amazon-eks/1.26.10/2023-11-02/
amazon-eks-gpu-node-1.26-v20231106
amazon-eks-arm64-node-1.26-v20231106

Package | Version
amazon-ssm-agent | 3.2.1705.0-1
containerd | 1.6.19-1.amzn2.0.5
cuda | 11.4.0-1
kernel | 5.10.198-187.748.amzn2
nvidia-driver-latest-dkms | 470.182.03-1.el7
runc | 1.1.7-4.amzn2
+
+ +
+Kubernetes 1.25 + + + + + + + + + + + + + + + + + +
AMI names | Release version | Included artifacts
amazon-eks-node-1.25-v20231106 | 1.25.15-20231106 | s3://amazon-eks/1.25.15/2023-11-02/
amazon-eks-gpu-node-1.25-v20231106
amazon-eks-arm64-node-1.25-v20231106

Package | Version
amazon-ssm-agent | 3.2.1705.0-1
containerd | 1.6.19-1.amzn2.0.5
cuda | 11.4.0-1
kernel | 5.10.198-187.748.amzn2
nvidia-driver-latest-dkms | 470.182.03-1.el7
runc | 1.1.7-4.amzn2
+
+ +
+Kubernetes 1.24 + + + + + + + + + + + + + + + + + +
AMI names | Release version | Included artifacts
amazon-eks-node-1.24-v20231106 | 1.24.17-20231106 | s3://amazon-eks/1.24.17/2023-11-02/
amazon-eks-gpu-node-1.24-v20231106
amazon-eks-arm64-node-1.24-v20231106

Package | Version
amazon-ssm-agent | 3.2.1705.0-1
containerd | 1.6.19-1.amzn2.0.5
cuda | 11.4.0-1
docker | 20.10.23-1.amzn2.0.1
kernel | 5.10.198-187.748.amzn2
nvidia-driver-latest-dkms | 470.182.03-1.el7
runc | 1.1.7-4.amzn2
+
+ +
+Kubernetes 1.23 + + + + + + + + + + + + + + + + + +
AMI names | Release version | Included artifacts
amazon-eks-node-1.23-v20231106 | 1.23.17-20231106 | s3://amazon-eks/1.23.17/2023-11-02/
amazon-eks-gpu-node-1.23-v20231106
amazon-eks-arm64-node-1.23-v20231106

Package | Version
amazon-ssm-agent | 3.2.1705.0-1
containerd | 1.6.19-1.amzn2.0.5
cuda | 11.4.0-1
docker | 20.10.23-1.amzn2.0.1
kernel | 5.4.258-171.360.amzn2
nvidia-driver-latest-dkms | 470.182.03-1.el7
runc | 1.1.7-4.amzn2
+
+ + +> **Note** +> A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible. More information is available in #1494. +> To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: +> - Kubernetes 1.27 and below: `5.4.254-170.358.amzn2` +> - Kubernetes 1.28 and above: `5.10.192-183.736.amzn2` + +--- + +### AMI Release v20231027 +* amazon-eks-gpu-node-1.28-v20231027 +* amazon-eks-gpu-node-1.27-v20231027 +* amazon-eks-gpu-node-1.26-v20231027 +* amazon-eks-gpu-node-1.25-v20231027 +* amazon-eks-gpu-node-1.24-v20231027 +* amazon-eks-gpu-node-1.23-v20231027 +* amazon-eks-arm64-node-1.28-v20231027 +* amazon-eks-arm64-node-1.27-v20231027 +* amazon-eks-arm64-node-1.26-v20231027 +* amazon-eks-arm64-node-1.25-v20231027 +* amazon-eks-arm64-node-1.24-v20231027 +* amazon-eks-arm64-node-1.23-v20231027 +* amazon-eks-node-1.28-v20231027 +* amazon-eks-node-1.27-v20231027 +* amazon-eks-node-1.26-v20231027 +* amazon-eks-node-1.25-v20231027 +* amazon-eks-node-1.24-v20231027 +* amazon-eks-node-1.23-v20231027 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.28.2-20231027` +* `1.27.6-20231027` +* `1.26.9-20231027` +* `1.25.14-20231027` +* `1.24.17-20231027` +* `1.23.17-20231027` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.28.2/2023-10-17/ +* s3://amazon-eks/1.27.6/2023-10-17/ +* s3://amazon-eks/1.26.9/2023-10-17/ +* s3://amazon-eks/1.25.14/2023-10-17/ +* s3://amazon-eks/1.24.17/2023-10-17/ +* s3://amazon-eks/1.23.17/2023-10-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.257-170.359.amzn2 + * Kubernetes 1.24 and above: 5.10.197-186.748.amzn2 + * ⚠️ **Note: A recent change in the Linux kernel caused the EFA and NVIDIA drivers to be incompatible.** More information is available in https://github.com/awslabs/amazon-eks-ami/issues/1494. 
To prevent unexpected failures, the kernel in the GPU AMI will remain at the following versions until we have determined a solution: + * Kubernetes 1.27 and below: 5.4.254-170.358.amzn2 + * Kubernetes 1.28 and above: 5.10.192-183.736.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.5 +* `runc`: 1.1.7-4.amzn2 +* `cuda`: 12.2.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.2.1705.0-1 + +Notable changes: +- Add optional FIPS support ([#1458](https://github.com/awslabs/amazon-eks-ami/pull/1458)) +- Fix region in cached image names ([#1461](https://github.com/awslabs/amazon-eks-ami/pull/1461)) +- Update curl for [ALAS-2023-2287](https://alas.aws.amazon.com/AL2/ALAS-2023-2287.html) +- Update kernel for [ALASKERNEL-5.10-2023-039](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-039.html) + +Minor changes: +- Add r7i to eni-max-pods.txt ([#1473](https://github.com/awslabs/amazon-eks-ami/pull/1473)) +- Correctly tag cached images for us-gov-west-1 FIPS endpoint ([#1476](https://github.com/awslabs/amazon-eks-ami/pull/1476)) +- Add new i4i sizes to eni-max-pods.txt ([#1495](https://github.com/awslabs/amazon-eks-ami/pull/1495)) + +### AMI Release v20231002 +* amazon-eks-gpu-node-1.28-v20231002 +* amazon-eks-gpu-node-1.27-v20231002 +* amazon-eks-gpu-node-1.26-v20231002 +* amazon-eks-gpu-node-1.25-v20231002 +* amazon-eks-gpu-node-1.24-v20231002 +* amazon-eks-gpu-node-1.23-v20231002 +* amazon-eks-arm64-node-1.28-v20231002 +* amazon-eks-arm64-node-1.27-v20231002 +* amazon-eks-arm64-node-1.26-v20231002 +* amazon-eks-arm64-node-1.25-v20231002 +* amazon-eks-arm64-node-1.24-v20231002 +* amazon-eks-arm64-node-1.23-v20231002 +* amazon-eks-node-1.28-v20231002 +* amazon-eks-node-1.27-v20231002 +* amazon-eks-node-1.26-v20231002 +* amazon-eks-node-1.25-v20231002 +* amazon-eks-node-1.24-v20231002 +* amazon-eks-node-1.23-v20231002 + +[Release 
versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.28.1-20231002` +* `1.27.5-20231002` +* `1.26.8-20231002` +* `1.25.13-20231002` +* `1.24.17-20231002` +* `1.23.17-20231002` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.28.1/20230914/ +* s3://amazon-eks/1.27.5/20230914/ +* s3://amazon-eks/1.26.8/20230914/ +* s3://amazon-eks/1.25.13/20230914/ +* s3://amazon-eks/1.24.17/20230914/ +* s3://amazon-eks/1.23.17/20230914/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.254-170.358.amzn2 + * Kubernetes 1.24 and above: 5.10.192-183.736.amzn2 + * **Note** that the GPU AMI on Kubernetes 1.27 and below will continue to use kernel-5.4 as we work to address a [compatibility issue](https://github.com/awslabs/amazon-eks-ami/issues/1222) with `nvidia-driver-latest-dkms`. +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.3 +* `runc`: 1.1.7-3.amzn2 +* `cuda`: 12.2.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.2.1630.0-1 + +Notable changes: + - SSM agent upgraded to `3.2.1630.0-1` + - Update `libssh2` for [ALAS-2023-2257](https://alas.aws.amazon.com/AL2/ALAS-2023-2257.html) + +### AMI Release v20230919 +* amazon-eks-gpu-node-1.28-v20230919 +* amazon-eks-gpu-node-1.27-v20230919 +* amazon-eks-gpu-node-1.26-v20230919 +* amazon-eks-gpu-node-1.25-v20230919 +* amazon-eks-gpu-node-1.24-v20230919 +* amazon-eks-gpu-node-1.23-v20230919 +* amazon-eks-arm64-node-1.28-v20230919 +* amazon-eks-arm64-node-1.27-v20230919 +* amazon-eks-arm64-node-1.26-v20230919 +* amazon-eks-arm64-node-1.25-v20230919 +* amazon-eks-arm64-node-1.24-v20230919 +* amazon-eks-arm64-node-1.23-v20230919 +* amazon-eks-node-1.28-v20230919 +* amazon-eks-node-1.27-v20230919 +* amazon-eks-node-1.26-v20230919 +* amazon-eks-node-1.25-v20230919 +* amazon-eks-node-1.24-v20230919 +* amazon-eks-node-1.23-v20230919 
+ +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.28.1-20230919` +* `1.27.5-20230919` +* `1.26.8-20230919` +* `1.25.13-20230919` +* `1.24.17-20230919` +* `1.23.17-20230919` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.28.1/20230914/ +* s3://amazon-eks/1.27.5/20230914/ +* s3://amazon-eks/1.26.8/20230914/ +* s3://amazon-eks/1.25.13/20230914/ +* s3://amazon-eks/1.24.17/20230914/ +* s3://amazon-eks/1.23.17/20230914/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.254-170.358.amzn2 + * Kubernetes 1.24 and above: 5.10.192-183.736.amzn2 + * **Note** that the GPU AMI on Kubernetes 1.27 and below will continue to use kernel-5.4 due to a [compatibility issue](https://github.com/awslabs/amazon-eks-ami/issues/1222) with `nvidia-driver-latest-dkms`. +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.3 +* `runc`: 1.1.7-3.amzn2 +* `cuda`: 12.2.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.2.1542.0-1 + +Notable changes: + - kernel-5.10 updated to address: + - [ALAS2KERNEL-5.10-2023-039](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-039.html) + - Add support for Kubernetes 1.28 ([#1431](https://github.com/awslabs/amazon-eks-ami/pull/1431)) + - GPU AMI: + - Released with [Neuron version 2.14.0](https://awsdocs-neuron.readthedocs-hosted.com/en/latest/release-notes/index.html#neuron-2-14-0-09-15-2023) + - GPU AMIs on Kubernetes 1.28 and above: + - Upgraded `kernel` to 5.10 + - Upgraded `cuda` version to 12.2 + - Upgraded Nvidia driver to 535.54.03-1 + - [Installed EFA version 1.26.1](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa-start.html#efa-start-enable) + - Limited deeper [sleep states](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/processor_state_control.html) + +### AMI Release v20230825 +* 
amazon-eks-gpu-node-1.27-v20230825 +* amazon-eks-gpu-node-1.26-v20230825 +* amazon-eks-gpu-node-1.25-v20230825 +* amazon-eks-gpu-node-1.24-v20230825 +* amazon-eks-gpu-node-1.23-v20230825 +* amazon-eks-arm64-node-1.27-v20230825 +* amazon-eks-arm64-node-1.26-v20230825 +* amazon-eks-arm64-node-1.25-v20230825 +* amazon-eks-arm64-node-1.24-v20230825 +* amazon-eks-arm64-node-1.23-v20230825 +* amazon-eks-node-1.27-v20230825 +* amazon-eks-node-1.26-v20230825 +* amazon-eks-node-1.25-v20230825 +* amazon-eks-node-1.24-v20230825 +* amazon-eks-node-1.23-v20230825 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.4-20230825` +* `1.26.7-20230825` +* `1.25.12-20230825` +* `1.24.16-20230825` +* `1.23.17-20230825` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.4/2023-08-16/ +* s3://amazon-eks/1.26.7/2023-08-16/ +* s3://amazon-eks/1.25.12/2023-08-16/ +* s3://amazon-eks/1.24.16/2023-08-16/ +* s3://amazon-eks/1.23.17/2023-08-16/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.253-167.359.amzn2 + * Kubernetes 1.24 and above: 5.10.186-179.751.amzn2 + * **Note** that the GPU AMI will continue to use kernel-5.4 as we work to address a [compatibility issue](https://github.com/awslabs/amazon-eks-ami/issues/1222) with `nvidia-driver-latest-dkms`. +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.3 +* `runc`: 1.1.7-3.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.2.1478.0-1 + +Notable changes: + - containerd updated to address: + - [ALAS2DOCKER-2023-029](https://alas.aws.amazon.com/AL2/ALASDOCKER-2023-029.html) + - runc updated to address: + - [ALAS2DOCKER-2023-028](https://alas.aws.amazon.com/AL2/ALASDOCKER-2023-028.html) + - Fetch new IMDS token for every request. 
([#1395](https://github.com/awslabs/amazon-eks-ami/pull/1395)) + +### AMI Release v20230816 +* amazon-eks-gpu-node-1.27-v20230816 +* amazon-eks-gpu-node-1.26-v20230816 +* amazon-eks-gpu-node-1.25-v20230816 +* amazon-eks-gpu-node-1.24-v20230816 +* amazon-eks-gpu-node-1.23-v20230816 +* amazon-eks-arm64-node-1.27-v20230816 +* amazon-eks-arm64-node-1.26-v20230816 +* amazon-eks-arm64-node-1.25-v20230816 +* amazon-eks-arm64-node-1.24-v20230816 +* amazon-eks-arm64-node-1.23-v20230816 +* amazon-eks-node-1.27-v20230816 +* amazon-eks-node-1.26-v20230816 +* amazon-eks-node-1.25-v20230816 +* amazon-eks-node-1.24-v20230816 +* amazon-eks-node-1.23-v20230816 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.3-20230816` +* `1.26.6-20230816` +* `1.25.11-20230816` +* `1.24.15-20230816` +* `1.23.17-20230816` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.3/2023-08-14/ +* s3://amazon-eks/1.26.6/2023-08-14/ +* s3://amazon-eks/1.25.11/2023-08-14/ +* s3://amazon-eks/1.24.15/2023-08-14/ +* s3://amazon-eks/1.23.17/2023-08-15/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.250-166.369.amzn2 + * Kubernetes 1.24 and above: 5.10.186-179.751.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.7-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.2.1377.0-1 +Notable changes: +- Install latest runc `1.1.*` ([#1384](https://github.com/awslabs/amazon-eks-ami/pull/1384)). +- Install latest amazon-ssm-agent from S3 ([#1370](https://github.com/awslabs/amazon-eks-ami/pull/1370)). 
+- `kernel` updated to address: + - [ALASKERNEL-5.4-2023-050](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-050.html) + - [ALASKERNEL-5.10-2023-038](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-038.html) + +Other changes: +- Do not set `KubeletCredentialProviders` feature flag for 1.28+ ([#1375](https://github.com/awslabs/amazon-eks-ami/pull/1375)) +- Cache IMDS tokens per-user ([#1386](https://github.com/awslabs/amazon-eks-ami/pull/1386)) + +### AMI Release v20230728 +* amazon-eks-gpu-node-1.27-v20230728 +* amazon-eks-gpu-node-1.26-v20230728 +* amazon-eks-gpu-node-1.25-v20230728 +* amazon-eks-gpu-node-1.24-v20230728 +* amazon-eks-gpu-node-1.23-v20230728 +* amazon-eks-arm64-node-1.27-v20230728 +* amazon-eks-arm64-node-1.26-v20230728 +* amazon-eks-arm64-node-1.25-v20230728 +* amazon-eks-arm64-node-1.24-v20230728 +* amazon-eks-arm64-node-1.23-v20230728 +* amazon-eks-node-1.27-v20230728 +* amazon-eks-node-1.26-v20230728 +* amazon-eks-node-1.25-v20230728 +* amazon-eks-node-1.24-v20230728 +* amazon-eks-node-1.23-v20230728 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.3-20230728` +* `1.26.6-20230728` +* `1.25.11-20230728` +* `1.24.15-20230728` +* `1.23.17-20230728` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.3/2023-06-30/ +* s3://amazon-eks/1.26.6/2023-06-30/ +* s3://amazon-eks/1.25.11/2023-06-30/ +* s3://amazon-eks/1.24.15/2023-06-30/ +* s3://amazon-eks/1.23.17/2023-06-30/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.249-163.359.amzn2 + * Kubernetes 1.24 and above: 5.10.184-175.749.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
+* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.5-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- Kernel fix for `CVE-2023-3117` and `CVE-2023-35001` with new versions: [5.10 kernel](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-037.html) and [5.4 kernel](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-049.html) +- Mount bpffs on all supported Kubernetes versions. ([#1349](https://github.com/awslabs/amazon-eks-ami/pull/1349)) +- Enable discard_unpacked_layers by default to clean up compressed image layers in containerd's content store.([#1360](https://github.com/awslabs/amazon-eks-ami/pull/1360)) + +### AMI Release v20230711 +* amazon-eks-gpu-node-1.27-v20230711 +* amazon-eks-gpu-node-1.26-v20230711 +* amazon-eks-gpu-node-1.25-v20230711 +* amazon-eks-gpu-node-1.24-v20230711 +* amazon-eks-gpu-node-1.23-v20230711 +* amazon-eks-arm64-node-1.27-v20230711 +* amazon-eks-arm64-node-1.26-v20230711 +* amazon-eks-arm64-node-1.25-v20230711 +* amazon-eks-arm64-node-1.24-v20230711 +* amazon-eks-arm64-node-1.23-v20230711 +* amazon-eks-node-1.27-v20230711 +* amazon-eks-node-1.26-v20230711 +* amazon-eks-node-1.25-v20230711 +* amazon-eks-node-1.24-v20230711 +* amazon-eks-node-1.23-v20230711 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.3-20230711` +* `1.26.6-20230711` +* `1.25.11-20230711` +* `1.24.15-20230711` +* `1.23.17-20230711` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.3/2023-06-30/ +* s3://amazon-eks/1.26.6/2023-06-30/ +* s3://amazon-eks/1.25.11/2023-06-30/ +* s3://amazon-eks/1.24.15/2023-06-30/ +* s3://amazon-eks/1.23.17/2023-06-30/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.247-162.350.amzn2 + * Kubernetes 1.24 and above: 5.10.184-175.731.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with 
Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.5-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- Kubelet versions bumped up for k8s version 1.23-1.27 to address [bug](https://github.com/kubernetes/kubernetes/issues/116847#issuecomment-1552938714) +- Source VPC CNI plugin version bumped from 0.8.0 to 1.2.0 + +### AMI Release v20230703 +* amazon-eks-gpu-node-1.27-v20230703 +* amazon-eks-gpu-node-1.26-v20230703 +* amazon-eks-gpu-node-1.25-v20230703 +* amazon-eks-gpu-node-1.24-v20230703 +* amazon-eks-gpu-node-1.23-v20230703 +* amazon-eks-gpu-node-1.22-v20230703 +* amazon-eks-arm64-node-1.27-v20230703 +* amazon-eks-arm64-node-1.26-v20230703 +* amazon-eks-arm64-node-1.25-v20230703 +* amazon-eks-arm64-node-1.24-v20230703 +* amazon-eks-arm64-node-1.23-v20230703 +* amazon-eks-arm64-node-1.22-v20230703 +* amazon-eks-node-1.27-v20230703 +* amazon-eks-node-1.26-v20230703 +* amazon-eks-node-1.25-v20230703 +* amazon-eks-node-1.24-v20230703 +* amazon-eks-node-1.23-v20230703 +* amazon-eks-node-1.22-v20230703 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.1-20230703` +* `1.26.4-20230703` +* `1.25.9-20230703` +* `1.24.13-20230703` +* `1.23.17-20230703` +* `1.22.17-20230703` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.1/2023-04-19/ +* s3://amazon-eks/1.26.4/2023-05-11/ +* s3://amazon-eks/1.25.9/2023-05-11/ +* s3://amazon-eks/1.24.13/2023-05-11/ +* s3://amazon-eks/1.23.17/2023-05-11/ +* s3://amazon-eks/1.22.17/2023-05-11/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.247-162.350.amzn2 + * Kubernetes 1.24 and above: 5.10.184-175.731.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
+* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.5-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- This is the last AMI release for Kubernetes 1.22 +- Update Kernel to 5.4.247-162.350.amzn2 to address [ALASKERNEL-5.4-2023-048](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-048.html), [CVE-2023-1206](https://alas.aws.amazon.com/cve/html/CVE-2023-1206.html) +- Update Kernel to 5.10.184-175.731.amzn2 to address [ALASKERNEL-5.10-2023-035](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-035.html), [CVE-2023-1206](https://alas.aws.amazon.com/cve/html/CVE-2023-1206.html) +- Use recommended clocksources ([#1328](https://github.com/awslabs/amazon-eks-ami/pull/1328)) +- Add configurable working directory ([#1231](https://github.com/awslabs/amazon-eks-ami/pull/1231)) +- Update eni-max-pods.txt ([#1330](https://github.com/awslabs/amazon-eks-ami/pull/1330)) +- Mount bpffs by default on 1.25+ ([#1320](https://github.com/awslabs/amazon-eks-ami/pull/1320)) + +### AMI Release v20230607 +* amazon-eks-gpu-node-1.27-v20230607 +* amazon-eks-gpu-node-1.26-v20230607 +* amazon-eks-gpu-node-1.25-v20230607 +* amazon-eks-gpu-node-1.24-v20230607 +* amazon-eks-gpu-node-1.23-v20230607 +* amazon-eks-gpu-node-1.22-v20230607 +* amazon-eks-arm64-node-1.27-v20230607 +* amazon-eks-arm64-node-1.26-v20230607 +* amazon-eks-arm64-node-1.25-v20230607 +* amazon-eks-arm64-node-1.24-v20230607 +* amazon-eks-arm64-node-1.23-v20230607 +* amazon-eks-arm64-node-1.22-v20230607 +* amazon-eks-node-1.27-v20230607 +* amazon-eks-node-1.26-v20230607 +* amazon-eks-node-1.25-v20230607 +* amazon-eks-node-1.24-v20230607 +* amazon-eks-node-1.23-v20230607 +* amazon-eks-node-1.22-v20230607 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.1-20230607` +* `1.26.4-20230607` +* `1.25.9-20230607` +* `1.24.13-20230607` +* `1.23.17-20230607` +* 
`1.22.17-20230607` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.1/2023-04-19/ +* s3://amazon-eks/1.26.4/2023-05-11/ +* s3://amazon-eks/1.25.9/2023-05-11/ +* s3://amazon-eks/1.24.13/2023-05-11/ +* s3://amazon-eks/1.23.17/2023-05-11/ +* s3://amazon-eks/1.22.17/2023-05-11/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.242-156.349.amzn2 + * Kubernetes 1.24 and above: 5.10.179-168.710.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.5-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +* `5.4` kernel update to `5.4.242-156.349.amzn2` and `5.10` kernel update to `5.10.179-168.710.amzn2` address [CVE-2023-32233](https://alas.aws.amazon.com/cve/html/CVE-2023-32233.html) +* Updating `runc` version to `1.1.5-1.amzn2` which contains fixes for [CVE-2023-28642](https://explore.alas.aws.amazon.com/CVE-2023-28642.html) and [CVE-2023-27561](https://explore.alas.aws.amazon.com/CVE-2023-27561.html). 
+ +### AMI Release v20230526 +* amazon-eks-gpu-node-1.27-v20230526 +* amazon-eks-gpu-node-1.26-v20230526 +* amazon-eks-gpu-node-1.25-v20230526 +* amazon-eks-gpu-node-1.24-v20230526 +* amazon-eks-gpu-node-1.23-v20230526 +* amazon-eks-gpu-node-1.22-v20230526 +* amazon-eks-arm64-node-1.27-v20230526 +* amazon-eks-arm64-node-1.26-v20230526 +* amazon-eks-arm64-node-1.25-v20230526 +* amazon-eks-arm64-node-1.24-v20230526 +* amazon-eks-arm64-node-1.23-v20230526 +* amazon-eks-arm64-node-1.22-v20230526 +* amazon-eks-node-1.27-v20230526 +* amazon-eks-node-1.26-v20230526 +* amazon-eks-node-1.25-v20230526 +* amazon-eks-node-1.24-v20230526 +* amazon-eks-node-1.23-v20230526 +* amazon-eks-node-1.22-v20230526 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.1-20230526` +* `1.26.4-20230526` +* `1.25.9-20230526` +* `1.24.13-20230526` +* `1.23.17-20230526` +* `1.22.17-20230526` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.1/2023-04-19/ +* s3://amazon-eks/1.26.4/2023-05-11/ +* s3://amazon-eks/1.25.9/2023-05-11/ +* s3://amazon-eks/1.24.13/2023-05-11/ +* s3://amazon-eks/1.23.17/2023-05-11/ +* s3://amazon-eks/1.22.17/2023-05-11/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.242-155.348.amzn2 + * Kubernetes 1.24 and above: 5.10.179-166.674.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
+* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +* `5.4` kernel update to `5.4.242-155.348.amzn2` addresses CVE [ALAS2KERNEL-5.4-2023-045](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-045.html) +* `5.10` kernel update to `5.10.179-166.674.amzn2` addresses [ALAS2KERNEL-5.10-2023-032](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-032.html) +* `Glib` update to `glib2-2.56.1-9.amzn2` addresses [ALAS-2023-2049](https://alas.aws.amazon.com/AL2/ALAS-2023-2049.html) + +### AMI Release v20230513 +* amazon-eks-gpu-node-1.27-v20230513 +* amazon-eks-gpu-node-1.26-v20230513 +* amazon-eks-gpu-node-1.25-v20230513 +* amazon-eks-gpu-node-1.24-v20230513 +* amazon-eks-gpu-node-1.23-v20230513 +* amazon-eks-gpu-node-1.22-v20230513 +* amazon-eks-arm64-node-1.27-v20230513 +* amazon-eks-arm64-node-1.26-v20230513 +* amazon-eks-arm64-node-1.25-v20230513 +* amazon-eks-arm64-node-1.24-v20230513 +* amazon-eks-arm64-node-1.23-v20230513 +* amazon-eks-arm64-node-1.22-v20230513 +* amazon-eks-node-1.27-v20230513 +* amazon-eks-node-1.26-v20230513 +* amazon-eks-node-1.25-v20230513 +* amazon-eks-node-1.24-v20230513 +* amazon-eks-node-1.23-v20230513 +* amazon-eks-node-1.22-v20230513 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.1-20230513` +* `1.26.4-20230513` +* `1.25.9-20230513` +* `1.24.13-20230513` +* `1.23.17-20230513` +* `1.22.17-20230513` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.1/2023-04-19/ +* s3://amazon-eks/1.26.4/2023-05-11/ +* s3://amazon-eks/1.25.9/2023-05-11/ +* s3://amazon-eks/1.24.13/2023-05-11/ +* s3://amazon-eks/1.23.17/2023-05-11/ +* s3://amazon-eks/1.22.17/2023-05-11/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.241-150.347.amzn2 + * Kubernetes 1.24 and above: 5.10.178-162.673.amzn2 +* 
`dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: + - Add support for Kubernetes 1.27 ([#1300](https://github.com/awslabs/amazon-eks-ami/pull/1300)) + +Other changes: + - Updated max pods for i4g instance types ([#1296](https://github.com/awslabs/amazon-eks-ami/commit/0de475c5f802acd470d9a2f1fdd521b7949a25ec)) + +### AMI Release v20230509 +* amazon-eks-gpu-node-1.26-v20230509 +* amazon-eks-gpu-node-1.25-v20230509 +* amazon-eks-gpu-node-1.24-v20230509 +* amazon-eks-gpu-node-1.23-v20230509 +* amazon-eks-gpu-node-1.22-v20230509 +* amazon-eks-arm64-node-1.26-v20230509 +* amazon-eks-arm64-node-1.25-v20230509 +* amazon-eks-arm64-node-1.24-v20230509 +* amazon-eks-arm64-node-1.23-v20230509 +* amazon-eks-arm64-node-1.22-v20230509 +* amazon-eks-node-1.26-v20230509 +* amazon-eks-node-1.25-v20230509 +* amazon-eks-node-1.24-v20230509 +* amazon-eks-node-1.23-v20230509 +* amazon-eks-node-1.22-v20230509 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.26.2-20230509` +* `1.25.7-20230509` +* `1.24.11-20230509` +* `1.23.17-20230509` +* `1.22.17-20230509` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.26.2/2023-03-17/ +* s3://amazon-eks/1.25.7/2023-03-17/ +* s3://amazon-eks/1.24.11/2023-03-17/ +* s3://amazon-eks/1.23.17/2023-03-17/ +* s3://amazon-eks/1.22.17/2023-03-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.241-150.347.amzn2 + * Kubernetes 1.24 and above: 5.10.178-162.673.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
+* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- The new AMIs have updated docker version 20.10.23-1.amzn2.0.1 that addresses two docker CVEs; [CVE-2022-36109 - docker](https://alas.aws.amazon.com/cve/html/CVE-2022-36109.html) and [CVE-2022-37708 - docker](https://alas.aws.amazon.com/cve/html/CVE-2022-37708.html). +- For the GPU Variants of these AMIs, the Nvidia Fabric Manager version is upgraded from 470.161.03-1 to 470.182.03-1. +- Fix ECR pattern for aws-cn ([#1280](https://github.com/awslabs/amazon-eks-ami/pull/1280)) +- Fix imds setting for multiple enis on ipv6 ([1275](https://github.com/awslabs/amazon-eks-ami/pull/1275)) + +### AMI Release v20230501 +* amazon-eks-gpu-node-1.26-v20230501 +* amazon-eks-gpu-node-1.25-v20230501 +* amazon-eks-gpu-node-1.24-v20230501 +* amazon-eks-gpu-node-1.23-v20230501 +* amazon-eks-gpu-node-1.22-v20230501 +* amazon-eks-arm64-node-1.26-v20230501 +* amazon-eks-arm64-node-1.25-v20230501 +* amazon-eks-arm64-node-1.24-v20230501 +* amazon-eks-arm64-node-1.23-v20230501 +* amazon-eks-arm64-node-1.22-v20230501 +* amazon-eks-node-1.26-v20230501 +* amazon-eks-node-1.25-v20230501 +* amazon-eks-node-1.24-v20230501 +* amazon-eks-node-1.23-v20230501 +* amazon-eks-node-1.22-v20230501 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.26.2-20230501` +* `1.25.7-20230501` +* `1.24.11-20230501` +* `1.23.17-20230501` +* `1.22.17-20230501` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.26.2/2023-03-17/ +* s3://amazon-eks/1.25.7/2023-03-17/ +* s3://amazon-eks/1.24.11/2023-03-17/ +* s3://amazon-eks/1.23.17/2023-03-17/ +* s3://amazon-eks/1.22.17/2023-03-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.241-150.347.amzn2 + * Kubernetes 1.24 and above: 5.10.178-162.673.amzn2 +* `dockerd`: 
20.10.17-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- Add bootstrap option to create a local NVMe raid0 or individual volume mounts ([#1171](https://github.com/awslabs/amazon-eks-ami/pull/1171)) +- Improve bootstrap logging ([#1276](https://github.com/awslabs/amazon-eks-ami/pull/1276)) +- Use credential provider API v1 in 1.27+, v1alpha1 in 1.26- ([#1269](https://github.com/awslabs/amazon-eks-ami/pull/1269)) +- Override hostname to match EC2's PrivateDnsName ([#1264](https://github.com/awslabs/amazon-eks-ami/pull/1264)) +- Add ethtool ([#1261](https://github.com/awslabs/amazon-eks-ami/pull/1261)) +- Update `kernel-5.10` for [ALASKERNEL-5.10-2023-031](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-031.html) +- Kernel version upgrade to `5.10.178-162.673.amzn2` fixes the [Containers failing to create and probe exec errors related to seccomp on recent kernel-5.10 versions](https://github.com/awslabs/amazon-eks-ami/issues/1219) issue + + +### AMI Release v20230411 +* amazon-eks-gpu-node-1.26-v20230411 +* amazon-eks-gpu-node-1.25-v20230411 +* amazon-eks-gpu-node-1.24-v20230411 +* amazon-eks-gpu-node-1.23-v20230411 +* amazon-eks-gpu-node-1.22-v20230411 +* amazon-eks-arm64-node-1.26-v20230411 +* amazon-eks-arm64-node-1.25-v20230411 +* amazon-eks-arm64-node-1.24-v20230411 +* amazon-eks-arm64-node-1.23-v20230411 +* amazon-eks-arm64-node-1.22-v20230411 +* amazon-eks-node-1.26-v20230411 +* amazon-eks-node-1.25-v20230411 +* amazon-eks-node-1.24-v20230411 +* amazon-eks-node-1.23-v20230411 +* amazon-eks-node-1.22-v20230411 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.26.2-20230411` +* `1.25.7-20230411` +* `1.24.11-20230411` +* `1.23.17-20230411` +* 
`1.22.17-20230411` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.26.2/2023-03-17/ +* s3://amazon-eks/1.25.7/2023-03-17/ +* s3://amazon-eks/1.24.11/2023-03-17/ +* s3://amazon-eks/1.23.17/2023-03-17/ +* s3://amazon-eks/1.22.17/2023-03-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.238-148.347.amzn2 + * Kubernetes 1.24 and above: 5.10.176-157.645.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0 + +Notable changes: +- The AMI changes include update for 5.4 kernel version from `5.4.238-148.346.amzn2` to `kernel-5.4.238-148.347.amzn2`. `kernel-5.4.238-148.346` had a fatal issue affecting SMB mounts in which a null pointer dereference caused a panic. As a result, this package was removed from the Amazon Linux 2 repositories. + +### AMI Release v20230406 +* amazon-eks-gpu-node-1.26-v20230406 +* amazon-eks-gpu-node-1.25-v20230406 +* amazon-eks-gpu-node-1.24-v20230406 +* amazon-eks-gpu-node-1.23-v20230406 +* amazon-eks-gpu-node-1.22-v20230406 +* amazon-eks-arm64-node-1.26-v20230406 +* amazon-eks-arm64-node-1.25-v20230406 +* amazon-eks-arm64-node-1.24-v20230406 +* amazon-eks-arm64-node-1.23-v20230406 +* amazon-eks-arm64-node-1.22-v20230406 +* amazon-eks-node-1.26-v20230406 +* amazon-eks-node-1.25-v20230406 +* amazon-eks-node-1.24-v20230406 +* amazon-eks-node-1.23-v20230406 +* amazon-eks-node-1.22-v20230406 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.26.2-20230406` +* `1.25.7-20230406` +* `1.24.11-20230406` +* `1.23.17-20230406` +* `1.22.17-20230406` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.26.2/2023-03-17/ +* s3://amazon-eks/1.25.7/2023-03-17/ +* s3://amazon-eks/1.24.11/2023-03-17/ +* 
s3://amazon-eks/1.23.17/2023-03-17/ +* s3://amazon-eks/1.22.17/2023-03-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.238-148.346.amzn2 + * Kubernetes 1.24 and above: 5.10.173-154.642.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0 + +Notable changes: +- Add support for Kubernetes 1.26 ([#1246](https://github.com/awslabs/amazon-eks-ami/pull/1246)) +- Add support `inf2`, `trn1n` instance types ([#1251](https://github.com/awslabs/amazon-eks-ami/pull/1251)) +- Updated `containerd` to address: + - [ALASDOCKER-2023-023](https://alas.aws.amazon.com/AL2/ALASDOCKER-2023-023.html) +- Fixed `ecr-credential-provider` flags not being passed correctly to `kubelet` ([#1240](https://github.com/awslabs/amazon-eks-ami/pull/1240)) + - Added `--image-credential-provider-config` and `--image-credential-provider-bin-dir` flags to the `systemd` units. + - Set `KubeletCredentialProviders` feature flag to `true` in the `kubelet` JSON config. 
+ +Other changes: +- Use `gp3 volume_type` for 1.27+ ([#1197](https://github.com/awslabs/amazon-eks-ami/pull/1197)) +- Use default kubelet API QPS for 1.27+ ([#1241](https://github.com/awslabs/amazon-eks-ami/pull/1241)) +- Remove `--container-runtime` kubelet flag for 1.27+ ([#1250](https://github.com/awslabs/amazon-eks-ami/pull/1250)) + +### AMI Release v20230322 +* amazon-eks-gpu-node-1.25-v20230322 +* amazon-eks-gpu-node-1.24-v20230322 +* amazon-eks-gpu-node-1.23-v20230322 +* amazon-eks-gpu-node-1.22-v20230322 +* amazon-eks-arm64-node-1.25-v20230322 +* amazon-eks-arm64-node-1.24-v20230322 +* amazon-eks-arm64-node-1.23-v20230322 +* amazon-eks-arm64-node-1.22-v20230322 +* amazon-eks-node-1.25-v20230322 +* amazon-eks-node-1.24-v20230322 +* amazon-eks-node-1.23-v20230322 +* amazon-eks-node-1.22-v20230322 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.25.7-20230322` +* `1.24.11-20230322` +* `1.23.17-20230322` +* `1.22.17-20230322` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.25.7/2023-03-17/ +* s3://amazon-eks/1.24.11/2023-03-17/ +* s3://amazon-eks/1.23.17/2023-03-17/ +* s3://amazon-eks/1.22.17/2023-03-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.235-144.344.amzn2 + * Kubernetes 1.24 and above: 5.10.173-154.642.amzn2 + * The GPU AMI will continue to use `kernel-5.4` for all Kubernetes versions as we work to address a compatibility issue with `nvidia-driver-latest-dkms` ([#1222](https://github.com/awslabs/amazon-eks-ami/issues/1222)). +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that with Kubernetes 1.25+, Docker is only installed on GPU AMI's. This is subject to change as we remove unnecessary dependencies, and we recommend completing the migration to `containerd` immediately. 
+* `containerd`: 1.6.6-1.amzn2.0.2 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- Validate package versionlocks ([#1195](https://github.com/awslabs/amazon-eks-ami/pull/1195)) +- Updated `kernel-5.4` to address: + - [ALASKERNEL-5.4-2023-043](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-043.html) +- Updated `kernel-5.10` to address: + - [ALASKERNEL-5.10-2023-027](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-027.html) + - [ALASKERNEL-5.10-2023-028](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-028.html) + +### AMI Release v20230304 +* amazon-eks-gpu-node-1.25-v20230304 +* amazon-eks-gpu-node-1.24-v20230304 +* amazon-eks-gpu-node-1.23-v20230304 +* amazon-eks-gpu-node-1.22-v20230304 +* amazon-eks-gpu-node-1.21-v20230304 +* amazon-eks-arm64-node-1.25-v20230304 +* amazon-eks-arm64-node-1.24-v20230304 +* amazon-eks-arm64-node-1.23-v20230304 +* amazon-eks-arm64-node-1.22-v20230304 +* amazon-eks-arm64-node-1.21-v20230304 +* amazon-eks-node-1.25-v20230304 +* amazon-eks-node-1.24-v20230304 +* amazon-eks-node-1.23-v20230304 +* amazon-eks-node-1.22-v20230304 +* amazon-eks-node-1.21-v20230304 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.25.6-20230304` +* `1.24.10-20230304` +* `1.23.16-20230304` +* `1.22.17-20230304` +* `1.21.14-20230304` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.25.6/20230130/ +* s3://amazon-eks/1.24.10/20230130/ +* s3://amazon-eks/1.23.16/20230130/ +* s3://amazon-eks/1.22.17/20230130/ +* s3://amazon-eks/1.21.14/20230130/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.231-137.341.amzn2 + * Kubernetes 1.24 and above: 5.10.167-147.601.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that with Kubernetes 1.25+, Docker is only installed on GPU AMI's. 
This is subject to change as we remove unnecessary dependencies, and we recommend completing the migration to `containerd` immediately. +* `containerd`: 1.6.6-1.amzn2.0.2 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- This is the last AMI release for Kubernetes 1.21 +- This is the first AMI release available in `ap-southeast-4` + +Minor changes: +- Adds a user guide section about packages in the versionlock file. [(#1199)](https://github.com/awslabs/amazon-eks-ami/pull/1199) + +### AMI Release v20230217 +* amazon-eks-gpu-node-1.25-v20230217 +* amazon-eks-gpu-node-1.24-v20230217 +* amazon-eks-gpu-node-1.23-v20230217 +* amazon-eks-gpu-node-1.22-v20230217 +* amazon-eks-gpu-node-1.21-v20230217 +* amazon-eks-arm64-node-1.25-v20230217 +* amazon-eks-arm64-node-1.24-v20230217 +* amazon-eks-arm64-node-1.23-v20230217 +* amazon-eks-arm64-node-1.22-v20230217 +* amazon-eks-arm64-node-1.21-v20230217 +* amazon-eks-node-1.25-v20230217 +* amazon-eks-node-1.24-v20230217 +* amazon-eks-node-1.23-v20230217 +* amazon-eks-node-1.22-v20230217 +* amazon-eks-node-1.21-v20230217 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.25.6-20230217` +* `1.24.10-20230217` +* `1.23.16-20230217` +* `1.22.17-20230217` +* `1.21.14-20230217` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.25.6/20230130/ +* s3://amazon-eks/1.24.10/20230130/ +* s3://amazon-eks/1.23.16/20230130/ +* s3://amazon-eks/1.22.17/20230211/ +* s3://amazon-eks/1.21.14/20230130/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.228-132.418.amzn2 + * Kubernetes 1.24 and above: 5.10.165-143.735.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
+* `containerd`: 1.6.6-1.amzn2.0.2 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- Kubernetes 1.24+ now use `kernel-5.10` for x86 and ARM AMIs. + - The GPU AMI will continue to use `kernel-5.4` as we work to address a compatibility issue with `nvidia-driver-latest-dkms`. +- The `kernel` package is now properly version-locked [#1191](https://github.com/awslabs/amazon-eks-ami/pull/1191). + - See [#1193](https://github.com/awslabs/amazon-eks-ami/issues/1193) for more information. +- New AMIs released for kubernetes version 1.25 +- Pressure stall information (PSI) is now enabled [#1161](https://github.com/awslabs/amazon-eks-ami/pull/1161). + +Minor changes: +- Updated `eni-max-pods.txt` with new instance types. +- Allow `kernel_version` to be set to any value (such as `5.15`) when building a custom AMI. + +### [Recalled] AMI Release v20230211 +* amazon-eks-gpu-node-1.25-v20230211 +* amazon-eks-gpu-node-1.24-v20230211 +* amazon-eks-gpu-node-1.23-v20230211 +* amazon-eks-gpu-node-1.22-v20230211 +* amazon-eks-gpu-node-1.21-v20230211 +* amazon-eks-arm64-node-1.25-v20230211 +* amazon-eks-arm64-node-1.24-v20230211 +* amazon-eks-arm64-node-1.23-v20230211 +* amazon-eks-arm64-node-1.22-v20230211 +* amazon-eks-arm64-node-1.21-v20230211 +* amazon-eks-node-1.25-v20230211 +* amazon-eks-node-1.24-v20230211 +* amazon-eks-node-1.23-v20230211 +* amazon-eks-node-1.22-v20230211 +* amazon-eks-node-1.21-v20230211 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.25.6-20230211` +* `1.24.10-20230211` +* `1.23.16-20230211` +* `1.22.17-20230211` +* `1.21.14-20230211` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.25.6/2023-01-30/ +* s3://amazon-eks/1.24.10/2023-01-30/ +* s3://amazon-eks/1.23.16/2023-01-30/ +* s3://amazon-eks/1.22.17/2023-01-30/ +* s3://amazon-eks/1.21.14/2023-01-30/ + 
+AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.228-132.418.amzn2 + * Kubernetes 1.24 and above: 5.10.165-143.735.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.6-1.amzn2.0.2 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- This is the first AMI release for Kubernetes 1.25. +- Kubernetes 1.24+ now use `kernel-5.10` for x86 and ARM AMIs. + - The GPU AMI will continue to use `kernel-5.4` as we work to address a compatibility issue with `nvidia-driver-latest-dkms`. +- The `kernel` package is now version-locked. + +Minor changes: +- Updated `eni-max-pods.txt` with new instance types. +- Allow `kernel_version` to be set to any value (such as `5.15`) when building a custom AMI. +- Fix a misconfiguration in the GPU AMI with `containerd`'s registry certificates. [#1168](https://github.com/awslabs/amazon-eks-ami/issues/1168). 
+ +### AMI Release v20230203 +* amazon-eks-gpu-node-1.24-v20230203 +* amazon-eks-gpu-node-1.23-v20230203 +* amazon-eks-gpu-node-1.22-v20230203 +* amazon-eks-gpu-node-1.21-v20230203 +* amazon-eks-arm64-node-1.24-v20230203 +* amazon-eks-arm64-node-1.23-v20230203 +* amazon-eks-arm64-node-1.22-v20230203 +* amazon-eks-arm64-node-1.21-v20230203 +* amazon-eks-node-1.24-v20230203 +* amazon-eks-node-1.23-v20230203 +* amazon-eks-node-1.22-v20230203 +* amazon-eks-node-1.21-v20230203 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.9-20230203` +* `1.23.15-20230203` +* `1.22.17-20230203` +* `1.21.14-20230203` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.9/2023-01-11/ +* s3://amazon-eks/1.23.15/2023-01-11/ +* s3://amazon-eks/1.22.17/2023-01-11/ +* s3://amazon-eks/1.21.14/2023-01-11/ + +AMI details: +* kernel: 5.4.228-131.415.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.4-1.amzn2 +* cuda: 11.4.0-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +* Reverted [Use external cloud provider for EKS Local deployments](https://github.com/awslabs/amazon-eks-ami/commit/4b9b546dc325e6372e705f1e192f68395ce017db) + +### AMI Release v20230127 +* amazon-eks-gpu-node-1.24-v20230127 +* amazon-eks-gpu-node-1.23-v20230127 +* amazon-eks-gpu-node-1.22-v20230127 +* amazon-eks-gpu-node-1.21-v20230127 +* amazon-eks-arm64-node-1.24-v20230127 +* amazon-eks-arm64-node-1.23-v20230127 +* amazon-eks-arm64-node-1.22-v20230127 +* amazon-eks-arm64-node-1.21-v20230127 +* amazon-eks-node-1.24-v20230127 +* amazon-eks-node-1.23-v20230127 +* amazon-eks-node-1.22-v20230127 +* amazon-eks-node-1.21-v20230127 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.9-20230127` +* `1.23.15-20230127` +* `1.22.17-20230127` +* `1.21.14-20230127` + +Binaries used 
to build these AMIs are published: +* s3://amazon-eks/1.24.9/2023-01-11/ +* s3://amazon-eks/1.23.15/2023-01-11/ +* s3://amazon-eks/1.22.17/2023-01-11/ +* s3://amazon-eks/1.21.14/2023-01-11/ + +AMI details: +* kernel: 5.4.228-131.415.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.4-1.amzn2 +* cuda: 11.4.0-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +- Updated kernel version to `5.4.228-131.415.amzn2` for: + - [ALAS2KERNEL-5.4-2023-041](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-041.html). +- Add support for `C6in`, `M6in`, `M6idn`, `R6in`, `R6idn` and `Hpc6id` instances [#1153](https://github.com/awslabs/amazon-eks-ami/pull/1153) +- This is the first AMI release available in `ap-south-2`, `eu-central-2`, and `eu-south-2`. +- Cache image content without unpacking/snapshotting [#1144](https://github.com/awslabs/amazon-eks-ami/pull/1144) + - Container image caching has been re-enabled for 1.24 AMI's. + +Minor changes: +- Update AWS CLI to `2.9.18` +- Configure containerd registry certificates by default in the GPU AMI. 
+ +### AMI Release v20230105 +* amazon-eks-gpu-node-1.24-v20230105 +* amazon-eks-gpu-node-1.23-v20230105 +* amazon-eks-gpu-node-1.22-v20230105 +* amazon-eks-gpu-node-1.21-v20230105 +* amazon-eks-gpu-node-1.20-v20230105 +* amazon-eks-arm64-node-1.24-v20230105 +* amazon-eks-arm64-node-1.23-v20230105 +* amazon-eks-arm64-node-1.22-v20230105 +* amazon-eks-arm64-node-1.21-v20230105 +* amazon-eks-arm64-node-1.20-v20230105 +* amazon-eks-node-1.24-v20230105 +* amazon-eks-node-1.23-v20230105 +* amazon-eks-node-1.22-v20230105 +* amazon-eks-node-1.21-v20230105 +* amazon-eks-node-1.20-v20230105 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.7-20230105` +* `1.23.13-20230105` +* `1.22.15-20230105` +* `1.21.14-20230105` +* `1.20.15-20230105` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.7/2022-10-31/ +* s3://amazon-eks/1.23.13/2022-10-31/ +* s3://amazon-eks/1.22.15/2022-10-31/ +* s3://amazon-eks/1.21.14/2022-10-31/ +* s3://amazon-eks/1.20.15/2022-10-31/ + +AMI details: +* kernel: 5.4.226-129.415.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.4-1.amzn2 +* cuda: 11.4.0-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +- This will be the last release for 1.20 AMI's. +- Decrease `launch_block_device_mappings_volume_size` to 4 ([#1143](https://github.com/awslabs/amazon-eks-ami/pull/1143)). + - This fixes an issue with 4GiB launch block devices. More information is available in [#1142](https://github.com/awslabs/amazon-eks-ami/issues/1142). +- Container image caching has been disabled while we work to optimize the disk usage of this feature. This feature was only enabled for 1.24 AMI's in the previous release, [v20221222](https://github.com/awslabs/amazon-eks-ami/releases/tag/v20221222). 
+ +Minor changes: +- Update AWS CLI to `2.9.12` + +### AMI Release v20221222 +* amazon-eks-gpu-node-1.24-v20221222 +* amazon-eks-gpu-node-1.23-v20221222 +* amazon-eks-gpu-node-1.22-v20221222 +* amazon-eks-gpu-node-1.21-v20221222 +* amazon-eks-gpu-node-1.20-v20221222 +* amazon-eks-arm64-node-1.24-v20221222 +* amazon-eks-arm64-node-1.23-v20221222 +* amazon-eks-arm64-node-1.22-v20221222 +* amazon-eks-arm64-node-1.21-v20221222 +* amazon-eks-arm64-node-1.20-v20221222 +* amazon-eks-node-1.24-v20221222 +* amazon-eks-node-1.23-v20221222 +* amazon-eks-node-1.22-v20221222 +* amazon-eks-node-1.21-v20221222 +* amazon-eks-node-1.20-v20221222 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.7-20221222` +* `1.23.13-20221222` +* `1.22.15-20221222` +* `1.21.14-20221222` +* `1.20.15-20221222` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.7/2022-10-31/ +* s3://amazon-eks/1.23.13/2022-10-31/ +* s3://amazon-eks/1.22.15/2022-10-31/ +* s3://amazon-eks/1.21.14/2022-10-31/ +* s3://amazon-eks/1.20.15/2022-10-31/ + +AMI details: +* kernel: 5.4.226-129.415.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.4-1.amzn2 +* cuda: 11.4.0-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +- Kernel updated to `5.4.226-129.415.amzn2` for: + - [ALASKERNEL-5.4-2022-040](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2022-040.html) + - [ALASKERNEL-5.4-2022-039](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2022-039.html) +- NVIDIA driver updated to `470.161.03-1` to address security issues. More information is available in [NVIDIA security bulletin #5415](https://nvidia.custhelp.com/app/answers/detail/a_id/5415). +- Cache pause, vpc-cni, and kube-proxy images during build ([#938](https://github.com/awslabs/amazon-eks-ami/pull/938)) + - *Note* that this has only been enabled for 1.24 AMIs at this time. 
+- Disable yum updates in cloud-init ([#1074](https://github.com/awslabs/amazon-eks-ami/pull/1074)) +- Skip sandbox image pull if already present ([#1090](https://github.com/awslabs/amazon-eks-ami/pull/1090)) +- Move variable defaults to `--var-file` ([#1079](https://github.com/awslabs/amazon-eks-ami/pull/1079)) + +Minor changes: +- Add ECR accounts for `eu-south-2`, `eu-central-2`, `ap-south-2` ([#1125](https://github.com/awslabs/amazon-eks-ami/pull/1125)) +- Handle indentation when parsing `sandbox_image` from `containerd` config ([#1119](https://github.com/awslabs/amazon-eks-ami/pull/1119)) +- Lookup instanceId using IMDSv2 in Windows log collector script ([#1116](https://github.com/awslabs/amazon-eks-ami/pull/1116)) +- Remove `aws_region` and `binary_bucket_region` overrides from Makefile ([#1115](https://github.com/awslabs/amazon-eks-ami/pull/1115)) +- Sym-link awscli to /bin ([#1102](https://github.com/awslabs/amazon-eks-ami/pull/1102)) +- Configure containerd registry certificates by default ([#1049](https://github.com/awslabs/amazon-eks-ami/pull/1049)) + +### AMI Release v20221112 +* amazon-eks-gpu-node-1.24-v20221112 +* amazon-eks-gpu-node-1.23-v20221112 +* amazon-eks-gpu-node-1.22-v20221112 +* amazon-eks-gpu-node-1.21-v20221112 +* amazon-eks-gpu-node-1.20-v20221112 +* amazon-eks-arm64-node-1.24-v20221112 +* amazon-eks-arm64-node-1.23-v20221112 +* amazon-eks-arm64-node-1.22-v20221112 +* amazon-eks-arm64-node-1.21-v20221112 +* amazon-eks-arm64-node-1.20-v20221112 +* amazon-eks-node-1.24-v20221112 +* amazon-eks-node-1.23-v20221112 +* amazon-eks-node-1.22-v20221112 +* amazon-eks-node-1.21-v20221112 +* amazon-eks-node-1.20-v20221112 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.7-20221112` +* `1.23.13-20221112` +* `1.22.15-20221112` +* `1.21.14-20221112` +* `1.20.15-20221112` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.7/2022-10-31/ +* 
s3://amazon-eks/1.23.13/2022-10-31/ +* s3://amazon-eks/1.22.15/2022-10-31/ +* s3://amazon-eks/1.21.14/2022-10-31/ +* s3://amazon-eks/1.20.15/2022-10-31/ + +AMI details: +* kernel: 5.4.219-126.411.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: runc-1.1.4-1.amzn2 +* cuda: 470.141.03-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +* Upgrades `runc` to version `1.1.4` +* Updates [aws-iam-authenticator](https://github.com/kubernetes-sigs/aws-iam-authenticator) to version `0.5.10` and updates `kubelet` versions to `1.22.15`, `1.23.13` and `1.24.7` +* [Updates `client.authentication.k8s.io` to `v1beta1`](https://github.com/awslabs/amazon-eks-ami/commit/ce1c11f9db5bf5a730e978e74e13174d4b9f73a3) +* [Updates credential provider API to beta for Kubernetes versions `1.24+`](https://github.com/awslabs/amazon-eks-ami/commit/a521047d1b097b9c3dbb562ca9bdab5a641f347f) +* [Installs awscli v2 bundle when possible](https://github.com/awslabs/amazon-eks-ami/commit/794ed5f10842b436e10c9bc89ee41491a6494ade) + +### AMI Release v20221104 +* amazon-eks-gpu-node-1.24-v20221104 +* amazon-eks-gpu-node-1.23-v20221104 +* amazon-eks-gpu-node-1.22-v20221104 +* amazon-eks-gpu-node-1.21-v20221104 +* amazon-eks-gpu-node-1.20-v20221104 +* amazon-eks-arm64-node-1.24-v20221104 +* amazon-eks-arm64-node-1.23-v20221104 +* amazon-eks-arm64-node-1.22-v20221104 +* amazon-eks-arm64-node-1.21-v20221104 +* amazon-eks-arm64-node-1.20-v20221104 +* amazon-eks-node-1.24-v20221104 +* amazon-eks-node-1.23-v20221104 +* amazon-eks-node-1.22-v20221104 +* amazon-eks-node-1.21-v20221104 +* amazon-eks-node-1.20-v20221104 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.6-20221104` +* `1.23.9-20221104` +* `1.22.12-20221104` +* `1.21.14-20221104` +* `1.20.15-20221104` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.6/2022-10-05/ +* 
s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ + +AMI details: +* kernel: 5.4.219-126.411.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.3-1.amzn2.0.2 +* cuda: 470.141.03-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +* Adds support for 1.24 with version 1.24.6 +* Upgrades kernel at `5.4.219-126.411.amzn2` to address [known issues with the previous kernel version](https://github.com/awslabs/amazon-eks-ami/issues/1071) + +### AMI Release v20221101 +* amazon-eks-gpu-node-1.23-v20221101 +* amazon-eks-gpu-node-1.22-v20221101 +* amazon-eks-gpu-node-1.21-v20221101 +* amazon-eks-gpu-node-1.20-v20221101 +* amazon-eks-arm64-node-1.23-v20221101 +* amazon-eks-arm64-node-1.22-v20221101 +* amazon-eks-arm64-node-1.21-v20221101 +* amazon-eks-arm64-node-1.20-v20221101 +* amazon-eks-node-1.23-v20221101 +* amazon-eks-node-1.22-v20221101 +* amazon-eks-node-1.21-v20221101 +* amazon-eks-node-1.20-v20221101 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.23.9-20221101` +* `1.22.12-20221101` +* `1.21.14-20221101` +* `1.20.15-20221101` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ + +AMI details: +* kernel: 5.4.209-116.367.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.3-1.amzn2.0.2 +* cuda: 470.141.03-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +* Pin Kernel 5.4 to 5.4.209-116.367 to prevent nodes from going into Unready [#1072](https://github.com/awslabs/amazon-eks-ami/pull/1072) +* Increase the kube-api-server QPS from 5/10 to 10/20 
[#1030](https://github.com/awslabs/amazon-eks-ami/pull/1030) +* Update docker and containerd for [ALASDOCKER-2022-021](https://alas.aws.amazon.com/AL2/ALASDOCKER-2022-021.html) [#1056](https://github.com/awslabs/amazon-eks-ami/pull/1056) +* runc version is updated to 1.1.3-1.amzn2.0.2 to include ALAS2DOCKER-2022-020 [#1055](https://github.com/awslabs/amazon-eks-ami/pull/1055) +* Release AMI in me-central-1 with version 1.21, 1.22, 1.23. 1.20 is not supported in this region since it will be deprecated soon. +* Fixes an issue with Docker daemon configuration on the GPU AMI (#351). + * **Note** that if you have a workaround in place for this issue, you'll likely need to revert it. + +### [Recalled] AMI Release v20221027 +* amazon-eks-gpu-node-1.23-v20221027 +* amazon-eks-gpu-node-1.22-v20221027 +* amazon-eks-gpu-node-1.21-v20221027 +* amazon-eks-gpu-node-1.20-v20221027 +* amazon-eks-arm64-node-1.23-v20221027 +* amazon-eks-arm64-node-1.22-v20221027 +* amazon-eks-arm64-node-1.21-v20221027 +* amazon-eks-arm64-node-1.20-v20221027 +* amazon-eks-node-1.23-v20221027 +* amazon-eks-node-1.22-v20221027 +* amazon-eks-node-1.21-v20221027 +* amazon-eks-node-1.20-v20221027 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.23.9-20221027` +* `1.22.12-20221027` +* `1.21.14-20221027` +* `1.20.15-20221027` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ + +AMI details: +* kernel: 5.4.217-126.408.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.3-1.amzn2.0.2 +* cuda: 470.141.03-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +* cuda is updated to 470.141.03-1. +* Linux kernel is updated to 5.4.217-126.408.amzn2. 
+* runc version is updated to 1.1.3-1.amzn2.0.2 to include [ALAS2DOCKER-2022-020](https://alas.aws.amazon.com/AL2/ALASDOCKER-2022-020.html). [#1055](https://github.com/awslabs/amazon-eks-ami/pull/1055) +* docker version is updated to 20.10.17-1.amzn2.0.1, and containerd version is updated to 1.6.6-1.amzn2.0.2 to include [ALASDOCKER-2022-021](https://alas.aws.amazon.com/AL2/ALASDOCKER-2022-021.html). [#1056](https://github.com/awslabs/amazon-eks-ami/pull/1056) +* Increase the kube-api-server QPS from 5/10 to 10/20. [#1030](https://github.com/awslabs/amazon-eks-ami/pull/1030) +* Release AMI in me-central-1 with version 1.21, 1.22, 1.23. 1.20 will not be supported since it will be deprecated soon. + +### AMI Release v20220926 +* amazon-eks-gpu-node-1.23-v20220926 +* amazon-eks-gpu-node-1.22-v20220926 +* amazon-eks-gpu-node-1.21-v20220926 +* amazon-eks-gpu-node-1.20-v20220926 +* amazon-eks-arm64-node-1.23-v20220926 +* amazon-eks-arm64-node-1.22-v20220926 +* amazon-eks-arm64-node-1.21-v20220926 +* amazon-eks-arm64-node-1.20-v20220926 +* amazon-eks-node-1.23-v20220926 +* amazon-eks-node-1.22-v20220926 +* amazon-eks-node-1.21-v20220926 +* amazon-eks-node-1.20-v20220926 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.23.9-20220926` +* `1.22.12-20220926` +* `1.21.14-20220926` +* `1.20.15-20220926` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ + +AMI details: +* kernel: 5.4.209-116.367.amzn2 +* dockerd: 20.10.17-1.amzn2 +* containerd: 1.6.6-1.amzn2 +* runc: 1.1.3-1.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable Changes: +* Phase 1 of support for Trn1 instances + +### AMI Release v20220914 +* amazon-eks-gpu-node-1.23-v20220914 +* amazon-eks-gpu-node-1.22-v20220914 +* 
amazon-eks-gpu-node-1.21-v20220914 +* amazon-eks-gpu-node-1.20-v20220914 +* amazon-eks-arm64-node-1.23-v20220914 +* amazon-eks-arm64-node-1.22-v20220914 +* amazon-eks-arm64-node-1.21-v20220914 +* amazon-eks-arm64-node-1.20-v20220914 +* amazon-eks-node-1.23-v20220914 +* amazon-eks-node-1.22-v20220914 +* amazon-eks-node-1.21-v20220914 +* amazon-eks-node-1.20-v20220914 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.23.9-20220914` +* `1.22.12-20220914` +* `1.21.14-20220914` +* `1.20.15-20220914` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ + +AMI details: +* kernel: 5.4.209-116.367.amzn2 +* dockerd: 20.10.17-1.amzn2 +* containerd: 1.6.6-1.amzn2 +* runc: 1.1.3-1.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +- The AWS CLI has been updated to [`1.25.72`](https://github.com/aws/aws-cli/blob/1.25.72/CHANGELOG.rst#L8) to support local EKS clusters on Outposts. +- This release fixes an issue with DNS cluster IP and IPv6. More info in #931. 
+- Kernel version updated to `5.4.209-116.367.amzn2` as a part of latest CVE patch [ALASKERNEL-5.4-2022-035](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2022-035.html) + +### AMI Release v20220824 +* amazon-eks-gpu-node-1.23-v20220824 +* amazon-eks-gpu-node-1.22-v20220824 +* amazon-eks-gpu-node-1.21-v20220824 +* amazon-eks-gpu-node-1.20-v20220824 +* amazon-eks-gpu-node-1.19-v20220824 +* amazon-eks-arm64-node-1.23-v20220824 +* amazon-eks-arm64-node-1.22-v20220824 +* amazon-eks-arm64-node-1.21-v20220824 +* amazon-eks-arm64-node-1.20-v20220824 +* amazon-eks-arm64-node-1.19-v20220824 +* amazon-eks-node-1.23-v20220824 +* amazon-eks-node-1.22-v20220824 +* amazon-eks-node-1.21-v20220824 +* amazon-eks-node-1.20-v20220824 +* amazon-eks-node-1.19-v20220824 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.23.9-20220824` +* `1.22.12-20220824` +* `1.21.14-20220824` +* `1.20.15-20220824` +* `1.19.15-20220824` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.209-116.363.amzn2 +* dockerd: 20.10.17-1.amzn2 +* containerd: 1.6.6-1.amzn2 +* runc: 1.1.3-1.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1575.0-1.amzn2 + +Notable changes: +* We are updating the versions of docker, containerd and runc as part of this AMI release. 
+* Kernel version is also updated to include the [latest CVE patches](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2022-034.html) +* This is the last release for 1.19 as we are at [end of support for 1.19](https://docs.aws.amazon.com/eks/latest/userguide/kubernetes-versions.html#kubernetes-release-calendar) + +### AMI Release v20220811 +* amazon-eks-gpu-node-1.23-v20220811 +* amazon-eks-gpu-node-1.22-v20220811 +* amazon-eks-gpu-node-1.21-v20220811 +* amazon-eks-gpu-node-1.20-v20220811 +* amazon-eks-gpu-node-1.19-v20220811 +* amazon-eks-arm64-node-1.23-v20220811 +* amazon-eks-arm64-node-1.22-v20220811 +* amazon-eks-arm64-node-1.21-v20220811 +* amazon-eks-arm64-node-1.20-v20220811 +* amazon-eks-arm64-node-1.19-v20220811 +* amazon-eks-node-1.23-v20220811 +* amazon-eks-node-1.22-v20220811 +* amazon-eks-node-1.21-v20220811 +* amazon-eks-node-1.20-v20220811 +* amazon-eks-node-1.19-v20220811 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.204-113.362.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1575.0-1.amzn2 + +Notable changes: +- Kubelet binaries updated, including a backport of [#109676](https://github.com/kubernetes/kubernetes/pull/109676). +- When using `containerd` as the container runtime, `systemd` will now be used as the cgroup driver. For more information, see [the Kubernetes documentation](https://kubernetes.io/docs/tasks/administer-cluster/kubeadm/configure-cgroup-driver/). +- Updated `aws-neuron-dkms` to `2.3.26` to address [a security issue](https://awsdocs-neuron.readthedocs-hosted.com/en/latest/release-notes/neuron-driver.html#ndriver-2-3-26-0). 
This is a recommended upgrade for all users of the GPU AMI. + +### AMI Release v20220802 +* amazon-eks-gpu-node-1.23-v20220802 +* amazon-eks-gpu-node-1.22-v20220802 +* amazon-eks-gpu-node-1.21-v20220802 +* amazon-eks-gpu-node-1.20-v20220802 +* amazon-eks-gpu-node-1.19-v20220802 +* amazon-eks-arm64-node-1.23-v20220802 +* amazon-eks-arm64-node-1.22-v20220802 +* amazon-eks-arm64-node-1.21-v20220802 +* amazon-eks-arm64-node-1.20-v20220802 +* amazon-eks-arm64-node-1.19-v20220802 +* amazon-eks-node-1.23-v20220802 +* amazon-eks-node-1.22-v20220802 +* amazon-eks-node-1.21-v20220802 +* amazon-eks-node-1.20-v20220802 +* amazon-eks-node-1.19-v20220802 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.7/2022-06-29/ +* s3://amazon-eks/1.22.9/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.15/2022-06-20/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.204-113.362.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1575.0-1.amzn2 + +Notable changes: +* Release 1.23 AMIs publicly + +### AMI Release v20220725 +* amazon-eks-gpu-node-1.22-v20220725 +* amazon-eks-gpu-node-1.21-v20220725 +* amazon-eks-gpu-node-1.20-v20220725 +* amazon-eks-gpu-node-1.19-v20220725 +* amazon-eks-arm64-node-1.22-v20220725 +* amazon-eks-arm64-node-1.21-v20220725 +* amazon-eks-arm64-node-1.20-v20220725 +* amazon-eks-arm64-node-1.19-v20220725 +* amazon-eks-node-1.22-v20220725 +* amazon-eks-node-1.21-v20220725 +* amazon-eks-node-1.20-v20220725 +* amazon-eks-node-1.19-v20220725 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.9/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.15/2022-06-20/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.204-113.362.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* 
cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1575.0 + +Notable changes: +* Updating pause-container version from 3.1 to 3.5 +* Adding log-collector-script to the AMI +* Kernel version upgraded to 5.4.204-113.362.amzn2 for [CVE-2022-0494](https://alas.aws.amazon.com/cve/html/CVE-2022-0494.html) [CVE-2022-0812](https://alas.aws.amazon.com/cve/html/CVE-2022-0812.html) [CVE-2022-1012](https://alas.aws.amazon.com/cve/html/CVE-2022-1012.html) [CVE-2022-1184](https://alas.aws.amazon.com/cve/html/CVE-2022-1184.html) [CVE-2022-1966](https://alas.aws.amazon.com/cve/html/CVE-2022-1966.html) [CVE-2022-32250](https://alas.aws.amazon.com/cve/html/CVE-2022-32250.html) [CVE-2022-32296](https://alas.aws.amazon.com/cve/html/CVE-2022-32296.html) [CVE-2022-32981](https://alas.aws.amazon.com/cve/html/CVE-2022-32981.html) + + +### AMI Release v20220629 +* amazon-eks-gpu-node-1.22-v20220629 +* amazon-eks-gpu-node-1.21-v20220629 +* amazon-eks-gpu-node-1.20-v20220629 +* amazon-eks-gpu-node-1.19-v20220629 +* amazon-eks-arm64-node-1.22-v20220629 +* amazon-eks-arm64-node-1.21-v20220629 +* amazon-eks-arm64-node-1.20-v20220629 +* amazon-eks-arm64-node-1.19-v20220629 +* amazon-eks-node-1.22-v20220629 +* amazon-eks-node-1.21-v20220629 +* amazon-eks-node-1.20-v20220629 +* amazon-eks-node-1.19-v20220629 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.9/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.15/2022-06-20/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.196-108.356.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Noted software versions are identical to release v20220620 in the commercial partition. 
+ +### AMI Release v20220620 +* amazon-eks-gpu-node-1.22-v20220620 +* amazon-eks-gpu-node-1.21-v20220620 +* amazon-eks-gpu-node-1.20-v20220620 +* amazon-eks-gpu-node-1.19-v20220620 +* amazon-eks-arm64-node-1.22-v20220620 +* amazon-eks-arm64-node-1.21-v20220620 +* amazon-eks-arm64-node-1.20-v20220620 +* amazon-eks-arm64-node-1.19-v20220620 +* amazon-eks-node-1.22-v20220620 +* amazon-eks-node-1.21-v20220620 +* amazon-eks-node-1.20-v20220620 +* amazon-eks-node-1.19-v20220620 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.9/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.15/2022-06-20/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.196-108.356.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Notable changes: +* Update kubelet binaries for 1.20 +* Support packer's ami_regions feature +* Increase /var/log/messages limit to 100M +* Support local cluster in Outposts +* Adding c6id, m6id, r6id to eni-max-pods.txt + +### AMI Release v20220610 +* amazon-eks-gpu-node-1.22-v20220610 +* amazon-eks-gpu-node-1.21-v20220610 +* amazon-eks-gpu-node-1.20-v20220610 +* amazon-eks-gpu-node-1.19-v20220610 +* amazon-eks-arm64-node-1.22-v20220610 +* amazon-eks-arm64-node-1.21-v20220610 +* amazon-eks-arm64-node-1.20-v20220610 +* amazon-eks-arm64-node-1.19-v20220610 +* amazon-eks-node-1.22-v20220610 +* amazon-eks-node-1.21-v20220610 +* amazon-eks-node-1.20-v20220610 +* amazon-eks-node-1.19-v20220610 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.9/2022-06-03/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.196-108.356.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* 
nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Notable changes: +* Containerd version upgraded to 1.4.13-3.amzn2 for [CVE-2022-31030](https://alas.aws.amazon.com/cve/html/CVE-2022-31030.html). +* Kernel version upgraded to 5.4.196-108.356.amzn2 for [CVE-2022-0494](https://alas.aws.amazon.com/cve/html/CVE-2022-0494.html), [CVE-2022-0854](https://alas.aws.amazon.com/cve/html/CVE-2022-0854.html), [CVE-2022-1729](https://alas.aws.amazon.com/cve/html/CVE-2022-1729.html), [CVE-2022-1836](https://alas.aws.amazon.com/cve/html/CVE-2022-1836.html), [CVE-2022-28893](https://alas.aws.amazon.com/cve/html/CVE-2022-28893.html), [CVE-2022-29581](https://alas.aws.amazon.com/cve/html/CVE-2022-29581.html) +* Updating the kubelet version for 1.22 from 1.22.6 to 1.22.9 + +### AMI Release v20220526 +* amazon-eks-gpu-node-1.22-v20220526 +* amazon-eks-gpu-node-1.21-v20220526 +* amazon-eks-gpu-node-1.20-v20220526 +* amazon-eks-gpu-node-1.19-v20220526 +* amazon-eks-arm64-node-1.22-v20220526 +* amazon-eks-arm64-node-1.21-v20220526 +* amazon-eks-arm64-node-1.20-v20220526 +* amazon-eks-arm64-node-1.19-v20220526 +* amazon-eks-node-1.22-v20220526 +* amazon-eks-node-1.21-v20220526 +* amazon-eks-node-1.20-v20220526 +* amazon-eks-node-1.19-v20220526 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.190-107.353.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Notable changes: +Linux kernel upgraded to 5.4.190-107.353. 
+ +### AMI Release v20220523 +* amazon-eks-gpu-node-1.22-v20220523 +* amazon-eks-gpu-node-1.21-v20220523 +* amazon-eks-gpu-node-1.20-v20220523 +* amazon-eks-gpu-node-1.19-v20220523 +* amazon-eks-arm64-node-1.22-v20220523 +* amazon-eks-arm64-node-1.21-v20220523 +* amazon-eks-arm64-node-1.20-v20220523 +* amazon-eks-arm64-node-1.19-v20220523 +* amazon-eks-node-1.22-v20220523 +* amazon-eks-node-1.21-v20220523 +* amazon-eks-node-1.20-v20220523 +* amazon-eks-node-1.19-v20220523 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.190-107.353.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Notable changes: +* Added i4i instance support +* Fixes regression in the docker group ID. AMI build will now fail if the docker group ID is not 1950. +* Removes unused kernels (such as 4.14) during AMI build. This prevents false-positives from automated scanning tools such as AWS Inspector. +* Maintain dockershim compatibility symlink after instance reboot +* Updates 1.21 kubelet version to 1.21.12 + +### [Recalled] AMI Release v20220513 +* amazon-eks-gpu-node-1.22-v20220513 +* amazon-eks-gpu-node-1.21-v20220513 +* amazon-eks-gpu-node-1.20-v20220513 +* amazon-eks-gpu-node-1.19-v20220513 +* amazon-eks-arm64-node-1.22-v20220513 +* amazon-eks-arm64-node-1.21-v20220513 +* amazon-eks-arm64-node-1.20-v20220513 +* amazon-eks-arm64-node-1.19-v20220513 +* amazon-eks-node-1.22-v20220513 +* amazon-eks-node-1.21-v20220513 +* amazon-eks-node-1.20-v20220513 +* amazon-eks-node-1.19-v20220513 + +Notice: +* EKS-Optimized AMI SSM parameters contained an incorrect reference to the release version of the AMIs in this release. 
+ +### AMI Release v20220429 +* amazon-eks-gpu-node-1.22-v20220429 +* amazon-eks-gpu-node-1.21-v20220429 +* amazon-eks-gpu-node-1.20-v20220429 +* amazon-eks-gpu-node-1.19-v20220429 +* amazon-eks-arm64-node-1.22-v20220429 +* amazon-eks-arm64-node-1.21-v20220429 +* amazon-eks-arm64-node-1.20-v20220429 +* amazon-eks-arm64-node-1.19-v20220429 +* amazon-eks-node-1.22-v20220429 +* amazon-eks-node-1.21-v20220429 +* amazon-eks-node-1.20-v20220429 +* amazon-eks-node-1.19-v20220429 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.188-104.359.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0-1.amzn2 + +Notable changes: +* Added c7g support +* [When replaying user-data in testing will bail user-data when strict due to moving files](https://github.com/awslabs/amazon-eks-ami/pull/893/files) + +### AMI Release v20220421 +* amazon-eks-gpu-node-1.22-v20220421 +* amazon-eks-gpu-node-1.21-v20220421 +* amazon-eks-gpu-node-1.20-v20220421 +* amazon-eks-gpu-node-1.19-v20220421 +* amazon-eks-arm64-node-1.22-v20220421 +* amazon-eks-arm64-node-1.21-v20220421 +* amazon-eks-arm64-node-1.20-v20220421 +* amazon-eks-arm64-node-1.19-v20220421 +* amazon-eks-node-1.22-v20220421 +* amazon-eks-node-1.21-v20220421 +* amazon-eks-node-1.20-v20220421 +* amazon-eks-node-1.19-v20220421 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.188-104.359.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 
+* SSM agent: 3.1.1188.0-1.amzn2 + +Notable changes: +* Includes patched Kernel for [CVE-2022-26490](https://alas.aws.amazon.com/cve/html/CVE-2022-26490.html), [CVE-2022-27666](https://alas.aws.amazon.com/cve/html/CVE-2022-27666.html) and [CVE-2022-28356](https://alas.aws.amazon.com/cve/html/CVE-2022-28356.html) +* New release with AMIs now available in ap-southeast-3 + +### AMI Release v20220420 +* amazon-eks-gpu-node-1.22-v20220420 +* amazon-eks-gpu-node-1.21-v20220420 +* amazon-eks-gpu-node-1.20-v20220420 +* amazon-eks-gpu-node-1.19-v20220420 +* amazon-eks-arm64-node-1.22-v20220420 +* amazon-eks-arm64-node-1.21-v20220420 +* amazon-eks-arm64-node-1.20-v20220420 +* amazon-eks-arm64-node-1.19-v20220420 +* amazon-eks-node-1.22-v20220420 +* amazon-eks-node-1.21-v20220420 +* amazon-eks-node-1.20-v20220420 +* amazon-eks-node-1.19-v20220420 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.188-104.359.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Notable changes: +- Patches for [CVE-2022-0778](https://nvd.nist.gov/vuln/detail/CVE-2022-0778), [CVE-2022-23218](https://nvd.nist.gov/vuln/detail/CVE-2022-23218) and [CVE-2022-23219](https://nvd.nist.gov/vuln/detail/CVE-2022-23219) have been included. 
+- Deprecating 1.18 k8s Version + +### AMI Release v20220406 +* amazon-eks-gpu-node-1.22-v20220406 +* amazon-eks-gpu-node-1.21-v20220406 +* amazon-eks-gpu-node-1.20-v20220406 +* amazon-eks-gpu-node-1.19-v20220406 +* amazon-eks-gpu-node-1.18-v20220406 +* amazon-eks-arm64-node-1.22-v20220406 +* amazon-eks-arm64-node-1.21-v20220406 +* amazon-eks-arm64-node-1.20-v20220406 +* amazon-eks-arm64-node-1.19-v20220406 +* amazon-eks-arm64-node-1.18-v20220406 +* amazon-eks-node-1.22-v20220406 +* amazon-eks-node-1.21-v20220406 +* amazon-eks-node-1.20-v20220406 +* amazon-eks-node-1.19-v20220406 +* amazon-eks-node-1.18-v20220406 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.181-99.354.amzn2 (1.19 and above), 4.14.268-205.500.amzn2 (1.18 and below) +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +- Patches for [CVE-2022-24769](https://nvd.nist.gov/vuln/detail/CVE-2022-24769) have been included. 
+- The bootstrap script will auto-discover maxPods values when instanceType is missing in eni-max-pods.txt + +### AMI Release v20220317 +* amazon-eks-gpu-node-1.22-v20220317 +* amazon-eks-gpu-node-1.21-v20220317 +* amazon-eks-gpu-node-1.20-v20220317 +* amazon-eks-gpu-node-1.19-v20220317 +* amazon-eks-gpu-node-1.18-v20220317 +* amazon-eks-arm64-node-1.22-v20220317 +* amazon-eks-arm64-node-1.21-v20220317 +* amazon-eks-arm64-node-1.20-v20220317 +* amazon-eks-arm64-node-1.19-v20220317 +* amazon-eks-arm64-node-1.18-v20220317 +* amazon-eks-node-1.22-v20220317 +* amazon-eks-node-1.21-v20220317 +* amazon-eks-node-1.20-v20220317 +* amazon-eks-node-1.19-v20220317 +* amazon-eks-node-1.18-v20220317 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.181-99.354.amzn2 (1.19 and above), 4.14.268-205.500.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-8.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +- Adding support for new k8s version 1.22 + +### AMI Release v20220309 +* amazon-eks-gpu-node-1.21-v20220309 +* amazon-eks-gpu-node-1.20-v20220309 +* amazon-eks-gpu-node-1.19-v20220309 +* amazon-eks-gpu-node-1.18-v20220309 +* amazon-eks-arm64-node-1.21-v20220309 +* amazon-eks-arm64-node-1.20-v20220309 +* amazon-eks-arm64-node-1.19-v20220309 +* amazon-eks-arm64-node-1.18-v20220309 +* amazon-eks-node-1.21-v20220309 +* amazon-eks-node-1.20-v20220309 +* amazon-eks-node-1.19-v20220309 +* amazon-eks-node-1.18-v20220309 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 
5.4.181-99.354.amzn2 (1.19 and above), 4.14.268-205.500.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-8.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +- Update kernel version to 4.14.268-205.500.amzn2 for 1.18 and below, 5.4.181-99.354.amzn2 for 1.19 and above. For more information, see [ALAS-2022-1761](https://alas.aws.amazon.com/AL2/ALAS-2022-1761.html) and [ALASKERNEL-5.4-2022-023](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2022-023.html). + +### AMI Release v20220303 +* amazon-eks-gpu-node-1.21-v20220303 +* amazon-eks-gpu-node-1.20-v20220303 +* amazon-eks-gpu-node-1.19-v20220303 +* amazon-eks-gpu-node-1.18-v20220303 +* amazon-eks-arm64-node-1.21-v20220303 +* amazon-eks-arm64-node-1.20-v20220303 +* amazon-eks-arm64-node-1.19-v20220303 +* amazon-eks-arm64-node-1.18-v20220303 +* amazon-eks-node-1.21-v20220303 +* amazon-eks-node-1.20-v20220303 +* amazon-eks-node-1.19-v20220303 +* amazon-eks-node-1.18-v20220303 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.176-91.338.amzn2 (1.19 and above), 4.14.262-200.489.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-8.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +- Update `containerd` to `1.4.6-8.amzn2` for CVE-2022-23648. 
+ +### AMI Release v20220226 +* amazon-eks-gpu-node-1.21-v20220226 +* amazon-eks-gpu-node-1.20-v20220226 +* amazon-eks-gpu-node-1.19-v20220226 +* amazon-eks-gpu-node-1.18-v20220226 +* amazon-eks-arm64-node-1.21-v20220226 +* amazon-eks-arm64-node-1.20-v20220226 +* amazon-eks-arm64-node-1.19-v20220226 +* amazon-eks-arm64-node-1.18-v20220226 +* amazon-eks-node-1.21-v20220226 +* amazon-eks-node-1.20-v20220226 +* amazon-eks-node-1.19-v20220226 +* amazon-eks-node-1.18-v20220226 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2022-01-21/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.176-91.338.amzn2 (1.19 and above), 4.14.262-200.489.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-7.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +- Upgrade `ec2-utils` version to `1.2-47`, addressing an issue with device symbolic links. More information is available [here](https://github.com/aws/amazon-ec2-utils/issues/22). 
+ +### AMI Release v20220216 +* amazon-eks-gpu-node-1.21-v20220216 +* amazon-eks-gpu-node-1.20-v20220216 +* amazon-eks-gpu-node-1.19-v20220216 +* amazon-eks-gpu-node-1.18-v20220216 +* amazon-eks-arm64-node-1.21-v20220216 +* amazon-eks-arm64-node-1.20-v20220216 +* amazon-eks-arm64-node-1.19-v20220216 +* amazon-eks-arm64-node-1.18-v20220216 +* amazon-eks-node-1.21-v20220216 +* amazon-eks-node-1.20-v20220216 +* amazon-eks-node-1.19-v20220216 +* amazon-eks-node-1.18-v20220216 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2022-01-21/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.176-91.338.amzn2 (1.19 and above), 4.14.262-200.489.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-7.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +- Support for `c6a` instance types. 
+ +### AMI Release v20220210 +* amazon-eks-gpu-node-1.21-v20220210 +* amazon-eks-gpu-node-1.20-v20220210 +* amazon-eks-gpu-node-1.19-v20220210 +* amazon-eks-gpu-node-1.18-v20220210 +* amazon-eks-arm64-node-1.21-v20220210 +* amazon-eks-arm64-node-1.20-v20220210 +* amazon-eks-arm64-node-1.19-v20220210 +* amazon-eks-arm64-node-1.18-v20220210 +* amazon-eks-node-1.21-v20220210 +* amazon-eks-node-1.20-v20220210 +* amazon-eks-node-1.19-v20220210 +* amazon-eks-node-1.18-v20220210 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2022-01-21/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.176-91.338.amzn2 (1.19 and above), 4.14.262-200.489.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-7.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +- Upgrade kernel version for Kubernetes 1.18 to `4.14.262-200.489.amzn2`, addressing several CVE's. More information available in [ALAS2-2022-1749](https://alas.aws.amazon.com/AL2/ALAS-2022-1749.html) +- Support for `hpc6a` instance types. +- Removes support for the `chacha20-poly1305@openssh.com` cipher, which is not FIPS-compliant. 
+ +### AMI Release v20220123 + - amazon-eks-node-1.18-v20220123 + - amazon-eks-arm64-node-1.18-v20220123 + - amazon-eks-gpu-node-1.18-v20220123 + - amazon-eks-node-1.19-v20220123 + - amazon-eks-arm64-node-1.19-v20220123 + - amazon-eks-gpu-node-1.19-v20220123 + - amazon-eks-node-1.20-v20220123 + - amazon-eks-arm64-node-1.20-v20220123 + - amazon-eks-gpu-node-1.20-v20220123 + - amazon-eks-node-1.21-v20220123 + - amazon-eks-arm64-node-1.21-v20220123 + - amazon-eks-gpu-node-1.21-v20220123 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2022-01-21/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.172-90.336.amzn2 (1.19 and above), 4.14.256-197.484.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-7.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +* Upgrade kernel version for Kubernetes 1.19 and above to 5.4.172-90.336.amzn2.x86_64 for CVE-2022-0185 +* Bug fix in kubelet for 1.21 AMIs to handle compacted IPv6 addresses returned by EC2 API. 
New Kubelet version: `v1.21.5-eks-9017834` + +### AMI Release v20220112 +* amazon-eks-gpu-node-1.21-v20220112 +* amazon-eks-gpu-node-1.20-v20220112 +* amazon-eks-gpu-node-1.19-v20220112 +* amazon-eks-gpu-node-1.18-v20220112 +* amazon-eks-arm64-node-1.21-v20220112 +* amazon-eks-arm64-node-1.20-v20220112 +* amazon-eks-arm64-node-1.19-v20220112 +* amazon-eks-arm64-node-1.18-v20220112 +* amazon-eks-node-1.21-v20220112 +* amazon-eks-node-1.20-v20220112 +* amazon-eks-node-1.19-v20220112 +* amazon-eks-node-1.18-v20220112 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.162-86.275.amzn2 (1.19 and above), 4.14.256-197.484.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-7.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0-1.amzn2 + +Notable changes: +* Updating aws-cli ( aws-cli/1.22.32 ). Latest CLI is installed using the recommended steps [here](https://docs.aws.amazon.com/cli/v1/userguide/install-linux.html#install-linux-bundled). This change is specific to this AMI release. +* Added fix to handle failures when serviceIpv6Cidr isn't provided. Related issue: https://github.com/awslabs/amazon-eks-ami/issues/839. 
+* Added fix to make ipFamily check case-insensitive + +### AMI Release v20211206 +* amazon-eks-gpu-node-1.21-v20211206 +* amazon-eks-gpu-node-1.20-v20211206 +* amazon-eks-gpu-node-1.19-v20211206 +* amazon-eks-gpu-node-1.18-v20211206 +* amazon-eks-arm64-node-1.21-v20211206 +* amazon-eks-arm64-node-1.20-v20211206 +* amazon-eks-arm64-node-1.19-v20211206 +* amazon-eks-arm64-node-1.18-v20211206 +* amazon-eks-node-1.21-v20211206 +* amazon-eks-node-1.20-v20211206 +* amazon-eks-node-1.19-v20211206 +* amazon-eks-node-1.18-v20211206 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ + +AMI details: +* kernel: 5.4.156-83.273.amzn2 (1.19 and above), 4.14.252-195.483.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-7.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0-1.amzn2 + +Notable changes: +* Adds new instanceTypes to the eni-max-pods.txt file. +* Patch for [AL2/ALAS-2021-1722](https://alas.aws.amazon.com/AL2/ALAS-2021-1722.html). 
+ +### AMI Release v20211117 +* amazon-eks-gpu-node-1.21-v20211117 +* amazon-eks-gpu-node-1.20-v20211117 +* amazon-eks-gpu-node-1.19-v20211117 +* amazon-eks-gpu-node-1.18-v20211117 +* amazon-eks-gpu-node-1.17-v20211117 +* amazon-eks-arm64-node-1.21-v20211117 +* amazon-eks-arm64-node-1.20-v20211117 +* amazon-eks-arm64-node-1.19-v20211117 +* amazon-eks-arm64-node-1.18-v20211117 +* amazon-eks-arm64-node-1.17-v20211117 +* amazon-eks-node-1.21-v20211117 +* amazon-eks-node-1.20-v20211117 +* amazon-eks-node-1.19-v20211117 +* amazon-eks-node-1.18-v20211117 +* amazon-eks-node-1.17-v20211117 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ +* s3://amazon-eks/1.17.17/2021-09-02/ + +AMI details: +* kernel: 5.4.156-83.273.amzn2 (1.19 and above), 4.14.252-195.483.amzn2 (1.18 and below) +* dockerd: 20.10.7-5.amzn2 +* containerd: 1.4.6-7.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0-1.amzn2 + +Notable changes: +Update `containerd` to `1.4.6-7.amzn2` and `docker` to `20.10.7-5.amzn2` to patch vulnerabilities in [CVE-2021-41190](https://alas.aws.amazon.com/ALAS-2021-1551.html) + +### AMI Release v20211109 +* amazon-eks-gpu-node-1.21-v20211109 +* amazon-eks-gpu-node-1.20-v20211109 +* amazon-eks-gpu-node-1.19-v20211109 +* amazon-eks-gpu-node-1.18-v20211109 +* amazon-eks-gpu-node-1.17-v20211109 +* amazon-eks-arm64-node-1.21-v20211109 +* amazon-eks-arm64-node-1.20-v20211109 +* amazon-eks-arm64-node-1.19-v20211109 +* amazon-eks-arm64-node-1.18-v20211109 +* amazon-eks-arm64-node-1.17-v20211109 +* amazon-eks-node-1.21-v20211109 +* amazon-eks-node-1.20-v20211109 +* amazon-eks-node-1.19-v20211109 +* amazon-eks-node-1.18-v20211109 +* amazon-eks-node-1.17-v20211109 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.21.5/2021-11-10/ +* 
s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ +* s3://amazon-eks/1.18.20/2021-09-02/ +* s3://amazon-eks/1.17.17/2021-09-02/ + +AMI details: +* kernel: 5.4.149-73.259.amzn2 (1.19 and above), 4.14.252-195.483.amzn2 (1.18 and below) +* dockerd: 20.10.7-3.amzn2 +* containerd: 1.4.6-3.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 470.57.02 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: +* Upgrade kernel version for 1.17 and 1.18 to 4.14.252-195.483.amzn2 +* Upgrade cuda version from 460.73.01 to 470.57.02 +* Upgrade kubelet version + * 1.19.14 -> 1.19.15 + * 1.20.10 -> 1.20.11 + * 1.21.4 -> 1.21.5 +* Remove cbc ciphers and use following recommended ciphers + * chacha20-poly1305@openssh.com + * aes128-ctr + * aes256-ctr + * aes128-gcm@openssh.com + * aes256-gcm@openssh.com + +## AMI Release v20211013 + +* amazon-eks-gpu-node-1.21-v20211013 +* amazon-eks-gpu-node-1.20-v20211013 +* amazon-eks-gpu-node-1.19-v20211013 +* amazon-eks-gpu-node-1.18-v20211013 +* amazon-eks-gpu-node-1.17-v20211013 +* amazon-eks-gpu-node-1.16-v20211013 +* amazon-eks-arm64-node-1.21-v20211013 +* amazon-eks-arm64-node-1.20-v20211013 +* amazon-eks-arm64-node-1.19-v20211013 +* amazon-eks-arm64-node-1.18-v20211013 +* amazon-eks-arm64-node-1.17-v20211013 +* amazon-eks-arm64-node-1.16-v20211013 +* amazon-eks-node-1.21-v20211013 +* amazon-eks-node-1.20-v20211013 +* amazon-eks-node-1.19-v20211013 +* amazon-eks-node-1.18-v20211013 +* amazon-eks-node-1.17-v20211013 +* amazon-eks-node-1.16-v20211013 + +Binaries used to build these AMIs are published: + +* s3://amazon-eks/1.21.4/2021-10-12/ +* s3://amazon-eks/1.20.10/2021-10-12/ +* s3://amazon-eks/1.19.14/2021-10-12/ +* s3://amazon-eks/1.18.20/2021-09-02/ +* s3://amazon-eks/1.17.17/2021-09-02/ +* s3://amazon-eks/1.16.15/2021-09-02/ + +AMI details: + +* kernel: 5.4.149-73.259.amzn2 (1.19 and above), 4.14.248-189.473.amzn2 (1.18 and below) +* dockerd: 20.10.7-3.amzn2 +* containerd: 1.4.6-3.amzn2 
+* runc: 1.0.0-2.amzn2 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: + +* A fix has been made to the GPU AMIs to ensure they work correctly with containerd as the container runtime. + +## AMI Release v20211008 + +* amazon-eks-gpu-node-1.21-v20211008 +* amazon-eks-gpu-node-1.20-v20211008 +* amazon-eks-gpu-node-1.19-v20211008 +* amazon-eks-gpu-node-1.18-v20211008 +* amazon-eks-gpu-node-1.17-v20211008 +* amazon-eks-gpu-node-1.16-v20211008 +* amazon-eks-gpu-node-1.15-v20211008 +* amazon-eks-arm64-node-1.21-v20211008 +* amazon-eks-arm64-node-1.20-v20211008 +* amazon-eks-arm64-node-1.19-v20211008 +* amazon-eks-arm64-node-1.18-v20211008 +* amazon-eks-arm64-node-1.17-v20211008 +* amazon-eks-arm64-node-1.16-v20211008 +* amazon-eks-arm64-node-1.15-v20211008 +* amazon-eks-node-1.21-v20211008 +* amazon-eks-node-1.20-v20211008 +* amazon-eks-node-1.19-v20211008 +* amazon-eks-node-1.18-v20211008 +* amazon-eks-node-1.17-v20211008 +* amazon-eks-node-1.16-v20211008 +* amazon-eks-node-1.15-v20211008 + +Binaries used to build these AMIs are published: + +* s3://amazon-eks/1.21.4/2021-10-12/ +* s3://amazon-eks/1.20.10/2021-10-12/ +* s3://amazon-eks/1.19.14/2021-10-12/ +* s3://amazon-eks/1.18.20/2021-09-02/ +* s3://amazon-eks/1.17.17/2021-09-02/ +* s3://amazon-eks/1.16.15/2021-09-02/ + +AMI details: + +* kernel: 5.4.149-73.259.amzn2 (1.19 and above), 4.14.248-189.473.amzn2 (1.18 and below) +* dockerd: 20.10.7-3.amzn2 +* containerd: 1.4.6-3.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.0.1124.0 + +Notable changes: + +* kubelet binaries have been updated for Kubernetes versions 1.19, 1.20 and 1.21, which include [a patch to fix an issue where kubelet can fail to unmount volumes](https://github.com/kubernetes/kubernetes/pull/102576) + +## AMI Release v20211004 + +* amazon-eks-gpu-node-1.20-v20211004 +* amazon-eks-gpu-node-1.19-v20211004 +* 
amazon-eks-gpu-node-1.18-v20211004 +* amazon-eks-gpu-node-1.17-v20211004 +* amazon-eks-gpu-node-1.16-v20211004 +* amazon-eks-gpu-node-1.15-v20211004 +* amazon-eks-arm64-node-1.20-v20211004 +* amazon-eks-arm64-node-1.19-v20211004 +* amazon-eks-arm64-node-1.18-v20211004 +* amazon-eks-arm64-node-1.17-v20211004 +* amazon-eks-arm64-node-1.16-v20211004 +* amazon-eks-arm64-node-1.15-v20211004 +* amazon-eks-node-1.20-v20211004 +* amazon-eks-node-1.19-v20211004 +* amazon-eks-node-1.18-v20211004 +* amazon-eks-node-1.17-v20211004 +* amazon-eks-node-1.16-v20211004 +* amazon-eks-node-1.15-v20211004 + +Binaries used to build these AMIs are published: + +* s3://amazon-eks/1.21.2/2021-04-12/ +* s3://amazon-eks/1.20.7/2021-04-12/ +* s3://amazon-eks/1.19.13/2021-01-05/ +* s3://amazon-eks/1.18.20/2020-11-02/ +* s3://amazon-eks/1.17.17/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ + +AMI details: + +* kernel: 5.4.149-73.259.amzn2 (1.19 and above), 4.14.246-187.474.amzn2 (1.18 and below) +* dockerd: 20.10.7-3.amzn2 +* containerd: 1.4.6-3.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 460.73.01 +* SSM agent: 3.0.1124.0 + +Notable changes: +* Created AMI released on the latest commit + +## AMI Release v20211003 + +* amazon-eks-gpu-node-1.20-v20211003 +* amazon-eks-gpu-node-1.19-v20211003 +* amazon-eks-gpu-node-1.18-v20211003 +* amazon-eks-gpu-node-1.17-v20211003 +* amazon-eks-gpu-node-1.16-v20211003 +* amazon-eks-gpu-node-1.15-v20211003 +* amazon-eks-arm64-node-1.20-v20211003 +* amazon-eks-arm64-node-1.19-v20211003 +* amazon-eks-arm64-node-1.18-v20211003 +* amazon-eks-arm64-node-1.17-v20211003 +* amazon-eks-arm64-node-1.16-v20211003 +* amazon-eks-arm64-node-1.15-v20211003 +* amazon-eks-node-1.20-v20211003 +* amazon-eks-node-1.19-v20211003 +* amazon-eks-node-1.18-v20211003 +* amazon-eks-node-1.17-v20211003 +* amazon-eks-node-1.16-v20211003 +* amazon-eks-node-1.15-v20211003 + +Binaries used to build these AMIs are published: + +* 
s3://amazon-eks/1.21.2/2021-04-12/ +* s3://amazon-eks/1.20.7/2021-04-12/ +* s3://amazon-eks/1.19.13/2021-01-05/ +* s3://amazon-eks/1.18.20/2020-11-02/ +* s3://amazon-eks/1.17.17/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ + +AMI details: + +* kernel: 5.4.144-69.257.amzn2 (1.19 and above), (1.18 and below) +* dockerd: 20.10.7-3.amzn2 +* containerd: 1.4.6-3.amzn2 +* runc: 1.0.0-2.amzn2 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 460.73.01 +* SSM agent: 3.0.1124.0 + +Notable changes: + +* Updated version of RunC to 1.0.0-2.amzn2 +* Updated version of Docker to 20.10.7-3.amzn2 +* Updated version of Containerd to 1.4.6-3.amzn2 +* Following CVEs are addressed Docker (CVE-2021-41089, CVE-2021-41091, CVE-2021-41092) and containerd (CVE-2021-41103) + +## AMI Release v20211001 + +* amazon-eks-gpu-node-1.21-v20211001 +* amazon-eks-gpu-node-1.20-v20211001 +* amazon-eks-gpu-node-1.19-v20211001 +* amazon-eks-gpu-node-1.18-v20211001 +* amazon-eks-gpu-node-1.17-v20211001 +* amazon-eks-gpu-node-1.16-v20211001 +* amazon-eks-arm64-node-1.21-v20211001 +* amazon-eks-arm64-node-1.20-v20211001 +* amazon-eks-arm64-node-1.19-v20211001 +* amazon-eks-arm64-node-1.18-v20211001 +* amazon-eks-arm64-node-1.17-v20211001 +* amazon-eks-arm64-node-1.16-v20211001 +* amazon-eks-node-1.21-v20211001 +* amazon-eks-node-1.20-v20211001 +* amazon-eks-node-1.19-v20211001 +* amazon-eks-node-1.18-v20211001 +* amazon-eks-node-1.17-v20211001 +* amazon-eks-node-1.16-v20211001 + +Binaries used to build these AMIs are published: + +s3://amazon-eks/1.20.4/2021-04-12/ +s3://amazon-eks/1.19.6/2021-01-05/ +s3://amazon-eks/1.18.9/2020-11-02/ +s3://amazon-eks/1.17.12/2020-11-02/ +s3://amazon-eks/1.16.15/2020-11-02/ +s3://amazon-eks/1.15.12/2020-11-02/ + +AMI details: + +* kernel: 5.4.144-69.257.amzn2 (1.19 and above), (1.18 and below) +* dockerd: 19.03.13-ce +* containerd: 1.4.6 +* runc: 1.0.0.amzn2 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 460.73.01 +* SSM agent: 3.0.1124.0 + +Notable changes: 
+* This release includes the patch for the CA to handle Let's Encrypt Certificate Expiry +* Updating default [containerd socket path](https://github.com/awslabs/amazon-eks-ami/commit/9576786266df8bee08e97c1c7f2d0e2f85752092) + +## AMI Release v20210914 + +* amazon-eks-gpu-node-1.21-v20210914 +* amazon-eks-gpu-node-1.20-v20210914 +* amazon-eks-gpu-node-1.19-v20210914 +* amazon-eks-gpu-node-1.18-v20210914 +* amazon-eks-gpu-node-1.17-v20210914 +* amazon-eks-gpu-node-1.16-v20210914 +* amazon-eks-arm64-node-1.21-v20210914 +* amazon-eks-arm64-node-1.20-v20210914 +* amazon-eks-arm64-node-1.19-v20210914 +* amazon-eks-arm64-node-1.18-v20210914 +* amazon-eks-arm64-node-1.17-v20210914 +* amazon-eks-arm64-node-1.16-v20210914 +* amazon-eks-node-1.21-v20210914 +* amazon-eks-node-1.20-v20210914 +* amazon-eks-node-1.19-v20210914 +* amazon-eks-node-1.18-v20210914 +* amazon-eks-node-1.17-v20210914 +* amazon-eks-node-1.16-v20210914 + +Notable changes: +Adding support for new ec2 instance types i.e. m6i + +## AMI Release v20210830 + +* amazon-eks-gpu-node-1.21-v20210830 +* amazon-eks-gpu-node-1.20-v20210830 +* amazon-eks-gpu-node-1.19-v20210830 +* amazon-eks-gpu-node-1.18-v20210830 +* amazon-eks-gpu-node-1.17-v20210830 +* amazon-eks-gpu-node-1.16-v20210830 +* amazon-eks-arm64-node-1.21-v20210830 +* amazon-eks-arm64-node-1.20-v20210830 +* amazon-eks-arm64-node-1.19-v20210830 +* amazon-eks-arm64-node-1.18-v20210830 +* amazon-eks-arm64-node-1.17-v20210830 +* amazon-eks-arm64-node-1.16-v20210830 +* amazon-eks-node-1.21-v20210830 +* amazon-eks-node-1.20-v20210830 +* amazon-eks-node-1.19-v20210830 +* amazon-eks-node-1.18-v20210830 +* amazon-eks-node-1.17-v20210830 +* amazon-eks-node-1.16-v20210830 + +Notable changes: + +* Upgrade kubelet version for 1.17 and 1.20 + * 1.17.12 -> 1.17.17 + * 1.20.4 -> 1.20.7 + +## AMI Release v20210826 + +* amazon-eks-gpu-node-1.21-v20210826 +* amazon-eks-gpu-node-1.20-v20210826 +* amazon-eks-gpu-node-1.19-v20210826 +* amazon-eks-gpu-node-1.18-v20210826 +* 
amazon-eks-gpu-node-1.17-v20210826 +* amazon-eks-gpu-node-1.16-v20210826 +* amazon-eks-gpu-node-1.15-v20210826 +* amazon-eks-arm64-node-1.21-v20210826 +* amazon-eks-arm64-node-1.20-v20210826 +* amazon-eks-arm64-node-1.19-v20210826 +* amazon-eks-arm64-node-1.18-v20210826 +* amazon-eks-arm64-node-1.17-v20210826 +* amazon-eks-arm64-node-1.16-v20210826 +* amazon-eks-arm64-node-1.15-v20210826 +* amazon-eks-node-1.21-v20210826 +* amazon-eks-node-1.20-v20210826 +* amazon-eks-node-1.19-v20210826 +* amazon-eks-node-1.18-v20210826 +* amazon-eks-node-1.17-v20210826 +* amazon-eks-node-1.16-v20210826 +* amazon-eks-node-1.15-v20210826 + +Notable changes: + +* Fix to reduce permissions of `pull-sandbox-image.sh` [c78bb6b](https://github.com/awslabs/amazon-eks-ami/commit/c78bb6bac21e9323f1f9c57568ece93c1f1d507b) + + +## AMI Release v20210813 + +* amazon-eks-gpu-node-1.21-v20210813 +* amazon-eks-gpu-node-1.20-v20210813 +* amazon-eks-gpu-node-1.19-v20210813 +* amazon-eks-gpu-node-1.18-v20210813 +* amazon-eks-gpu-node-1.17-v20210813 +* amazon-eks-gpu-node-1.16-v20210813 +* amazon-eks-gpu-node-1.15-v20210813 +* amazon-eks-arm64-node-1.21-v20210813 +* amazon-eks-arm64-node-1.20-v20210813 +* amazon-eks-arm64-node-1.19-v20210813 +* amazon-eks-arm64-node-1.18-v20210813 +* amazon-eks-arm64-node-1.17-v20210813 +* amazon-eks-arm64-node-1.16-v20210813 +* amazon-eks-arm64-node-1.15-v20210813 +* amazon-eks-node-1.21-v20210813 +* amazon-eks-node-1.20-v20210813 +* amazon-eks-node-1.19-v20210813 +* amazon-eks-node-1.18-v20210813 +* amazon-eks-node-1.17-v20210813 +* amazon-eks-node-1.16-v20210813 +* amazon-eks-node-1.15-v20210813 + +Notable changes: +* Contains fix for sandbox-image issue with containerd in Gov-cloud and CN regions. +* Updating to 1.18.20 and 1.19.13 kubernetes version. 
+ +## AMI Release v20210722 + +* amazon-eks-gpu-node-1.21-v20210722 +* amazon-eks-gpu-node-1.20-v20210722 +* amazon-eks-gpu-node-1.19-v20210722 +* amazon-eks-gpu-node-1.18-v20210722 +* amazon-eks-gpu-node-1.17-v20210722 +* amazon-eks-gpu-node-1.16-v20210722 +* amazon-eks-gpu-node-1.15-v20210722 +* amazon-eks-arm64-node-1.21-v20210722 +* amazon-eks-arm64-node-1.20-v20210722 +* amazon-eks-arm64-node-1.19-v20210722 +* amazon-eks-arm64-node-1.18-v20210722 +* amazon-eks-arm64-node-1.17-v20210722 +* amazon-eks-arm64-node-1.16-v20210722 +* amazon-eks-arm64-node-1.15-v20210722 +* amazon-eks-node-1.21-v20210722 +* amazon-eks-node-1.20-v20210722 +* amazon-eks-node-1.19-v20210722 +* amazon-eks-node-1.18-v20210722 +* amazon-eks-node-1.17-v20210722 +* amazon-eks-node-1.16-v20210722 +* amazon-eks-node-1.15-v20210722 + +Notable changes: +* This release includes the security patch for the [kernel](https://alas.aws.amazon.com/ALAS-2021-1524.html), for CVE-2021-33909. + +## AMI Release v20210720 + +* amazon-eks-gpu-node-1.21-v20210720 +* amazon-eks-gpu-node-1.20-v20210720 +* amazon-eks-gpu-node-1.19-v20210720 +* amazon-eks-gpu-node-1.18-v20210720 +* amazon-eks-gpu-node-1.17-v20210720 +* amazon-eks-gpu-node-1.16-v20210720 +* amazon-eks-gpu-node-1.15-v20210720 +* amazon-eks-arm64-node-1.21-v20210720 +* amazon-eks-arm64-node-1.20-v20210720 +* amazon-eks-arm64-node-1.19-v20210720 +* amazon-eks-arm64-node-1.18-v20210720 +* amazon-eks-arm64-node-1.17-v20210720 +* amazon-eks-arm64-node-1.16-v20210720 +* amazon-eks-arm64-node-1.15-v20210720 +* amazon-eks-node-1.21-v20210720 +* amazon-eks-node-1.20-v20210720 +* amazon-eks-node-1.19-v20210720 +* amazon-eks-node-1.18-v20210720 +* amazon-eks-node-1.17-v20210720 +* amazon-eks-node-1.16-v20210720 +* amazon-eks-node-1.15-v20210720 + +EKS AMI release for Kubernetes version 1.21 (1.21 AMIs for GPU and ARM in us-gov-west-1 and us-gov-east-1 are included in this release) +* Note: The containerd has patch for CVE-2021-32760 + +Containerd runtime 
support +The EKS Optimized Amazon Linux 2 AMI now contains a bootstrap (https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh) flag --container-runtime to optionally enable the containerd runtime. This flag is available in all supported Kubernetes versions of the AMI. This change is to get ahead of the removal of Docker as a supported runtime in Kubernetes (more details here (https://kubernetes.io/blog/2020/12/02/dockershim-faq/)). Feedback is appreciated. + +FIPS Kernel Panic issue on 5.4.X is fixed - https://github.com/awslabs/amazon-eks-ami/issues/632 + +## AMI Release v20210716 + +* amazon-eks-gpu-node-1.21-v20210716 +* amazon-eks-gpu-node-1.20-v20210716 +* amazon-eks-gpu-node-1.19-v20210716 +* amazon-eks-gpu-node-1.18-v20210716 +* amazon-eks-gpu-node-1.17-v20210716 +* amazon-eks-gpu-node-1.16-v20210716 +* amazon-eks-gpu-node-1.15-v20210716 +* amazon-eks-arm64-node-1.21-v20210716 +* amazon-eks-arm64-node-1.20-v20210716 +* amazon-eks-arm64-node-1.19-v20210716 +* amazon-eks-arm64-node-1.18-v20210716 +* amazon-eks-arm64-node-1.17-v20210716 +* amazon-eks-arm64-node-1.16-v20210716 +* amazon-eks-arm64-node-1.15-v20210716 +* amazon-eks-node-1.21-v20210716 +* amazon-eks-node-1.20-v20210716 +* amazon-eks-node-1.19-v20210716 +* amazon-eks-node-1.18-v20210716 +* amazon-eks-node-1.17-v20210716 +* amazon-eks-node-1.16-v20210716 +* amazon-eks-node-1.15-v20210716 + +EKS AMI release for Kubernetes version 1.21 (1.21 AMIs for GPU and ARM in us-gov-west-1 and us-gov-east-1 aren't a part of this release) +* Note: The containerd has patch for CVE-2021-32760 + +Containerd runtime support +The EKS Optimized Amazon Linux 2 AMI now contains a bootstrap (https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh) flag --container-runtime to optionally enable the containerd runtime. This flag is available in all supported Kubernetes versions of the AMI. 
This change is to get ahead of the removal of Docker as a supported runtime in Kubernetes (more details here (https://kubernetes.io/blog/2020/12/02/dockershim-faq/)). Feedback is appreciated. + +FIPS Kernel Panic issue on 5.4.X is fixed - https://github.com/awslabs/amazon-eks-ami/issues/632 + +## AMI Release v20210628 +* amazon-eks-gpu-node-1.20-v20210628 +* amazon-eks-gpu-node-1.19-v20210628 +* amazon-eks-gpu-node-1.18-v20210628 +* amazon-eks-gpu-node-1.17-v20210628 +* amazon-eks-gpu-node-1.16-v20210628 +* amazon-eks-gpu-node-1.15-v20210628 +* amazon-eks-arm64-node-1.20-v20210628 +* amazon-eks-arm64-node-1.19-v20210628 +* amazon-eks-arm64-node-1.18-v20210628 +* amazon-eks-arm64-node-1.17-v20210628 +* amazon-eks-arm64-node-1.16-v20210628 +* amazon-eks-arm64-node-1.15-v20210628 +* amazon-eks-node-1.20-v20210628 +* amazon-eks-node-1.19-v20210628 +* amazon-eks-node-1.18-v20210628 +* amazon-eks-node-1.17-v20210628 +* amazon-eks-node-1.16-v20210628 +* amazon-eks-node-1.15-v20210628 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.20.4/2021-04-12/ +* s3://amazon-eks/1.19.6/2021-01-05/ +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +AMI details: +* kernel: 5.4.117-58.216.amzn2 (1.19 and above), 4.14.232-177.418.amzn2 (1.18 and below) +* dockerd: 19.03.13ce +* containerd: 1.4.1 +* runc: 1.0.0-rc93 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 460.73.01 +* SSM agent: 3.0.1295.0 + +Notable changes: + +Includes the latest security patches for [systemd](https://alas.aws.amazon.com/AL2/ALAS-2021-1647.html), [python3](https://alas.aws.amazon.com/AL2/ALAS-2021-1670.html) and others. 
+ +## AMI Release v20210621 +* amazon-eks-gpu-node-1.20-v20210621 +* amazon-eks-gpu-node-1.19-v20210621 +* amazon-eks-gpu-node-1.18-v20210621 +* amazon-eks-gpu-node-1.17-v20210621 +* amazon-eks-gpu-node-1.16-v20210621 +* amazon-eks-gpu-node-1.15-v20210621 +* amazon-eks-arm64-node-1.20-v20210621 +* amazon-eks-arm64-node-1.19-v20210621 +* amazon-eks-arm64-node-1.18-v20210621 +* amazon-eks-arm64-node-1.17-v20210621 +* amazon-eks-arm64-node-1.16-v20210621 +* amazon-eks-arm64-node-1.15-v20210621 +* amazon-eks-node-1.20-v20210621 +* amazon-eks-node-1.19-v20210621 +* amazon-eks-node-1.18-v20210621 +* amazon-eks-node-1.17-v20210621 +* amazon-eks-node-1.16-v20210621 +* amazon-eks-node-1.15-v20210621 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.20.4/2021-04-12/ +* s3://amazon-eks/1.19.6/2021-01-05/ +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +AMI details: +* kernel: 5.4.117-58.216.amzn2.x86_64 (1.19 and above), 4.14.232-176.381.amzn2.x86_64 (1.18 and below) +* dockerd: 19.03.13-ce +* containerd: 1.4.1 +* runc: 1.0.0-rc93 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 1.4.0 +* SSM agent: 3.0.1295.0 + +Notable changes: +* The SSM Agent will now be automatically installed + +## AMI Release v20210526 +* amazon-eks-gpu-node-1.20-v20210526 +* amazon-eks-gpu-node-1.19-v20210526 +* amazon-eks-gpu-node-1.18-v20210526 +* amazon-eks-gpu-node-1.17-v20210526 +* amazon-eks-gpu-node-1.16-v20210526 +* amazon-eks-gpu-node-1.15-v20210526 +* amazon-eks-arm64-node-1.20-v20210526 +* amazon-eks-arm64-node-1.19-v20210526 +* amazon-eks-arm64-node-1.18-v20210526 +* amazon-eks-arm64-node-1.17-v20210526 +* amazon-eks-arm64-node-1.16-v20210526 +* amazon-eks-arm64-node-1.15-v20210526 +* amazon-eks-node-1.20-v20210526 +* amazon-eks-node-1.19-v20210526 +* amazon-eks-node-1.18-v20210526 +* amazon-eks-node-1.17-v20210526 +* amazon-eks-node-1.16-v20210526 +* 
amazon-eks-node-1.15-v20210526 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.20.4/2021-04-12/ +* s3://amazon-eks/1.19.6/2021-01-05/ +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +AMI details: +* kernel: 5.4.117-58.216.amzn2.x86_64 (1.19 and above), 4.14.232-176.381.amzn2.x86_64 (1.18 and below) +* dockerd: 19.03.13-ce +* containerd: 1.4.1 +* runc: 1.0.0-rc93 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 1.4.0 + + +Notable changes: +* [CVE-2021-25215](https://access.redhat.com/security/cve/CVE-2021-25215) patch +* kernel patch for following CVEs: [CVE-2021-31829](https://access.redhat.com/security/cve/CVE-2021-31829), [CVE-2021-23133](https://access.redhat.com/security/cve/CVE-2021-23133), [CVE-2020-29374](https://access.redhat.com/security/cve/CVE-2020-29374) + +## AMI Release v20210519 +* amazon-eks-gpu-node-1.20-v20210519 +* amazon-eks-gpu-node-1.19-v20210519 +* amazon-eks-gpu-node-1.18-v20210519 +* amazon-eks-gpu-node-1.17-v20210519 +* amazon-eks-gpu-node-1.16-v20210519 +* amazon-eks-gpu-node-1.15-v20210519 +* amazon-eks-arm64-node-1.20-v20210519 +* amazon-eks-arm64-node-1.19-v20210519 +* amazon-eks-arm64-node-1.18-v20210519 +* amazon-eks-arm64-node-1.17-v20210519 +* amazon-eks-arm64-node-1.16-v20210519 +* amazon-eks-arm64-node-1.15-v20210519 +* amazon-eks-node-1.20-v20210519 +* amazon-eks-node-1.19-v20210519 +* amazon-eks-node-1.18-v20210519 +* amazon-eks-node-1.17-v20210519 +* amazon-eks-node-1.16-v20210519 +* amazon-eks-node-1.15-v20210519 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.20.4/2021-04-12/ +* s3://amazon-eks/1.19.6/2021-01-05/ +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +AMI details: +* kernel: 5.4.110-54.189.amzn2.x86_64 (1.19 and above), 4.14.231-173.361.amzn2.x86_64 
(1.18 and below) +* dockerd: 19.03.13-ce +* containerd: 1.4.1 +* runc: 1.0.0-rc93 +* cuda: 460.73.01 +* nvidia-container-runtime-hook: 1.4.0 + + +Notable changes: +* `runc` version upgrade to `rc93` for GPU AMIs +* [fix](https://github.com/opencontainers/runc/pull/2871) for [#2530](https://github.com/opencontainers/runc/issues/2530) backported to `rc93` for GPU AMIs +* [`runc` CVE 2021-30465](https://github.com/opencontainers/runc/security/advisories/GHSA-c3xm-pvg7-gh7r) patch backported to `rc93` for GPU AMIs + +## AMI Release v20210518 + +* amazon-eks-gpu-node-1.19-v20210518 +* amazon-eks-gpu-node-1.18-v20210518 +* amazon-eks-gpu-node-1.17-v20210518 +* amazon-eks-gpu-node-1.16-v20210518 +* amazon-eks-gpu-node-1.15-v20210518 +* amazon-eks-arm64-node-1.19-v20210518 +* amazon-eks-arm64-node-1.18-v20210518 +* amazon-eks-arm64-node-1.17-v20210518 +* amazon-eks-arm64-node-1.16-v20210518 +* amazon-eks-arm64-node-1.15-v20210518 +* amazon-eks-node-1.19-v20210518 +* amazon-eks-node-1.18-v20210518 +* amazon-eks-node-1.17-v20210518 +* amazon-eks-node-1.16-v20210518 +* amazon-eks-node-1.15-v20210518 + +Binaries used to build these AMIs are published: + +* s3://amazon-eks/1.20.4/2021-04-12/ +* s3://amazon-eks/1.19.6/2021-01-05/ +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes: +* `runc` version upgrade to `rc93` +* [fix](https://github.com/opencontainers/runc/pull/2871) for [#2530](https://github.com/opencontainers/runc/issues/2530) backported to `rc93` +* [`runc` CVE 2021-30465](https://github.com/opencontainers/runc/security/advisories/GHSA-c3xm-pvg7-gh7r) patch backported to `rc93` + +## AMI Release v20210512 + +* amazon-eks-gpu-node-1.19-v20210512 +* amazon-eks-gpu-node-1.18-v20210512 +* amazon-eks-gpu-node-1.17-v20210512 +* amazon-eks-gpu-node-1.16-v20210512 +* amazon-eks-gpu-node-1.15-v20210512 +* amazon-eks-arm64-node-1.19-v20210512 +* 
amazon-eks-arm64-node-1.18-v20210512 +* amazon-eks-arm64-node-1.17-v20210512 +* amazon-eks-arm64-node-1.16-v20210512 +* amazon-eks-arm64-node-1.15-v20210512 +* amazon-eks-node-1.19-v20210512 +* amazon-eks-node-1.18-v20210512 +* amazon-eks-node-1.17-v20210512 +* amazon-eks-node-1.16-v20210512 +* amazon-eks-node-1.15-v20210512 + +Binaries used to build these AMIs are published: + +* s3://amazon-eks/1.20.4/2021-04-12/ +* s3://amazon-eks/1.19.6/2021-01-05/ +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes: +* Release 1.20 AMIs + +## AMI Release v20210504 + +* amazon-eks-gpu-node-1.19-v20210504 +* amazon-eks-gpu-node-1.18-v20210504 +* amazon-eks-gpu-node-1.17-v20210504 +* amazon-eks-gpu-node-1.16-v20210504 +* amazon-eks-gpu-node-1.15-v20210504 +* amazon-eks-arm64-node-1.19-v20210504 +* amazon-eks-arm64-node-1.18-v20210504 +* amazon-eks-arm64-node-1.17-v20210504 +* amazon-eks-arm64-node-1.16-v20210504 +* amazon-eks-arm64-node-1.15-v20210504 +* amazon-eks-node-1.19-v20210504 +* amazon-eks-node-1.18-v20210504 +* amazon-eks-node-1.17-v20210504 +* amazon-eks-node-1.16-v20210504 +* amazon-eks-node-1.15-v20210504 + +Binaries used to build these AMIs are published: + +s3://amazon-eks/1.19.6/2021-01-05/ +s3://amazon-eks/1.18.9/2020-11-02/ +s3://amazon-eks/1.17.12/2020-11-02/ +s3://amazon-eks/1.16.15/2020-11-02/ +s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes: + +* Update Kernel (1.19: 5.4.110-54.189.amzn2.x86_64, 1.18 and below: 4.14.231-173.361.amzn2.x86_64) to address a vulnerability. 
More information available in [ALAS-2021-1634](https://alas.aws.amazon.com/AL2/ALAS-2021-1634.html) +* Update Nvidia and Cuda drivers to v460.73.01 + +## AMI Release v20210501 + +* amazon-eks-gpu-node-1.19-v20210501 +* amazon-eks-gpu-node-1.18-v20210501 +* amazon-eks-gpu-node-1.17-v20210501 +* amazon-eks-gpu-node-1.16-v20210501 +* amazon-eks-gpu-node-1.15-v20210501 +* amazon-eks-arm64-node-1.19-v20210501 +* amazon-eks-arm64-node-1.18-v20210501 +* amazon-eks-arm64-node-1.17-v20210501 +* amazon-eks-arm64-node-1.16-v20210501 +* amazon-eks-arm64-node-1.15-v20210501 +* amazon-eks-node-1.19-v20210501 +* amazon-eks-node-1.18-v20210501 +* amazon-eks-node-1.17-v20210501 +* amazon-eks-node-1.16-v20210501 +* amazon-eks-node-1.15-v20210501 + +Binaries used to build these AMIs are published: + +s3://amazon-eks/1.19.6/2021-01-05/ +s3://amazon-eks/1.18.9/2020-11-02/ +s3://amazon-eks/1.17.12/2020-11-02/ +s3://amazon-eks/1.16.15/2020-11-02/ +s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes: + +* Patches for Linux kernel 4.14, used by AMIs with Kubernetes v1.18 and below (CVE ALAS2-2021-1627) +* Patches for Linux kernel 5.4, used by AMIs with Kubernetes v1.19 to fix a race condition with Conntrack. 
+ + + +### AMI Release v20210414 + +* amazon-eks-gpu-node-1.19-v20210414 +* amazon-eks-gpu-node-1.18-v20210414 +* amazon-eks-gpu-node-1.17-v20210414 +* amazon-eks-gpu-node-1.16-v20210414 +* amazon-eks-gpu-node-1.15-v20210414 +* amazon-eks-arm64-node-1.19-v20210414 +* amazon-eks-arm64-node-1.18-v20210414 +* amazon-eks-arm64-node-1.17-v20210414 +* amazon-eks-arm64-node-1.16-v20210414 +* amazon-eks-arm64-node-1.15-v20210414 +* amazon-eks-node-1.19-v20210414 +* amazon-eks-node-1.18-v20210414 +* amazon-eks-node-1.17-v20210414 +* amazon-eks-node-1.16-v20210414 +* amazon-eks-node-1.15-v20210414 + +Binaries used to build these AMIs are published: +s3://amazon-eks/1.19.6/2021-01-05/ +s3://amazon-eks/1.18.9/2020-11-02/ +s3://amazon-eks/1.17.12/2020-11-02/ +s3://amazon-eks/1.16.15/2020-11-02/ +s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes: +A regression was introduced for 1.19 AMI in the last release as a result of runc version update to `1.0.0-rc93` causing nodes to flap between `Ready` and `NotReady`, more details [#648](https://github.com/awslabs/amazon-eks-ami/issues/648). We are reverting the runc version back to 1.0.0-rc92. 
+ + +### AMI Release v20210329 + +* amazon-eks-gpu-node-1.19-v20210329 +* amazon-eks-gpu-node-1.18-v20210329 +* amazon-eks-gpu-node-1.17-v20210329 +* amazon-eks-gpu-node-1.16-v20210329 +* amazon-eks-gpu-node-1.15-v20210329 +* amazon-eks-arm64-node-1.19-v20210329 +* amazon-eks-arm64-node-1.18-v20210329 +* amazon-eks-arm64-node-1.17-v20210329 +* amazon-eks-arm64-node-1.16-v20210329 +* amazon-eks-arm64-node-1.15-v20210329 +* amazon-eks-node-1.19-v20210329 +* amazon-eks-node-1.18-v20210329 +* amazon-eks-node-1.17-v20210329 +* amazon-eks-node-1.16-v20210329 +* amazon-eks-node-1.15-v20210329 + +Binaries used to build these AMIs are published: +s3://amazon-eks/1.19.6/2021-01-05/ +s3://amazon-eks/1.18.9/2020-11-02/ +s3://amazon-eks/1.17.12/2020-11-02/ +s3://amazon-eks/1.16.15/2020-11-02/ +s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes: +A regression was introduced to the 4.14 Amazon Linux Kernel where I/O could slow significantly after running some workloads for a long period of time (observations point to between 4 hours and several days). This release contains the Kernel patch which fixes the above issue. 
+ + + + +### AMI Release v20210322 + +* amazon-eks-gpu-node-1.19-v20210322 +* amazon-eks-gpu-node-1.18-v20210322 +* amazon-eks-gpu-node-1.17-v20210322 +* amazon-eks-gpu-node-1.16-v20210322 +* amazon-eks-gpu-node-1.15-v20210322 +* amazon-eks-arm64-node-1.19-v20210322 +* amazon-eks-arm64-node-1.18-v20210322 +* amazon-eks-arm64-node-1.17-v20210322 +* amazon-eks-arm64-node-1.16-v20210322 +* amazon-eks-arm64-node-1.15-v20210322 +* amazon-eks-node-1.19-v20210322 +* amazon-eks-node-1.18-v20210322 +* amazon-eks-node-1.17-v20210322 +* amazon-eks-node-1.16-v20210322 +* amazon-eks-node-1.15-v20210322 + +Binaries used to build these AMIs are published : +s3://amazon-eks/1.19.6/2021-01-05/ +s3://amazon-eks/1.18.9/2020-11-02/ +s3://amazon-eks/1.17.12/2020-11-02/ +s3://amazon-eks/1.16.15/2020-11-02/ +s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : +- Updates Nvidia drivers to version `460.32.03` +- patch for CVE-2021-27363, CVE-2021-27364, CVE-2021-27365 +- set kubelet log verbosity to 2 + +### AMI Release v20210310 +* amazon-eks-gpu-node-1.19-v20210310 +* amazon-eks-gpu-node-1.18-v20210310 +* amazon-eks-gpu-node-1.17-v20210310 +* amazon-eks-gpu-node-1.16-v20210310 +* amazon-eks-gpu-node-1.15-v20210310 +* amazon-eks-arm64-node-1.19-v20210310 +* amazon-eks-arm64-node-1.18-v20210310 +* amazon-eks-arm64-node-1.17-v20210310 +* amazon-eks-arm64-node-1.16-v20210310 +* amazon-eks-arm64-node-1.15-v20210310 +* amazon-eks-node-1.19-v20210310 +* amazon-eks-node-1.18-v20210309 +* amazon-eks-node-1.17-v20210309 +* amazon-eks-node-1.16-v20210309 +* amazon-eks-node-1.15-v20210309 + +Binaries used to build these AMIs are published : +s3://amazon-eks/1.19.6/2021-01-05/ +s3://amazon-eks/1.18.9/2020-11-02/ +s3://amazon-eks/1.17.12/2020-11-02/ +s3://amazon-eks/1.16.15/2020-11-02/ +s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : +- Updates Nvidia drivers to version `460.27.04` +- GPU AMIs no longer uses `daemon.json` defined in 
https://github.com/awslabs/amazon-eks-ami/blob/master/files/docker-daemon.json + +### AMI Release v20210302 + +**GPU AMIs in this release are not compatible with any eksctl version after [eksctl 0.34.0](https://github.com/weaveworks/eksctl/releases/tag/0.34.0)** + +* amazon-eks-gpu-node-1.19-v20210302 +* amazon-eks-gpu-node-1.18-v20210302 +* amazon-eks-gpu-node-1.17-v20210302 +* amazon-eks-gpu-node-1.16-v20210302 +* amazon-eks-gpu-node-1.15-v20210302 +* amazon-eks-arm64-node-1.19-v20210302 +* amazon-eks-arm64-node-1.18-v20210302 +* amazon-eks-arm64-node-1.17-v20210302 +* amazon-eks-arm64-node-1.16-v20210302 +* amazon-eks-arm64-node-1.15-v20210302 +* amazon-eks-node-1.19-v20210302 +* amazon-eks-node-1.18-v20210302 +* amazon-eks-node-1.17-v20210302 +* amazon-eks-node-1.16-v20210302 +* amazon-eks-node-1.15-v20210302 + +Binaries used to build these AMIs are published: +- s3://amazon-eks/1.19.6/2021-01-05/ +- s3://amazon-eks/1.18.9/2020-11-02/ +- s3://amazon-eks/1.17.12/2020-11-02/ +- s3://amazon-eks/1.16.15/2020-11-02/ +- s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes: +- files/bootstrap.sh: ensure /etc/docker exists before writing to it (#611) +- GPU AMIs now use docker `daemon.json` defined in https://github.com/awslabs/amazon-eks-ami/blob/master/files/docker-daemon.json +- Patch for CVE-2021-3177 +- check that nvidia-smi is configured correctly before updating GPU clocks (#613) +- Fix Makefile indentation for 1.19 (#616) +- Increase fs.inotify.max_user_instances to 8192 from the default of 128 (#614) +- use dynamic lookup of docker gid (#622) +- bump docker version to 19.03.13ce-1 (#624) + +### AMI Release v20210208 +* amazon-eks-gpu-node-1.19-v20210208 +* amazon-eks-gpu-node-1.18-v20210208 +* amazon-eks-gpu-node-1.17-v20210208 +* amazon-eks-gpu-node-1.16-v20210208 +* amazon-eks-gpu-node-1.15-v20210208 +* amazon-eks-arm64-node-1.19-v20210208 +* amazon-eks-arm64-node-1.18-v20210208 +* amazon-eks-arm64-node-1.17-v20210208 +* 
amazon-eks-arm64-node-1.16-v20210208 +* amazon-eks-arm64-node-1.15-v20210208 +* amazon-eks-node-1.19-v20210208 +* amazon-eks-node-1.18-v20210208 +* amazon-eks-node-1.17-v20210208 +* amazon-eks-node-1.16-v20210208 +* amazon-eks-node-1.15-v20210208 + +Binaries used to build these AMIs are published : +* s3://amazon-eks/1.19.6/2021-01-05/ +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : +* Kubernetes versions 1.19+ will now use the 5.4 Linux kernel +* Patch for [ALAS-2021-1588](https://alas.aws.amazon.com/AL2/ALAS-2021-1588.html) + +### AMI Release v20210125 +* amazon-eks-gpu-node-1.18-v20210125 +* amazon-eks-gpu-node-1.17-v20210125 +* amazon-eks-gpu-node-1.16-v20210125 +* amazon-eks-gpu-node-1.15-v20210125 +* amazon-eks-arm64-node-1.18-v20210125 +* amazon-eks-arm64-node-1.17-v20210125 +* amazon-eks-arm64-node-1.16-v20210125 +* amazon-eks-arm64-node-1.15-v20210125 +* amazon-eks-node-1.18-v20210125 +* amazon-eks-node-1.17-v20210125 +* amazon-eks-node-1.16-v20210125 +* amazon-eks-node-1.15-v20210125 + +Binaries used to build these AMIs are published : +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : +* ARM AMIs built with m6g.large instance type (#601) +* Add Support for c6gn instance type (#597) +* Patch for CVE-2021-3156 (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-3156) + +### AMI Release v20210112 +* amazon-eks-gpu-node-1.18-v20210112 +* amazon-eks-gpu-node-1.17-v20210112 +* amazon-eks-gpu-node-1.16-v20210112 +* amazon-eks-gpu-node-1.15-v20210112 +* amazon-eks-arm64-node-1.18-v20210112 +* amazon-eks-arm64-node-1.17-v20210112 +* amazon-eks-arm64-node-1.16-v20210112 +* amazon-eks-arm64-node-1.15-v20210112 +* amazon-eks-node-1.18-v20210112 +* amazon-eks-node-1.17-v20210112 +* amazon-eks-node-1.16-v20210112 
+* amazon-eks-node-1.15-v20210112 + +Binaries used to build these AMIs are published : +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : +* Update ulimit for memlock to unlimited +* Update ulimit for max_user_watches and max_file_count +* Fix position of sonobuoy e2e registry config check (#590) +* Update Makefile to support sonobuoy e2e registry config override (#588) +* fix syntax error in install script (#582) introduced by #522 +* Feature flag the cleanup of the image (#522) +* Add iptables rule count to log collector +* GPU Boost clock setup for performance improvement (#573) +* add support for sonobuoy e2e registry overrides (#585) for MVP +* ensure kubelet.service.d directory exists (#519) +* (bootstrap): document pause container parameters (#556) +* add SIGKILL to RestartForceExitStatus (#554) +* fix containerd_version typo in Makefile (#584) +* Update systemd to always restart kubelet to support dynamic kubelet configuration (#578) +* Add missing instance types (#580) + +### AMI Release v20201211 +* amazon-eks-gpu-node-1.18-v20201211 +* amazon-eks-gpu-node-1.17-v20201211 +* amazon-eks-gpu-node-1.16-v20201211 +* amazon-eks-gpu-node-1.15-v20201211 +* amazon-eks-arm64-node-1.18-v20201211 +* amazon-eks-arm64-node-1.17-v20201211 +* amazon-eks-arm64-node-1.16-v20201211 +* amazon-eks-arm64-node-1.15-v20201211 +* amazon-eks-node-1.18-v20201211 +* amazon-eks-node-1.17-v20201211 +* amazon-eks-node-1.16-v20201211 +* amazon-eks-node-1.15-v20201211 + +Binaries used to build these AMIs are published : +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : +* Bug fix for the issue with rngd on EKS worker ami that's built with AL2 source ami. 
+* Bug fix for grub issue introduced by new nvidia driver +* Patch for CVE-2020-1971 (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-1971) + +### AMI Release v20201126 +* amazon-eks-gpu-node-1.18-v20201126 +* amazon-eks-gpu-node-1.17-v20201126 +* amazon-eks-gpu-node-1.16-v20201126 +* amazon-eks-gpu-node-1.15-v20201126 +* amazon-eks-arm64-node-1.18-v20201126 +* amazon-eks-arm64-node-1.17-v20201126 +* amazon-eks-arm64-node-1.16-v20201126 +* amazon-eks-arm64-node-1.15-v20201126 +* amazon-eks-node-1.18-v20201126 +* amazon-eks-node-1.17-v20201126 +* amazon-eks-node-1.16-v20201126 +* amazon-eks-node-1.15-v20201126 + +Binaries used to build these AMIs are published : +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : + +* Containerd patch for CVE-2020-15257 (containerd-1.4.1-2) + + +### AMI Release v20201117 +* amazon-eks-gpu-node-1.18-v20201117 +* amazon-eks-gpu-node-1.17-v20201117 +* amazon-eks-gpu-node-1.16-v20201117 +* amazon-eks-gpu-node-1.15-v20201117 +* amazon-eks-arm64-node-1.18-v20201117 +* amazon-eks-arm64-node-1.17-v20201117 +* amazon-eks-arm64-node-1.16-v20201117 +* amazon-eks-arm64-node-1.15-v20201117 +* amazon-eks-node-1.18-v20201117 +* amazon-eks-node-1.17-v20201117 +* amazon-eks-node-1.16-v20201117 +* amazon-eks-node-1.15-v20201117 + +Binaries used to build these AMIs are published : +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : +* Bug fix [#526](https://github.com/awslabs/amazon-eks-ami/pull/526) +* GPU AMIs - Nvidia driver version update to 450.51.06, cuda version update to 11.0 +* Updated kernel version to 4.14.203 and fix for soft lockup issue +* Downgraded containerd version to 1.3.2 to fix pods getting stuck in the Terminating state + + +### AMI Release v20201112 +* 
amazon-eks-gpu-node-1.18-v20201112 +* amazon-eks-gpu-node-1.17-v20201112 +* amazon-eks-gpu-node-1.16-v20201112 +* amazon-eks-gpu-node-1.15-v20201112 +* amazon-eks-arm64-node-1.18-v20201112 +* amazon-eks-arm64-node-1.17-v20201112 +* amazon-eks-arm64-node-1.16-v20201112 +* amazon-eks-arm64-node-1.15-v20201112 +* amazon-eks-node-1.18-v20201112 +* amazon-eks-node-1.17-v20201112 +* amazon-eks-node-1.16-v20201112 +* amazon-eks-node-1.15-v20201112 + +Binaries used to build these AMIs are published : +* s3://amazon-eks/1.18.9/2020-11-02/ +* s3://amazon-eks/1.17.12/2020-11-02/ +* s3://amazon-eks/1.16.15/2020-11-02/ +* s3://amazon-eks/1.15.12/2020-11-02/ + +Notable changes : +* Bug fix [#526](https://github.com/awslabs/amazon-eks-ami/pull/526) +* GPU AMIs - Nvidia driver version update to 450.51.06, cuda version update to 11.0 +* Updated kernel version to 4.14.203 and fix for [soft lockup issue](https://github.com/awslabs/amazon-eks-ami/issues/454) + + +Note: Previous release information can be found from [release note](https://github.com/awslabs/amazon-eks-ami/releases) + + +### AMI Release v20190927 +* amazon-eks-node-1.14-v20190927 +* amazon-eks-gpu-node-1.14-v20190927 +* amazon-eks-node-1.13-v20190927 +* amazon-eks-gpu-node-1.13-v20190927 +* amazon-eks-node-1.12-v20190927 +* amazon-eks-gpu-node-1.12-v20190927 +* amazon-eks-node-1.11-v20190927 +* amazon-eks-gpu-node-1.11-v20190927 + +Changes: +* 0f11f6c Add G4DN instance family to node group template +* ade31b0 Add support for g4 instance family +* d9147f1 sync nodegroup template to latest available + ### AMI Release v20190906 * amazon-eks-node-1.14-v20190906 * amazon-eks-gpu-node-1.14-v20190906 @@ -10,7 +8572,7 @@ * amazon-eks-gpu-node-1.11-v20190906 Changes: -* c1ae2f3 Adding new directory and file for 1.14 and above by removing --allow-privileged=true flag (#327) +* c1ae2f3 Adding new directory and file for 1.14 and above by removing --allow-privileged=true flag (#327) * 5335ea8 add support for me-south-1 region (#322) 
* c4e03c1 Update list of instance types (#320) * 389f4ba update S3_URL_BASE environment variable in install-worker.sh @@ -208,4 +8770,4 @@ Note: CNI >= 1.2.1 is required for t3 and r5 instance support. * EKS Launch AMI - + \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 7e0b7332b..000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,61 +0,0 @@ -# Contributing Guidelines - -Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional -documentation, we greatly value feedback and contributions from our community. - -Please read through this document before submitting any issues or pull requests to ensure we have all the necessary -information to effectively respond to your bug report or contribution. - - -## Reporting Bugs/Feature Requests - -We welcome you to use the GitHub issue tracker to report bugs or suggest features. - -When filing an issue, please check [existing open](https://github.com/aws-samples/amazon-eks-ami/issues), or [recently closed](https://github.com/aws-samples/amazon-eks-ami/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already -reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: - -* A reproducible test case or series of steps -* The version of our code being used -* Any modifications you've made relevant to the bug -* Anything unusual about your environment or deployment - - -## Contributing via Pull Requests -Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: - -1. You are working against the latest source on the *master* branch. -2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. -3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 
- -To send us a pull request, please: - -1. Fork the repository. -2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. -3. Ensure local tests pass. -4. Commit to your fork using clear commit messages. -5. Send us a pull request, answering any default questions in the pull request interface. -6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. - -GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and -[creating a pull request](https://help.github.com/articles/creating-a-pull-request/). - - -## Finding contributions to work on -Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels ((enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/amazon-eks-ami/labels/help%20wanted) issues is a great place to start. - - -## Code of Conduct -This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). -For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact -opensource-codeofconduct@amazon.com with any additional questions or comments. - - -## Security issue notifications -If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. - - -## Licensing - -See the [LICENSE](https://github.com/aws-samples/amazon-eks-ami/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 
- -We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. diff --git a/Config b/Config index 97041313e..42acc08d9 100644 --- a/Config +++ b/Config @@ -3,7 +3,7 @@ # Copyright 2019 Amazon.com, Inc. or its affiliates. # SPDX-License-Identifier: Apache-2.0 -package.Amazon-eks-ami = { +package.Amazon-eks-ami-mirror = { interfaces = (1.0); deploy = { @@ -15,7 +15,7 @@ package.Amazon-eks-ami = { network-access = blocked; }; - build-system = archivebuild; + build-system = archivebuild-wrapper; build-tools = { 1.0 = { ArchiveBuild = 1.0; diff --git a/Makefile b/Makefile index 38a0e299b..1a10456bb 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,54 @@ +MAKEFILE_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +PACKER_DEFAULT_VARIABLE_FILE ?= $(MAKEFILE_DIR)/eks-worker-al2-variables.json +PACKER_TEMPLATE_FILE ?= $(MAKEFILE_DIR)/eks-worker-al2.json PACKER_BINARY ?= packer -PACKER_VARIABLES := ami_name binary_bucket_name kubernetes_version kubernetes_build_date docker_version cni_version cni_plugin_version source_ami_id arch instance_type -AWS_DEFAULT_REGION ?= us-west-2 +AVAILABLE_PACKER_VARIABLES := $(shell $(PACKER_BINARY) inspect -machine-readable $(PACKER_TEMPLATE_FILE) | grep 'template-variable' | awk -F ',' '{print $$4}') K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) -ami_name ?= amazon-eks-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') +# expands to 'true' if PACKER_VARIABLE_FILE is non-empty +# and the file contains the string passed as the first argument +# otherwise, expands to 'false' +packer_variable_file_contains = $(if $(PACKER_VARIABLE_FILE),$(shell grep -Fq $1 $(PACKER_VARIABLE_FILE) && echo true || echo false),false) + +# expands to 'true' if the version comparison is affirmative +# otherwise expands to 'false' +vercmp = $(shell 
$(MAKEFILE_DIR)/files/bin/vercmp "$1" "$2" "$3") + +# Docker is not present on 1.25+ AMI's +# TODO: remove this when 1.24 reaches EOL +ifeq ($(call vercmp,$(kubernetes_version),gteq,1.25.0), true) + # do not tag the AMI with the Docker version + docker_version ?= none + # do not include the Docker version in the AMI description + ami_component_description ?= (k8s: {{ user `kubernetes_version` }}, containerd: {{ user `containerd_version` }}) +endif + +AMI_VERSION ?= v$(shell date '+%Y%m%d') +AMI_VARIANT ?= amazon-eks +ifneq (,$(findstring al2023, $(PACKER_TEMPLATE_FILE))) + AMI_VARIANT := $(AMI_VARIANT)-al2023 +endif arch ?= x86_64 ifeq ($(arch), arm64) -instance_type ?= a1.large + instance_type ?= m6g.large + AMI_VARIANT := $(AMI_VARIANT)-arm64 else -instance_type ?= m4.large + instance_type ?= m5.large +endif +ifeq ($(enable_fips), true) + AMI_VARIANT := $(AMI_VARIANT)-fips +endif +ami_name ?= $(AMI_VARIANT)-node-$(K8S_VERSION_MINOR)-$(AMI_VERSION) + +ifeq ($(aws_region), cn-northwest-1) + source_ami_owners ?= 141808717104 +endif + +ifeq ($(aws_region), us-gov-west-1) + source_ami_owners ?= 045324592363 endif T_RED := \e[0;31m @@ -18,30 +56,100 @@ T_GREEN := \e[0;32m T_YELLOW := \e[0;33m T_RESET := \e[0m -.PHONY: all -all: 1.10 1.11 1.12 1.13 +# default to the latest supported Kubernetes version +k8s=1.28 + +.PHONY: build +build: ## Build EKS Optimized AL2 AMI + $(MAKE) k8s $(shell hack/latest-binaries.sh $(k8s)) + +# ensure that these flags are equivalent to the rules in the .editorconfig +SHFMT_FLAGS := --list \ +--language-dialect auto \ +--indent 2 \ +--binary-next-line \ +--case-indent \ +--space-redirects + +SHFMT_COMMAND := $(shell which shfmt) +ifeq (, $(SHFMT_COMMAND)) + SHFMT_COMMAND = docker run --rm -v $(MAKEFILE_DIR):$(MAKEFILE_DIR) mvdan/shfmt +endif + +.PHONY: fmt +fmt: ## Format the source files + $(SHFMT_COMMAND) $(SHFMT_FLAGS) --write $(MAKEFILE_DIR) + +SHELLCHECK_COMMAND := $(shell which shellcheck) +ifeq (, $(SHELLCHECK_COMMAND)) + 
SHELLCHECK_COMMAND = docker run --rm -v $(MAKEFILE_DIR):$(MAKEFILE_DIR) koalaman/shellcheck:stable +endif +SHELL_FILES := $(shell find $(MAKEFILE_DIR) -type f -name '*.sh') + +.PHONY: transform-al2-to-al2023 +transform-al2-to-al2023: + PACKER_TEMPLATE_FILE=$(PACKER_TEMPLATE_FILE) \ + PACKER_DEFAULT_VARIABLE_FILE=$(PACKER_DEFAULT_VARIABLE_FILE) \ + hack/transform-al2-to-al2023.sh + +.PHONY: lint +lint: lint-docs ## Check the source files for syntax and format issues + $(SHFMT_COMMAND) $(SHFMT_FLAGS) --diff $(MAKEFILE_DIR) + $(SHELLCHECK_COMMAND) --format gcc --severity error $(SHELL_FILES) + hack/lint-space-errors.sh + +.PHONY: test +test: ## run the test-harness + test/test-harness.sh + +# include only variables which have a defined value +PACKER_VARIABLES := $(foreach packerVar,$(AVAILABLE_PACKER_VARIABLES),$(if $($(packerVar)),$(packerVar))) +PACKER_VAR_FLAGS := -var-file $(PACKER_DEFAULT_VARIABLE_FILE) \ +$(if $(PACKER_VARIABLE_FILE),-var-file=$(PACKER_VARIABLE_FILE),) \ +$(foreach packerVar,$(PACKER_VARIABLES),-var $(packerVar)='$($(packerVar))') .PHONY: validate -validate: - $(PACKER_BINARY) validate $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)=$($(packerVar)),)) eks-worker-al2.json +validate: ## Validate packer config + $(PACKER_BINARY) validate $(PACKER_VAR_FLAGS) $(PACKER_TEMPLATE_FILE) .PHONY: k8s -k8s: validate +k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" - $(PACKER_BINARY) build $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)=$($(packerVar)),)) eks-worker-al2.json + $(PACKER_BINARY) build -timestamp-ui -color=false $(PACKER_VAR_FLAGS) $(PACKER_TEMPLATE_FILE) + +.PHONY: 1.23 +1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 + $(MAKE) k8s $(shell hack/latest-binaries.sh 1.23) + +.PHONY: 1.24 +1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 + $(MAKE) 
k8s $(shell hack/latest-binaries.sh 1.24) + +.PHONY: 1.25 +1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 + $(MAKE) k8s $(shell hack/latest-binaries.sh 1.25) + +.PHONY: 1.26 +1.26: ## Build EKS Optimized AL2 AMI - K8s 1.26 + $(MAKE) k8s $(shell hack/latest-binaries.sh 1.26) + +.PHONY: 1.27 +1.27: ## Build EKS Optimized AL2 AMI - K8s 1.27 + $(MAKE) k8s $(shell hack/latest-binaries.sh 1.27) -.PHONY: 1.10 -1.10: - $(MAKE) k8s kubernetes_version=1.10.13 kubernetes_build_date=2019-03-27 +.PHONY: 1.28 +1.28: ## Build EKS Optimized AL2 AMI - K8s 1.28 + $(MAKE) k8s $(shell hack/latest-binaries.sh 1.28) -.PHONY: 1.11 -1.11: - $(MAKE) k8s kubernetes_version=1.11.9 kubernetes_build_date=2019-03-27 +.PHONY: lint-docs +lint-docs: ## Lint the docs + hack/lint-docs.sh -.PHONY: 1.12 -1.12: - $(MAKE) k8s kubernetes_version=1.12.7 kubernetes_build_date=2019-03-27 +.PHONY: clean +clean: + rm *-manifest.json + rm *-version-info.json -.PHONY: 1.13 -1.13: - $(MAKE) k8s kubernetes_version=1.13.7 kubernetes_build_date=2019-06-11 \ No newline at end of file +.PHONY: help +help: ## Display help + @awk 'BEGIN {FS = ":.*##"; printf "Usage:\n make \033[36m\033[0m\n"} /^[\.a-zA-Z_0-9\-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) diff --git a/README.md b/README.md index a29a40fa9..403a0d1be 100644 --- a/README.md +++ b/README.md @@ -1,81 +1,63 @@ # Amazon EKS AMI Build Specification +## This branch will be deleted on **March 30, 2024**! + +The default branch of this repository has changed to `main`. + +This change coincides with a reorganization of the project sources. You may continue using the `master` branch as you update your downstream dependencies, but you'll need to explicitly check out the `master` branch after February 29, 2024. + +--- + This repository contains resources and configuration scripts for building a custom Amazon EKS AMI with [HashiCorp Packer](https://www.packer.io/). 
This is the same configuration that Amazon EKS uses to create the official Amazon EKS-optimized AMI. -## Setup +## 🚀 Getting started + +If you are new to Amazon EKS, we recommend that you follow +our [Getting Started](https://docs.aws.amazon.com/eks/latest/userguide/getting-started.html) +chapter in the Amazon EKS User Guide. If you already have a cluster, and you +want to launch a node group with your new AMI, see [Launching Amazon EKS Worker +Nodes](https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html). + +## 🔢 Pre-requisites -You must have [Packer](https://www.packer.io/) installed on your local system. +You must have [Packer](https://www.packer.io/) version 1.8.0 or later installed on your local system. For more information, see [Installing Packer](https://www.packer.io/docs/install/index.html) in the Packer documentation. You must also have AWS account credentials configured so that Packer can make calls to AWS API operations on your behalf. For more information, see [Authentication](https://www.packer.io/docs/builders/amazon.html#specifying-amazon-credentials) in the Packer documentation. -**Note** -The default instance type to build this AMI is an `m4.large` and does not -qualify for the AWS free tier. You are charged for any instances created -when building this AMI. +## 👷 Building the AMI -## Building the AMI - -A Makefile is provided to build the AMI, but it is just a small wrapper around +A Makefile is provided to build the Amazon EKS Worker AMI, but it is just a small wrapper around invoking Packer directly. You can initiate the build process by running the following command in the root of this repository: ```bash +# build an AMI with the latest Kubernetes version make -``` - -The Makefile runs Packer with the `eks-worker-al2.json` build specification -template and the [amazon-ebs](https://www.packer.io/docs/builders/amazon-ebs.html) -builder. 
An instance is launched and the Packer [Shell -Provisioner](https://www.packer.io/docs/provisioners/shell.html) runs the -`install-worker.sh` script on the instance to install software and perform other -necessary configuration tasks. Then, Packer creates an AMI from the instance -and terminates the instance after the AMI is created. -## Using the AMI - -If you are just getting started with Amazon EKS, we recommend that you follow -our [Getting Started](https://docs.aws.amazon.com/eks/latest/userguide/getting-started.html) -chapter in the Amazon EKS User Guide. If you already have a cluster, and you -want to launch a node group with your new AMI, see [Launching Amazon EKS Worker -Nodes](https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html) -in the Amazon EKS User Guide. - -The [`amazon-eks-nodegroup.yaml`](amazon-eks-nodegroup.yaml) AWS CloudFormation -template in this repository is provided to launch a node group with the new AMI -ID that is returned when Packer finishes building. Note that there is important -Amazon EC2 user data in this CloudFormation template that bootstraps the worker -nodes when they are launched so that they can register with your Amazon EKS -cluster. Your nodes cannot register properly without this user data. +# build an AMI with a specific Kubernetes version +make k8s=1.29 +``` -### Compatibility with CloudFormation Template +The Makefile chooses a particular kubelet binary to use per Kubernetes version which you can [view here](Makefile). -The CloudFormation template for EKS Nodes is published in the S3 bucket -`amazon-eks` under the path `cloudformation`. You can see a list of previous -versions by running `aws s3 ls s3://amazon-eks/cloudformation/`. +> **Note** +> The default instance type to build this AMI does not qualify for the AWS free tier. +> You are charged for any instances created when building this AMI. 
-| CloudFormation Version | EKS AMI versions | [amazon-vpc-cni-k8s](https://github.com/aws/amazon-vpc-cni-k8s/releases) | -| ---------------------- | ------------------------------------------ | -------------------- | -| 2019-02-11 | amazon-eks-node-(1.12,1.11,1.10)-v20190327 | v1.3.2 (for p3dn.24xlarge instances) | -| 2019-02-11 | amazon-eks-node-(1.11,1.10)-v20190220 | v1.3.2 (for p3dn.24xlarge instances) | -| 2019-02-11 | amazon-eks-node-(1.11,1.10)-v20190211 | v1.3.2 (for p3dn.24xlarge instances) | -| 2018-12-10 | amazon-eks-node-(1.11,1.10)-v20181210 | v1.2.1 | -| 2018-11-07 | amazon-eks-node-v25+ | v1.2.1 (for t3 and r5 instances) | -| 2018-08-30 | amazon-eks-node-v23+ | v1.1.0 | -| 2018-08-21 | amazon-eks-node-v23+ | v1.1.0 | +## 👩‍💻 Using the AMI -For older versions of the EKS AMI (v20-v22), you can find the CloudFormation -templates in the same bucket under the path `s3://amazon-eks/1.10.3/2018-06-05/`. +The [AMI user guide](https://awslabs.github.io/amazon-eks-ami/USER_GUIDE/) has details about the AMI's internals, and the [EKS user guide](https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html#launch-template-custom-ami) explains how to use a custom AMI in a managed node group. -## Security +## 🔒 Security For security issues or concerns, please do not open an issue or pull request on GitHub. Please report any suspected or confirmed security issues to AWS Security https://aws.amazon.com/security/vulnerability-reporting/ -## License Summary +## ⚖️ License Summary This sample code is made available under a modified MIT license. See the LICENSE file. 
diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml deleted file mode 100644 index 140869504..000000000 --- a/amazon-eks-nodegroup.yaml +++ /dev/null @@ -1,456 +0,0 @@ ---- -AWSTemplateFormatVersion: 2010-09-09 -Description: Amazon EKS - Node Group - -Parameters: - - KeyName: - Description: The EC2 Key Pair to allow SSH access to the instances - Type: AWS::EC2::KeyPair::KeyName - - NodeImageId: - Description: AMI id for the node instances. - Type: AWS::EC2::Image::Id - - NodeInstanceType: - Description: EC2 instance type for the node instances - Type: String - Default: t3.medium - ConstraintDescription: Must be a valid EC2 instance type - AllowedValues: - - a1.medium - - a1.large - - a1.xlarge - - a1.2xlarge - - a1.4xlarge - - c1.medium - - c1.xlarge - - c3.large - - c3.xlarge - - c3.2xlarge - - c3.4xlarge - - c3.8xlarge - - c4.large - - c4.xlarge - - c4.2xlarge - - c4.4xlarge - - c4.8xlarge - - c5.large - - c5.xlarge - - c5.2xlarge - - c5.4xlarge - - c5.9xlarge - - c5.12xlarge - - c5.18xlarge - - c5.24xlarge - - c5d.large - - c5d.xlarge - - c5d.2xlarge - - c5d.4xlarge - - c5d.9xlarge - - c5d.18xlarge - - c5n.large - - c5n.xlarge - - c5n.2xlarge - - c5n.4xlarge - - c5n.9xlarge - - c5n.18xlarge - - cc2.8xlarge - - cr1.8xlarge - - d2.xlarge - - d2.2xlarge - - d2.4xlarge - - d2.8xlarge - - f1.2xlarge - - f1.4xlarge - - f1.16xlarge - - g2.2xlarge - - g2.8xlarge - - g3s.xlarge - - g3.4xlarge - - g3.8xlarge - - g3.16xlarge - - h1.2xlarge - - h1.4xlarge - - h1.8xlarge - - h1.16xlarge - - hs1.8xlarge - - i2.xlarge - - i2.2xlarge - - i2.4xlarge - - i2.8xlarge - - i3.large - - i3.xlarge - - i3.2xlarge - - i3.4xlarge - - i3.8xlarge - - i3.16xlarge - - i3.metal - - i3en.large - - i3en.xlarge - - i3en.2xlarge - - i3en.3xlarge - - i3en.6xlarge - - i3en.12xlarge - - i3en.24xlarge - - m1.small - - m1.medium - - m1.large - - m1.xlarge - - m2.xlarge - - m2.2xlarge - - m2.4xlarge - - m3.medium - - m3.large - - m3.xlarge - - m3.2xlarge - - m4.large - - m4.xlarge - - 
m4.2xlarge - - m4.4xlarge - - m4.10xlarge - - m4.16xlarge - - m5.large - - m5.xlarge - - m5.2xlarge - - m5.4xlarge - - m5.8xlarge - - m5.12xlarge - - m5.16xlarge - - m5.24xlarge - - m5a.large - - m5a.xlarge - - m5a.2xlarge - - m5a.4xlarge - - m5a.8xlarge - - m5a.12xlarge - - m5a.16xlarge - - m5a.24xlarge - - m5ad.large - - m5ad.xlarge - - m5ad.2xlarge - - m5ad.4xlarge - - m5ad.12xlarge - - m5ad.24xlarge - - m5d.large - - m5d.xlarge - - m5d.2xlarge - - m5d.4xlarge - - m5d.8xlarge - - m5d.12xlarge - - m5d.16xlarge - - m5d.24xlarge - - p2.xlarge - - p2.8xlarge - - p2.16xlarge - - p3.2xlarge - - p3.8xlarge - - p3.16xlarge - - p3dn.24xlarge - - r3.large - - r3.xlarge - - r3.2xlarge - - r3.4xlarge - - r3.8xlarge - - r4.large - - r4.xlarge - - r4.2xlarge - - r4.4xlarge - - r4.8xlarge - - r4.16xlarge - - r5.large - - r5.xlarge - - r5.2xlarge - - r5.4xlarge - - r5.8xlarge - - r5.12xlarge - - r5.16xlarge - - r5.24xlarge - - r5a.large - - r5a.xlarge - - r5a.2xlarge - - r5a.4xlarge - - r5a.8xlarge - - r5a.12xlarge - - r5a.16xlarge - - r5a.24xlarge - - r5ad.large - - r5ad.xlarge - - r5ad.2xlarge - - r5ad.4xlarge - - r5ad.12xlarge - - r5ad.24xlarge - - r5d.large - - r5d.xlarge - - r5d.2xlarge - - r5d.4xlarge - - r5d.8xlarge - - r5d.12xlarge - - r5d.16xlarge - - r5d.24xlarge - - t1.micro - - t2.nano - - t2.micro - - t2.small - - t2.medium - - t2.large - - t2.xlarge - - t2.2xlarge - - t3.nano - - t3.micro - - t3.small - - t3.medium - - t3.large - - t3.xlarge - - t3.2xlarge - - t3a.nano - - t3a.micro - - t3a.small - - t3a.medium - - t3a.large - - t3a.xlarge - - t3a.2xlarge - - x1.16xlarge - - x1.32xlarge - - x1e.xlarge - - x1e.2xlarge - - x1e.4xlarge - - x1e.8xlarge - - x1e.16xlarge - - x1e.32xlarge - - z1d.large - - z1d.xlarge - - z1d.2xlarge - - z1d.3xlarge - - z1d.6xlarge - - z1d.12xlarge - - NodeAutoScalingGroupMinSize: - Description: Minimum size of Node Group ASG. - Type: Number - Default: 1 - - NodeAutoScalingGroupMaxSize: - Description: Maximum size of Node Group ASG. 
Set to at least 1 greater than NodeAutoScalingGroupDesiredCapacity. - Type: Number - Default: 4 - - NodeAutoScalingGroupDesiredCapacity: - Description: Desired capacity of Node Group ASG. - Type: Number - Default: 3 - - NodeVolumeSize: - Description: Node volume size - Type: Number - Default: 20 - - ClusterName: - Description: The cluster name provided when the cluster was created. If it is incorrect, nodes will not be able to join the cluster. - Type: String - - BootstrapArguments: - Description: Arguments to pass to the bootstrap script. See files/bootstrap.sh in https://github.com/awslabs/amazon-eks-ami - Type: String - Default: "" - - NodeGroupName: - Description: Unique identifier for the Node Group. - Type: String - - ClusterControlPlaneSecurityGroup: - Description: The security group of the cluster control plane. - Type: AWS::EC2::SecurityGroup::Id - - VpcId: - Description: The VPC of the worker instances - Type: AWS::EC2::VPC::Id - - Subnets: - Description: The subnets where workers can be created. 
- Type: List - -Metadata: - - AWS::CloudFormation::Interface: - ParameterGroups: - - Label: - default: EKS Cluster - Parameters: - - ClusterName - - ClusterControlPlaneSecurityGroup - - Label: - default: Worker Node Configuration - Parameters: - - NodeGroupName - - NodeAutoScalingGroupMinSize - - NodeAutoScalingGroupDesiredCapacity - - NodeAutoScalingGroupMaxSize - - NodeInstanceType - - NodeImageId - - NodeVolumeSize - - KeyName - - BootstrapArguments - - Label: - default: Worker Network Configuration - Parameters: - - VpcId - - Subnets - -Resources: - - NodeInstanceProfile: - Type: AWS::IAM::InstanceProfile - Properties: - Path: "/" - Roles: - - !Ref NodeInstanceRole - - NodeInstanceRole: - Type: AWS::IAM::Role - Properties: - AssumeRolePolicyDocument: - Version: 2012-10-17 - Statement: - - Effect: Allow - Principal: - Service: ec2.amazonaws.com - Action: sts:AssumeRole - Path: "/" - ManagedPolicyArns: - - arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy - - arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy - - arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly - - NodeSecurityGroup: - Type: AWS::EC2::SecurityGroup - Properties: - GroupDescription: Security group for all nodes in the cluster - VpcId: !Ref VpcId - Tags: - - Key: !Sub kubernetes.io/cluster/${ClusterName} - Value: owned - - NodeSecurityGroupIngress: - Type: AWS::EC2::SecurityGroupIngress - DependsOn: NodeSecurityGroup - Properties: - Description: Allow node to communicate with each other - GroupId: !Ref NodeSecurityGroup - SourceSecurityGroupId: !Ref NodeSecurityGroup - IpProtocol: -1 - FromPort: 0 - ToPort: 65535 - - NodeSecurityGroupFromControlPlaneIngress: - Type: AWS::EC2::SecurityGroupIngress - DependsOn: NodeSecurityGroup - Properties: - Description: Allow worker Kubelets and pods to receive communication from the cluster control plane - GroupId: !Ref NodeSecurityGroup - SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup - IpProtocol: tcp - FromPort: 1025 - ToPort: 65535 - - 
ControlPlaneEgressToNodeSecurityGroup: - Type: AWS::EC2::SecurityGroupEgress - DependsOn: NodeSecurityGroup - Properties: - Description: Allow the cluster control plane to communicate with worker Kubelet and pods - GroupId: !Ref ClusterControlPlaneSecurityGroup - DestinationSecurityGroupId: !Ref NodeSecurityGroup - IpProtocol: tcp - FromPort: 1025 - ToPort: 65535 - - NodeSecurityGroupFromControlPlaneOn443Ingress: - Type: AWS::EC2::SecurityGroupIngress - DependsOn: NodeSecurityGroup - Properties: - Description: Allow pods running extension API servers on port 443 to receive communication from cluster control plane - GroupId: !Ref NodeSecurityGroup - SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup - IpProtocol: tcp - FromPort: 443 - ToPort: 443 - - ControlPlaneEgressToNodeSecurityGroupOn443: - Type: AWS::EC2::SecurityGroupEgress - DependsOn: NodeSecurityGroup - Properties: - Description: Allow the cluster control plane to communicate with pods running extension API servers on port 443 - GroupId: !Ref ClusterControlPlaneSecurityGroup - DestinationSecurityGroupId: !Ref NodeSecurityGroup - IpProtocol: tcp - FromPort: 443 - ToPort: 443 - - ClusterControlPlaneSecurityGroupIngress: - Type: AWS::EC2::SecurityGroupIngress - DependsOn: NodeSecurityGroup - Properties: - Description: Allow pods to communicate with the cluster API Server - GroupId: !Ref ClusterControlPlaneSecurityGroup - SourceSecurityGroupId: !Ref NodeSecurityGroup - IpProtocol: tcp - ToPort: 443 - FromPort: 443 - - NodeGroup: - Type: AWS::AutoScaling::AutoScalingGroup - Properties: - DesiredCapacity: !Ref NodeAutoScalingGroupDesiredCapacity - LaunchConfigurationName: !Ref NodeLaunchConfig - MinSize: !Ref NodeAutoScalingGroupMinSize - MaxSize: !Ref NodeAutoScalingGroupMaxSize - VPCZoneIdentifier: !Ref Subnets - Tags: - - Key: Name - Value: !Sub ${ClusterName}-${NodeGroupName}-Node - PropagateAtLaunch: true - - Key: !Sub kubernetes.io/cluster/${ClusterName} - Value: owned - PropagateAtLaunch: true - 
UpdatePolicy: - AutoScalingRollingUpdate: - MaxBatchSize: 1 - MinInstancesInService: !Ref NodeAutoScalingGroupDesiredCapacity - PauseTime: PT5M - - NodeLaunchConfig: - Type: AWS::AutoScaling::LaunchConfiguration - Properties: - AssociatePublicIpAddress: true - IamInstanceProfile: !Ref NodeInstanceProfile - ImageId: !Ref NodeImageId - InstanceType: !Ref NodeInstanceType - KeyName: !Ref KeyName - SecurityGroups: - - !Ref NodeSecurityGroup - BlockDeviceMappings: - - DeviceName: /dev/xvda - Ebs: - VolumeSize: !Ref NodeVolumeSize - VolumeType: gp2 - DeleteOnTermination: true - UserData: - Fn::Base64: - !Sub | - #!/bin/bash - set -o xtrace - /etc/eks/bootstrap.sh ${ClusterName} ${BootstrapArguments} - /opt/aws/bin/cfn-signal --exit-code $? \ - --stack ${AWS::StackName} \ - --resource NodeGroup \ - --region ${AWS::Region} - -Outputs: - - NodeInstanceRole: - Description: The node instance role - Value: !GetAtt NodeInstanceRole.Arn - - NodeSecurityGroup: - Description: The security group for the node group - Value: !Ref NodeSecurityGroup diff --git a/build-tools/bin/archivebuild-wrapper b/build-tools/bin/archivebuild-wrapper new file mode 100755 index 000000000..ba86f6f0f --- /dev/null +++ b/build-tools/bin/archivebuild-wrapper @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# This file is for Amazon internal build processes + +HEAD_COMMIT="${BRAZIL_PACKAGE_CHANGE_ID:-$(git rev-parse HEAD)}" + +if [ "${HEAD_COMMIT}" = "" ]; then + echo >&2 "could not determine HEAD commit" + exit 1 +fi + +echo "${HEAD_COMMIT}" > .git-commit +archivebuild diff --git a/doc/CHANGELOG.md b/doc/CHANGELOG.md new file mode 120000 index 000000000..04c99a55c --- /dev/null +++ b/doc/CHANGELOG.md @@ -0,0 +1 @@ +../CHANGELOG.md \ No newline at end of file diff --git a/CODE_OF_CONDUCT.md b/doc/CODE_OF_CONDUCT.md similarity index 72% rename from CODE_OF_CONDUCT.md rename to doc/CODE_OF_CONDUCT.md index 3b6446687..5b627cfa6 100644 --- a/CODE_OF_CONDUCT.md +++ b/doc/CODE_OF_CONDUCT.md @@ -1,4 +1,4 @@ ## Code of 
Conduct -This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). -For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact +This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). +For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact opensource-codeofconduct@amazon.com with any additional questions or comments. diff --git a/doc/CONTRIBUTING.md b/doc/CONTRIBUTING.md new file mode 100644 index 000000000..b7cdc25ab --- /dev/null +++ b/doc/CONTRIBUTING.md @@ -0,0 +1,151 @@ +# Contributing Guidelines + +Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional +documentation, we greatly value feedback and contributions from our community. + +Please read through this document before submitting any issues or pull requests to ensure we have all the necessary +information to effectively respond to your bug report or contribution. + + +## Reporting Bugs/Feature Requests + +We welcome you to use the GitHub issue tracker to report bugs or suggest features. + +When filing an issue, please check [existing open](https://github.com/aws-samples/amazon-eks-ami/issues), or [recently closed](https://github.com/aws-samples/amazon-eks-ami/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already +reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: + +* A reproducible test case or series of steps +* The version of our code being used +* Any modifications you've made relevant to the bug +* Anything unusual about your environment or deployment + + +## Contributing via Pull Requests +Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: + +1. 
You are working against the latest source on the *master* branch. +2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. +3. You open an issue to discuss any significant work - we would hate for your time to be wasted. + +To send us a pull request, please: + +1. Fork the repository. +2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. +3. Ensure your changes match our style guide (`make fmt`). +4. Ensure local tests pass (`make test`). +5. Commit to your fork using clear commit messages. +6. Send us a pull request, answering any default questions in the pull request interface. +7. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. + +GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and +[creating a pull request](https://help.github.com/articles/creating-a-pull-request/). + +### Testing Changes + +When submitting PRs, we want to verify that there are no regressions in the AMI with the new changes. EKS runs various tests before publishing new Amazon EKS optimized Amazon Linux AMIs, which will ensure the highest level of confidence that there are no regressions in officially published AMIs. To maintain the health of this repo, we need to do some basic validation prior to merging PRs. Eventually, we hope to automate this process. Until then, here are the basic steps that we should take before merging PRs. + +**Test #1: Verify that the unit tests pass** + +Please add a test case for your changes, if possible. See the [unit test README](https://github.com/awslabs/amazon-eks-ami/tree/master/test#readme) for more information. These tests will be run automatically for every pull request. 
+ +``` +make test +``` + +**Test #2: Verify that building AMIs still works** + +If your change is relevant to a specific Kubernetes version, build all AMIs that apply. Otherwise, just choose the latest available Kubernetes version. + +``` +# Configure AWS credentials +make 1.22 +``` + +**Test #3: Create a nodegroup with new AMI and confirm it joins a cluster** + +Once the AMI is built, we need to verify that it can join a cluster. You can use `eksctl`, or your method of choice, to create a cluster and add nodes to it using the AMI you built. Below is an example config file. + +`cluster.yaml` + +``` +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +metadata: + name: basic-cluster + region: us-west-2 + version: '1.22' + +nodeGroups: + - name: ng + instanceType: m5.large + ami: [INSERT_AMI_ID] + overrideBootstrapCommand: | + #!/bin/bash + /etc/eks/bootstrap.sh basic-cluster +``` + +Then run: + +``` +eksctl create cluster -f cluster.yaml +``` + +`eksctl` will verify that the nodes join the cluster before completing. + +**Test #4: Verify that the nodes are Kubernetes conformant** + +You can use [sonobuoy](https://sonobuoy.io/) to run conformance tests on the cluster you've created in *Test #3*. You should only include nodes with the custom AMI built in *Test #2*. You must install `sonobuoy` locally before running. + +``` +sonobuoy run --wait +``` + +By default, `sonobuoy` will run `e2e` and `systemd-logs`. This step may take multiple hours to run. + +**Test #5: [Optional] Test your specific PR changes** + +If your PR has changes that require additional, custom validation, provide the appropriate steps to verify that the changes don't cause regressions and behave as expected. Document the steps taken in the PR. 
+ +**Clean Up** + +Delete the cluster: + +``` +eksctl delete cluster -f cluster.yaml +``` + +## Troubleshooting + +**Tests fail with `realpath: command not found`** + +When running `make test`, you may see a message like below: + +``` +test/test-harness.sh: line 41: realpath: command not found +/entrypoint.sh: line 13: /test.sh: No such file or directory +``` + +The issue is discussed in [this StackExchange post](https://unix.stackexchange.com/questions/101080/realpath-command-not-found). + +On OSX, running `brew install coreutils` resolves the issue. + +## Finding contributions to work on +Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/amazon-eks-ami/labels/help%20wanted) issues is a great place to start. + + +## Code of Conduct +This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). +For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact +opensource-codeofconduct@amazon.com with any additional questions or comments. + + +## Security issue notifications +If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. + + +## Licensing + +See the [LICENSE](https://github.com/aws-samples/amazon-eks-ami/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. + +We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 
diff --git a/doc/README.md b/doc/README.md new file mode 120000 index 000000000..32d46ee88 --- /dev/null +++ b/doc/README.md @@ -0,0 +1 @@ +../README.md \ No newline at end of file diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md new file mode 100644 index 000000000..a546ab034 --- /dev/null +++ b/doc/USER_GUIDE.md @@ -0,0 +1,390 @@ +# User Guide + +This document includes details about using the AMI template and the resulting AMIs. + +--- + +## AMI template variables + +Default values for most variables are defined in [a default variable file](https://github.com/awslabs/amazon-eks-ami/blob/master/eks-worker-al2-variables.json). + +Users have the following options for specifying their own values: + +1. Provide a variable file with the `PACKER_VARIABLE_FILE` argument to `make`. Values in this file will override values in the default variable file. Your variable file does not need to include all possible variables, as it will be merged with the default variable file. +2. Pass a key-value pair for any template variable to `make`. These values will override any values that were specified with the first method. In the table below, these variables have a default value of *None*. + +> **Note** +> Some variables (such as `arch` and `kubernetes_version`) do not have a sensible, static default, and are satisfied by the Makefile. +> Such variables do not appear in the default variable file, and must be overridden (if necessary) by the second method described above. 
+ + + +| Variable | Default value | Description | +| - | - | - | +| `additional_yum_repos` | `""` | | +| `ami_component_description` | ```(k8s: {{ user `kubernetes_version` }}, docker: {{ user `docker_version` }}, containerd: {{ user `containerd_version` }})``` | | +| `ami_description` | ```EKS Kubernetes Worker AMI with AmazonLinux2 image``` | | +| `ami_name` | *None* | | +| `ami_regions` | `""` | | +| `ami_users` | `""` | | +| `arch` | *None* | | +| `associate_public_ip_address` | `""` | | +| `aws_access_key_id` | ```{{env `AWS_ACCESS_KEY_ID`}}``` | | +| `aws_region` | ```us-west-2``` | | +| `aws_secret_access_key` | ```{{env `AWS_SECRET_ACCESS_KEY`}}``` | | +| `aws_session_token` | ```{{env `AWS_SESSION_TOKEN`}}``` | | +| `binary_bucket_name` | ```amazon-eks``` | | +| `binary_bucket_region` | ```us-west-2``` | | +| `cache_container_images` | ```false``` | | +| `cni_plugin_version` | ```v1.2.0``` | | +| `containerd_version` | ```1.7.*``` | | +| `creator` | ```{{env `USER`}}``` | | +| `docker_version` | ```20.10.*``` | | +| `encrypted` | ```false``` | | +| `enable_fips` | ```false``` | Install openssl and enable fips related kernel parameters | +| `instance_type` | *None* | | +| `kernel_version` | `""` | | +| `kms_key_id` | `""` | | +| `kubernetes_build_date` | *None* | | +| `kubernetes_version` | *None* | | +| `launch_block_device_mappings_volume_size` | ```4``` | | +| `pause_container_version` | ```3.5``` | | +| `pull_cni_from_github` | ```true``` | | +| `remote_folder` | ```/tmp``` | Directory path for shell provisioner scripts on the builder instance | +| `runc_version` | ```1.1.*``` | | +| `security_group_id` | `""` | | +| `source_ami_filter_name` | ```amzn2-ami-minimal-hvm-*``` | | +| `source_ami_id` | `""` | | +| `source_ami_owners` | ```137112412989``` | | +| `ssh_interface` | `""` | | +| `ssh_username` | ```ec2-user``` | | +| `ssm_agent_version` | `""` | Version of the SSM agent to install from the S3 bucket provided by the SSM agent project, such as 
```latest```. If empty, the latest version of the SSM agent available in the Amazon Linux core repositories will be installed. | +| `subnet_id` | `""` | | +| `temporary_security_group_source_cidrs` | `""` | | +| `volume_type` | ```gp2``` | | +| `working_dir` | ```{{user `remote_folder`}}/worker``` | Directory path for ephemeral resources on the builder instance | + + +--- + +## Choosing Kubernetes binaries + +When building the AMI, binaries such as `kubelet`, `aws-iam-authenticator`, and `ecr-credential-provider` are installed. + +### Using the latest binaries + +It is recommended that the latest available binaries are used, as they may contain important fixes for bugs or security issues. +The latest binaries can be discovered with the following script: +```bash +hack/latest-binaries.sh $KUBERNETES_MINOR_VERSION +``` +This script will return the values for the binary-related AMI template variables, for example: +```bash +> hack/latest-binaries.sh 1.28 + +kubernetes_version=1.28.1 kubernetes_build_date=2023-10-01 +``` + +### Using a specific version of the binaries + +Use the following commands to obtain values for the binary-related AMI template variables: +```bash +# List Kubernetes versions +aws s3 ls s3://amazon-eks + +# List build dates +aws s3 ls s3://amazon-eks/1.23.9/ + +# List platforms +aws s3 ls s3://amazon-eks/1.23.9/2022-07-27/bin/ + +# List architectures +aws s3 ls s3://amazon-eks/1.23.9/2022-07-27/bin/linux/ + +# List binaries +aws s3 ls s3://amazon-eks/1.23.9/2022-07-27/bin/linux/x86_64/ +``` + +To build using the example binaries above: +```bash +make k8s \ + kubernetes_version=1.23.9 \ + kubernetes_build_date=2022-07-27 \ + arch=x86_64 +``` + +### Providing your own binaries + +By default, binaries are downloaded from the public S3 bucket `amazon-eks` in `us-west-2`. +You can instead provide your own version of Kubernetes binaries. + +To use your own binaries: + +1. Copy all of the necessary binaries to your own S3 bucket using the AWS CLI. 
For example: +```bash + aws s3 cp kubelet s3://$BUCKET/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/$ARCH/kubelet +``` + +**Important**: You must provide all the binaries present in the default `amazon-eks` bucket for a specific `KUBERNETES_VERSION`, `KUBERNETES_BUILD_DATE`, and `ARCH` combination. +These binaries must be accessible using the credentials on the Packer builder EC2 instance. + +2. Run the following command to start the build process to use your own Kubernetes binaries: +```bash +make k8s \ + binary_bucket_name=my-custom-bucket \ + binary_bucket_region=eu-west-1 \ + kubernetes_version=1.14.9 \ + kubernetes_build_date=2020-01-22 +``` +**Note**: Confirm that the binary_bucket_name, binary_bucket_region, kubernetes_version, and kubernetes_build_date parameters match the path to your binaries in Amazon S3. + +--- + +## Container Image Caching + +Optionally, some container images can be cached during the AMI build process in order to reduce the latency of the node getting to a `Ready` state when launched. + +To turn on container image caching: + +``` +cache_container_images=true make 1.23 +``` + +When container image caching is enabled, the following images are cached: + - 602401143452.dkr.ecr..amazonaws.com/eks/kube-proxy:-eksbuild. + - 602401143452.dkr.ecr..amazonaws.com/eks/kube-proxy:-minimal-eksbuild. + - 602401143452.dkr.ecr..amazonaws.com/eks/pause:3.5 + - 602401143452.dkr.ecr..amazonaws.com/amazon-k8s-cni-init: + - 602401143452.dkr.ecr..amazonaws.com/amazon-k8s-cni: + +The account ID can be different depending on the region and partition you are building the AMI in. See [here](https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html) for more details. + +Since the VPC CNI is not versioned with K8s itself, the latest version of the VPC CNI and the default version, based on the response from the EKS DescribeAddonVersions at the time of the AMI build, will be cached. 
+ +The images listed above are also tagged with each region in the partition the AMI is built in, since images are often built in one region and copied to others within the same partition. Images that are available to pull from an ECR FIPS endpoint are also tagged as such (i.e. `602401143452.dkr.ecr-fips.us-east-1.amazonaws.com/eks/pause:3.5`). + +When listing images on a node, you'll notice a long list of images. However, most of these images are simply tagged in different ways with no storage overhead. Images cached in the AMI total around 1.0 GiB. In general, a node with no images cached using the VPC CNI will use around 500 MiB of images when in a `Ready` state with no other pods running on the node. + +--- + +## IAM Permissions + +To build the EKS Optimized AMI, you will need the following permissions: + +``` +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ec2:AttachVolume", + "ec2:AuthorizeSecurityGroupIngress", + "ec2:CopyImage", + "ec2:CreateImage", + "ec2:CreateKeypair", + "ec2:CreateSecurityGroup", + "ec2:CreateSnapshot", + "ec2:CreateTags", + "ec2:CreateVolume", + "ec2:DeleteKeyPair", + "ec2:DeleteSecurityGroup", + "ec2:DeleteSnapshot", + "ec2:DeleteVolume", + "ec2:DeregisterImage", + "ec2:DescribeImageAttribute", + "ec2:DescribeImages", + "ec2:DescribeInstances", + "ec2:DescribeInstanceStatus", + "ec2:DescribeRegions", + "ec2:DescribeSecurityGroups", + "ec2:DescribeSnapshots", + "ec2:DescribeSubnets", + "ec2:DescribeTags", + "ec2:DescribeVolumes", + "ec2:DetachVolume", + "ec2:GetPasswordData", + "ec2:ModifyImageAttribute", + "ec2:ModifyInstanceAttribute", + "ec2:ModifySnapshotAttribute", + "ec2:RegisterImage", + "ec2:RunInstances", + "ec2:StopInstances", + "ec2:TerminateInstances", + "eks:DescribeAddonVersions", + "ecr:GetAuthorizationToken" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer" + ], + 
"Resource": "arn:aws:ecr:us-west-2:602401143452:repository/*" + }, + { + "Effect": "Allow", + "Action": [ + "s3:GetObject" + ], + "Resource": "arn:aws:s3:::amazon-eks/*" + } + ] +} +``` + +You will need to use the region you are building the AMI in to specify the ECR repository resource in the second IAM statement. You may also need to change the account if you are building the AMI in a different partition or special region. You can see a mapping of regions to account ID [here](https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html). +If you're using a custom s3 bucket to vend different K8s binaries, you will need to change the resource in the third IAM statement above to reference your custom bucket. +For more information about the permissions required by Packer with different configurations, see the [docs](https://www.packer.io/plugins/builders/amazon#iam-task-or-instance-role). + +--- + +## Customizing Kubelet Config + +In some cases, customers may want to customize the [kubelet configuration](https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/#kubelet-config-k8s-io-v1beta1-KubeletConfiguration) on their nodes, and there are two mechanisms to do that with the EKS Optimized AMI. + +**Set the "--kubelet-extra-args" flag when invoking bootstrap.sh** + +`bootstrap.sh`, the script that bootstraps nodes when using the EKS Optimized AMI, supports a flag called `--kubelet-extra-args` that allows you to pass in additional `kubelet` configuration. If you invoke the bootstrap script yourself (self-managed nodegroups or EKS managed nodegroups with custom AMIs), you can use that to customize your configuration. For example, you can use something like the following in your userdata: + +``` +/etc/eks/bootstrap.sh my-cluster --kubelet-extra-args '--registry-qps=20 --registry-burst=40' +``` + +In this case, it will set `registryPullQPS` to 20 and `registryBurst` to 40 in `kubelet`. 
Some of the flags, like the ones above, are marked as deprecated and you're encouraged to set them in the `kubelet` config file (described below), but they continue to work as of 1.23. + +**Update the kubelet config file** + +You can update the `kubelet` config file directly with new configuration. On EKS Optimized AMIs, the file is stored at `/etc/kubernetes/kubelet/kubelet-config.json`. It must be valid JSON. You can use a utility like `jq` (or your tool of choice) to edit the config in your user data: + +``` +echo "$(jq ".registryPullQPS=20 | .registryBurst=40" /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json +``` + +There are a couple of important caveats here: + +1. If you update the `kubelet` config file after `kubelet` has already started (i.e. `bootstrap.sh` already ran), you'll need to restart `kubelet` to pick up the latest configuration. +2. [bootstrap.sh](https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh) does modify a few fields, like `kubeReserved` and `evictionHard`, so you'd need to modify the config after the bootstrap script is run and restart `kubelet` to overwrite those properties. + +**View active kubelet config** + +When `kubelet` starts up, it logs all possible flags, including unset flags. The unset flags get logged with default values. *These logs do not necessarily reflect the actual active configuration.* This has caused confusion in the past when customers have configured the `kubelet` config file with one value and notice the default value is logged. 
Here is an example of the referenced log: + +``` +Aug 16 21:53:49 ip-192-168-92-220.us-east-2.compute.internal kubelet[3935]: I0816 21:53:49.202824 3935 flags.go:59] FLAG: --registry-burst="10" +Aug 16 21:53:49 ip-192-168-92-220.us-east-2.compute.internal kubelet[3935]: I0816 21:53:49.202829 3935 flags.go:59] FLAG: --registry-qps="5" +``` + +To view the actual `kubelet` config on your node, you can use the Kubernetes API to confirm that your configuration has applied. + +``` +$ kubectl proxy +$ curl -sSL "http://localhost:8001/api/v1/nodes/ip-192-168-92-220.us-east-2.compute.internal/proxy/configz" | jq + +{ + "kubeletconfig": { + ... + "registryPullQPS": 20, + "registryBurst": 40, + ... + } +} +``` + +--- + +## AL2 and Linux Kernel Information + +By default, the `amazon-eks-ami` uses a [source_ami_filter](https://github.com/awslabs/amazon-eks-ami/blob/e3f1b910f83ad1f27e68312e50474ea6059f052d/eks-worker-al2.json#L46) that selects the latest [hvm](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/virtualization_types.html) AL2 AMI for the given architecture as the base AMI. For more information on what kernel versions are running on published Amazon EKS optimized Linux AMIs, see [the public documentation](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html). + +When building an AMI, you can set `kernel_version` to customize the kernel version. Valid values are: +- `4.14` +- `5.4` +- `5.10` + +If `kernel_version` is not set: +- For Kubernetes 1.23 and below, `5.4` is used. +- For Kubernetes 1.24 and above, `5.10` is used. + +The [upgrade_kernel.sh script](https://github.com/awslabs/amazon-eks-ami/blob/master/scripts/upgrade_kernel.sh) contains the logic for updating and upgrading the kernel. + +--- + +## Updating known instance types + +`files/bootstrap.sh` configures the maximum number of pods on a node based off of the number of ENIs available, which is determined by the instance type. Larger instances generally have more ENIs. 
The number of ENIs limits how many IPV4 addresses are available on an instance, and we need one IP address per pod. You can [see this file](https://github.com/aws/amazon-vpc-cni-k8s/blob/master/scripts/gen_vpc_ip_limits.go) for the code that calculates the max pods for more information. + +To add support for new instance types, at a minimum, we need to update `files/eni-max-pods.txt` using the [amazon-vpc-cni-k8s package.](https://github.com/aws/amazon-vpc-cni-k8s) to set the number of max pods available for those instance types. If the instance type is not on the list, `bootstrap.sh` will fail when the node is started. + +``` +$ git clone git@github.com:aws/amazon-vpc-cni-k8s.git + +# AWS credentials required at this point +$ make generate-limits +# misc/eni-max-pods.txt should be generated + +# Copy the generated file to this repo, something like this: +$ cp misc/eni-max-pods.txt ../amazon-eks-ami/files/ + +# Verify that expected types were added +$ git diff +``` + +At this point, you can build an AMI and it will include the updated list of instance types. + +--- + +## Version-locked packages + +Some packages are critical for correct, performant behavior of a Kubernetes node; such as: +- `kernel` +- `containerd` +- `runc` + +> **Note** +> This is not an exhaustive list. The complete list of locked packages is available with `yum versionlock list`. + +As a result, these packages should generally be modified within the bounds of a managed process that gracefully handles failures and prevents disruption to the cluster's workloads. + +To prevent unintentional changes, the [yum-versionlock](https://github.com/rpm-software-management/yum-utils/tree/05db7ef501fc9d6698935bcc039c83c0761c3be2/plugins/versionlock) plugin is used on these packages. 
+ +If you wish to modify a locked package, you can: +``` +# unlock a single package +sudo yum versionlock delete $PACKAGE_NAME + +# unlock all packages +sudo yum versionlock clear +``` + +--- + +## Image credential provider plugins + +Prior to Kubernetes 1.27, the `kubelet` could obtain credentials for ECR out of the box. This legacy credential process has been removed in Kubernetes 1.27, and +ECR credentials should now be obtained via a plugin, the `ecr-credential-provider`. This plugin is installed in the AMI at `/etc/eks/image-credential-provider/ecr-credential-provider`. More information about this plugin is available in the [`cloud-provider-aws` documentation](https://cloud-provider-aws.sigs.k8s.io/credential_provider/). + +Additional image credential provider plugins may be appended to `/etc/eks/image-credential-provider/config.json`. In Kubernetes versions 1.26 and below, all plugins in this file must support `credentialprovider.kubelet.k8s.io/v1alpha1`. In Kubernetes versions 1.27 and above, they must support `credentialprovider.kubelet.k8s.io/v1`. + +For more information about image credential provider plugins, refer to the [Kubernetes documentation](https://kubernetes.io/docs/tasks/administer-cluster/kubelet-credential-provider/). + +--- + +## Ephemeral Storage + +Some instance types launch with ephemeral NVMe instance storage (i3, i4i, c5d, c6id, etc). There are two main ways of utilizing this storage within Kubernetes: a single RAID-0 array for use by kubelet and containerd or mounting the individual disks for pod usage. + +The EKS Optimized AMI includes a utility script to configure ephemeral storage. The script can be invoked by passing the `--local-disks ` flag to the `/etc/eks/bootstrap.sh` script or the script can be invoked directly at `/bin/setup-local-disks`. All disks are formatted with an XFS file system. 
+ +Below are details on the two disk setup options: + +### RAID-0 for Kubelet and Containerd (raid0) + +A RAID-0 array is setup that includes all ephemeral NVMe instance storage disks. The containerd and kubelet state directories (`/var/lib/containerd` and `/var/lib/kubelet`) will then use the ephemeral storage for more and faster node ephemeral-storage. The node's ephemeral storage can be shared among pods that request ephemeral storage and container images that are downloaded to the node. + +### Mount for Persistent Volumes (mount) + +Another way of utilizing the ephemeral disks is to format and mount the individual disks. Mounting individual disks allows the [local-static-provisioner](https://github.com/kubernetes-sigs/sig-storage-local-static-provisioner) DaemonSet to create Persistent Volume Claims that pods can utilize. diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json new file mode 100644 index 000000000..45756e51e --- /dev/null +++ b/eks-worker-al2-variables.json @@ -0,0 +1,39 @@ +{ + "additional_yum_repos": "", + "ami_component_description": "(k8s: {{ user `kubernetes_version` }}, docker: {{ user `docker_version` }}, containerd: {{ user `containerd_version` }})", + "ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", + "ami_regions": "", + "ami_users": "", + "associate_public_ip_address": "", + "aws_access_key_id": "{{env `AWS_ACCESS_KEY_ID`}}", + "aws_region": "us-west-2", + "aws_secret_access_key": "{{env `AWS_SECRET_ACCESS_KEY`}}", + "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}", + "binary_bucket_name": "amazon-eks", + "binary_bucket_region": "us-west-2", + "cache_container_images": "false", + "cni_plugin_version": "v1.2.0", + "containerd_version": "1.7.*", + "creator": "{{env `USER`}}", + "docker_version": "20.10.*", + "enable_fips": "false", + "encrypted": "false", + "kernel_version": "", + "kms_key_id": "", + "launch_block_device_mappings_volume_size": "4", + "pause_container_version": "3.5", + 
"pull_cni_from_github": "true", + "remote_folder": "/tmp", + "runc_version": "1.1.*", + "security_group_id": "", + "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", + "source_ami_id": "", + "source_ami_owners": "137112412989", + "ssh_interface": "", + "ssh_username": "ec2-user", + "ssm_agent_version": "", + "subnet_id": "", + "temporary_security_group_source_cidrs": "", + "volume_type": "gp2", + "working_dir": "{{user `remote_folder`}}/worker" +} diff --git a/eks-worker-al2.json b/eks-worker-al2.json index a9c490236..c301c1eca 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -1,43 +1,56 @@ { + "_comment": "All template variables are enumerated here; and most variables have a default value defined in eks-worker-al2-variables.json", "variables": { - "aws_region": "us-west-2", + "additional_yum_repos": null, + "ami_component_description": null, + "ami_description": null, "ami_name": null, - "creator": "{{env `USER`}}", - "encrypted": "false", - "kms_key_id": "", - - "aws_access_key_id": "{{env `AWS_ACCESS_KEY_ID`}}", - "aws_secret_access_key": "{{env `AWS_SECRET_ACCESS_KEY`}}", - "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}", - - "binary_bucket_name": "amazon-eks", - "binary_bucket_region": "us-west-2", - "kubernetes_version": null, - "kubernetes_build_date": null, - "docker_version": "18.06", - "cni_version": "v0.6.0", - "cni_plugin_version": "v0.7.5", - - "source_ami_id": "", - "source_ami_owners": "137112412989", - "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", + "ami_regions": null, + "ami_users": null, "arch": null, + "associate_public_ip_address": null, + "aws_access_key_id": null, + "aws_region": null, + "aws_secret_access_key": null, + "aws_session_token": null, + "binary_bucket_name": null, + "binary_bucket_region": null, + "cache_container_images": null, + "cni_plugin_version": null, + "containerd_version": null, + "creator": null, + "docker_version": null, + "encrypted": null, + "enable_fips": null, "instance_type": null, - 
"ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", - - "ssh_interface": "", - "ssh_username": "ec2-user", - "temporary_security_group_source_cidrs": "", - "associate_public_ip_address": "", - "subnet_id": "" - + "kernel_version": null, + "kms_key_id": null, + "kubernetes_build_date": null, + "kubernetes_version": null, + "launch_block_device_mappings_volume_size": null, + "pause_container_version": null, + "pull_cni_from_github": null, + "remote_folder": null, + "runc_version": null, + "security_group_id": null, + "source_ami_filter_name": null, + "source_ami_id": null, + "source_ami_owners": null, + "ssh_interface": null, + "ssh_username": null, + "ssm_agent_version": null, + "subnet_id": null, + "temporary_security_group_source_cidrs": null, + "volume_type": null, + "working_dir": null }, - "builders": [ { "type": "amazon-ebs", "region": "{{user `aws_region`}}", "source_ami": "{{user `source_ami_id`}}", + "ami_users": "{{user `ami_users`}}", + "snapshot_users": "{{user `ami_users`}}", "source_ami_filter": { "filters": { "name": "{{user `source_ami_filter_name`}}", @@ -46,75 +59,187 @@ "state": "available", "virtualization-type": "hvm" }, - "owners": [ "{{user `source_ami_owners`}}" ], + "owners": [ + "{{user `source_ami_owners`}}" + ], "most_recent": true }, "instance_type": "{{user `instance_type`}}", "launch_block_device_mappings": [ { "device_name": "/dev/xvda", - "volume_type": "gp2", - "volume_size": 4, + "volume_type": "{{user `volume_type`}}", + "volume_size": "{{user `launch_block_device_mappings_volume_size`}}", "delete_on_termination": true } ], - "ami_block_device_mappings": [ + "ami_block_device_mappings": [ { "device_name": "/dev/xvda", - "volume_type": "gp2", + "volume_type": "{{user `volume_type`}}", "volume_size": 20, "delete_on_termination": true } ], + "aws_polling": { + "delay_seconds": 30, + "max_attempts": 90 + }, + "ami_regions": "{{user `ami_regions`}}", "ssh_username": "{{user `ssh_username`}}", "ssh_interface": 
"{{user `ssh_interface`}}", "temporary_security_group_source_cidrs": "{{user `temporary_security_group_source_cidrs`}}", + "security_group_id": "{{user `security_group_id`}}", "associate_public_ip_address": "{{user `associate_public_ip_address`}}", "ssh_pty": true, "encrypt_boot": "{{user `encrypted`}}", "kms_key_id": "{{user `kms_key_id`}}", "run_tags": { - "creator": "{{user `creator`}}" + "creator": "{{user `creator`}}" }, "subnet_id": "{{user `subnet_id`}}", "tags": { - "Name": "{{user `ami_name`}}", - "created": "{{timestamp}}", - "docker_version": "{{ user `docker_version`}}", - "source_ami_id": "{{ user `source_ami_id`}}", - "kubernetes": "{{ user `kubernetes_version`}}/{{ user `kubernetes_build_date` }}/bin/linux/{{ user `arch` }}", - "cni_version": "{{ user `cni_version`}}", - "cni_plugin_version": "{{ user `cni_plugin_version`}}" + "Name": "{{user `ami_name`}}", + "created": "{{timestamp}}", + "build_region": "{{ .BuildRegion }}", + "source_ami_id": "{{ .SourceAMI }}", + "source_ami_name": "{{ .SourceAMIName }}", + "docker_version": "{{ user `docker_version`}}", + "containerd_version": "{{ user `containerd_version`}}", + "kubernetes": "{{ user `kubernetes_version`}}/{{ user `kubernetes_build_date` }}/bin/linux/{{ user `arch` }}", + "cni_plugin_version": "{{ user `cni_plugin_version`}}", + "ssm_agent_version": "{{ user `ssm_agent_version`}}" }, "ami_name": "{{user `ami_name`}}", - "ami_description": "{{ user `ami_description` }}, (k8s: {{ user `kubernetes_version`}}, docker:{{ user `docker_version`}})" + "ami_description": "{{ user `ami_description` }}, {{ user `ami_component_description` }}", + "metadata_options": { + "http_tokens": "required" + } } ], - "provisioners": [ { "type": "shell", - "inline": ["mkdir -p /tmp/worker/"] + "remote_folder": "{{ user `remote_folder`}}", + "inline": [ + "mkdir -p {{user `working_dir`}}", + "mkdir -p {{user `working_dir`}}/log-collector-script" + ] + }, + { + "type": "shell", + "remote_folder": "{{ user 
`remote_folder`}}", + "script": "{{template_dir}}/scripts/install_additional_repos.sh", + "environment_vars": [ + "ADDITIONAL_YUM_REPOS={{user `additional_yum_repos`}}" + ] }, { "type": "file", "source": "{{template_dir}}/files/", - "destination": "/tmp/worker/" + "destination": "{{user `working_dir`}}" + }, + { + "type": "file", + "source": "{{template_dir}}/log-collector-script/linux/", + "destination": "{{user `working_dir`}}/log-collector-script/" + }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "inline": [ + "sudo chmod -R a+x {{user `working_dir`}}/bin/", + "sudo mv {{user `working_dir`}}/bin/* /usr/bin/" + ] + }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "script": "{{template_dir}}/scripts/upgrade_kernel.sh", + "environment_vars": [ + "KUBERNETES_VERSION={{user `kubernetes_version`}}", + "KERNEL_VERSION={{user `kernel_version`}}" + ] + }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "script": "{{template_dir}}/scripts/enable-fips.sh", + "environment_vars": [ + "ENABLE_FIPS={{user `enable_fips`}}" + ] + }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "inline": ["sudo reboot"], + "expect_disconnect": true, + "pause_after": "90s" }, { "type": "shell", - "script": "{{template_dir}}/install-worker.sh", + "remote_folder": "{{ user `remote_folder`}}", + "script": "{{template_dir}}/scripts/install-worker.sh", "environment_vars": [ "KUBERNETES_VERSION={{user `kubernetes_version`}}", "KUBERNETES_BUILD_DATE={{user `kubernetes_build_date`}}", "BINARY_BUCKET_NAME={{user `binary_bucket_name`}}", "BINARY_BUCKET_REGION={{user `binary_bucket_region`}}", "DOCKER_VERSION={{user `docker_version`}}", - "CNI_VERSION={{user `cni_version`}}", + "CONTAINERD_VERSION={{user `containerd_version`}}", + "RUNC_VERSION={{user `runc_version`}}", "CNI_PLUGIN_VERSION={{user `cni_plugin_version`}}", + "PULL_CNI_FROM_GITHUB={{user `pull_cni_from_github`}}", 
"AWS_ACCESS_KEY_ID={{user `aws_access_key_id`}}", "AWS_SECRET_ACCESS_KEY={{user `aws_secret_access_key`}}", - "AWS_SESSION_TOKEN={{user `aws_session_token`}}" + "AWS_SESSION_TOKEN={{user `aws_session_token`}}", + "PAUSE_CONTAINER_VERSION={{user `pause_container_version`}}", + "CACHE_CONTAINER_IMAGES={{user `cache_container_images`}}", + "WORKING_DIR={{user `working_dir`}}", + "SSM_AGENT_VERSION={{user `ssm_agent_version`}}" + ] + }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "script": "{{template_dir}}/scripts/cleanup.sh" + }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "script": "{{template_dir}}/scripts/cleanup_additional_repos.sh", + "environment_vars": [ + "ADDITIONAL_YUM_REPOS={{user `additional_yum_repos`}}" + ] + }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "script": "{{template_dir}}/scripts/validate.sh", + "environment_vars": [ + "KERNEL_VERSION={{user `kernel_version`}}" + ] + }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "script": "{{template_dir}}/scripts/generate-version-info.sh", + "execute_command": "chmod +x {{ .Path }}; {{ .Path }} {{user `working_dir`}}/version-info.json", + "environment_vars": [ + "CACHE_CONTAINER_IMAGES={{user `cache_container_images`}}" + ] + }, + { + "type": "file", + "direction": "download", + "source": "{{user `working_dir`}}/version-info.json", + "destination": "{{ user `ami_name` }}-version-info.json" + }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "inline": [ + "rm -rf {{user `working_dir`}}" ] } ], @@ -122,7 +247,20 @@ { "type": "manifest", "output": "manifest.json", - "strip_path": true + "strip_path": true, + "custom_data": { + "source_ami_name": "{{ build `SourceAMIName` }}", + "source_ami_id": "{{ build `SourceAMI` }}" + } + }, + { + "type": "manifest", + "output": "{{user `ami_name`}}-manifest.json", + "strip_path": true, + "custom_data": { + "source_ami_name": "{{ 
build `SourceAMIName` }}", + "source_ami_id": "{{ build `SourceAMI` }}" + } } ] } diff --git a/files/1.14/kubelet.service b/files/1.14/kubelet.service deleted file mode 100644 index 28a3b062a..000000000 --- a/files/1.14/kubelet.service +++ /dev/null @@ -1,21 +0,0 @@ -[Unit] -Description=Kubernetes Kubelet -Documentation=https://github.com/kubernetes/kubernetes -After=docker.service -Requires=docker.service - -[Service] -ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -ExecStart=/usr/bin/kubelet --cloud-provider aws \ - --config /etc/kubernetes/kubelet/kubelet-config.json \ - --kubeconfig /var/lib/kubelet/kubeconfig \ - --container-runtime docker \ - --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS - -Restart=on-failure -RestartForceExitStatus=SIGPIPE -RestartSec=5 -KillMode=process - -[Install] -WantedBy=multi-user.target diff --git a/files/bin/configure-clocksource b/files/bin/configure-clocksource new file mode 100755 index 000000000..9815401f8 --- /dev/null +++ b/files/bin/configure-clocksource @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +CLOCK_PATH="/sys/devices/system/clocksource/clocksource0" + +function log() { + echo >&2 "$@" +} + +function current-clocksource() { + cat "${CLOCK_PATH}/current_clocksource" +} + +function check-available-clocksource() { + grep --quiet "${1}" "${CLOCK_PATH}/available_clocksource" +} + +function try-set-clocksource() { + if check-available-clocksource "${1}"; then + echo "${1}" > "${CLOCK_PATH}/current_clocksource" + log "configured clocksource: ${1}" + else + log "clocksource not available: ${1}" + fi +} + +case "$(imds /latest/meta-data/system)" in + nitro) + CLOCKSOURCE="kvm-clock" + ;; + + **) + CLOCKSOURCE="tsc" + ;; +esac + +log "desired clocksource: ${CLOCKSOURCE}" + +if [ ! 
"$(current-clocksource)" = "${CLOCKSOURCE}" ]; then + try-set-clocksource "${CLOCKSOURCE}" +fi + +log "final clocksource: $(current-clocksource)" diff --git a/files/bin/imds b/files/bin/imds new file mode 100755 index 000000000..2e87c00d8 --- /dev/null +++ b/files/bin/imds @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +if [ "$#" -ne 1 ]; then + echo >&2 "usage: imds token|API_PATH" + exit 1 +fi + +IMDS_DEBUG="${IMDS_DEBUG:-false}" +# default ttl is 15 minutes +IMDS_TOKEN_TTL_SECONDS=${IMDS_TOKEN_TTL_SECONDS:-900} +IMDS_RETRIES=${IMDS_RETRIES:-10} +IMDS_RETRY_DELAY_SECONDS=${IMDS_RETRY_DELAY_SECONDS:-1} +IMDS_ENDPOINT=${IMDS_ENDPOINT:-169.254.169.254} + +function log() { + if [ "$IMDS_DEBUG" = "true" ]; then + echo >&2 "$1" + fi +} + +function imdscurl() { + local OUTPUT_FILE=$(mktemp) + local CODE=$(curl \ + --silent \ + --show-error \ + --output $OUTPUT_FILE \ + --write-out "%{http_code}" \ + --retry $IMDS_RETRIES \ + --retry-delay $IMDS_RETRY_DELAY_SECONDS \ + "$@" || echo "1") + # CODE will be either the HTTP status code, or 1 if the exit code of `curl` is non-zero + if [[ ${CODE} -lt 200 || ${CODE} -gt 299 ]]; then + cat >&2 $OUTPUT_FILE + return $CODE + fi + printf "$(cat $OUTPUT_FILE)\n" + rm $OUTPUT_FILE +} + +function get-token() { + imdscurl \ + -H "X-aws-ec2-metadata-token-ttl-seconds: $IMDS_TOKEN_TTL_SECONDS" \ + -X PUT \ + "http://$IMDS_ENDPOINT/latest/api/token" +} + +function get-with-token() { + local API_PATH="$1" + imdscurl \ + -H "X-aws-ec2-metadata-token: ${IMDS_TOKEN:-$(get-token)}" \ + "http://$IMDS_ENDPOINT/$API_PATH" +} + +log "ℹ️ Talking to IMDS at $IMDS_ENDPOINT" + +if [ "$1" = "token" ]; then + get-token +else + # leading slashes will be removed + API_PATH="${1#/}" + get-with-token "$API_PATH" +fi diff --git a/files/bin/mount-bpf-fs b/files/bin/mount-bpf-fs new file mode 100755 index 000000000..df5767e99 --- /dev/null +++ b/files/bin/mount-bpf-fs @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +set -o 
errexit +set -o nounset + +SYSTEMD_UNIT_DIR="/etc/systemd/system" +SYSTEMD_UNIT="sys-fs-bpf.mount" +SYSTEMD_UNIT_PATH="$SYSTEMD_UNIT_DIR/$SYSTEMD_UNIT" +MOUNT_POINT="/sys/fs/bpf" +FS_TYPE="bpf" + +MOUNT_BPF_FS_DEBUG=${MOUNT_BPF_FS_DEBUG:-false} +function debug() { + if [ "$MOUNT_BPF_FS_DEBUG" = "true" ]; then + echo >&2 "DEBUG:" "$@" + fi +} + +if [ $(mount --types "$FS_TYPE" | wc -l) -gt 0 ]; then + debug "$FS_TYPE filesystem already mounted!" + exit 0 +elif mount | awk '{print $3}' | grep "$MOUNT_POINT"; then + debug "mount point at $MOUNT_POINT already exists!" + exit 0 +elif [ -f "$SYSTEMD_UNIT_PATH" ]; then + debug "systemd unit at $SYSTEMD_UNIT_PATH already exists!" + exit 0 +fi + +mkdir -p "$SYSTEMD_UNIT_DIR" +cat > "$SYSTEMD_UNIT_PATH" << EOL +[Unit] +Description=BPF mounts +Documentation=https://docs.kernel.org/bpf/index.html +DefaultDependencies=no +Before=local-fs.target umount.target +After=swap.target + +[Mount] +What=bpffs +Where=$MOUNT_POINT +Type=bpf +Options=rw,nosuid,nodev,noexec,relatime,mode=700 + +[Install] +WantedBy=multi-user.target +EOL + +systemctl enable "$SYSTEMD_UNIT" +systemctl start "$SYSTEMD_UNIT" diff --git a/files/bin/private-dns-name b/files/bin/private-dns-name new file mode 100755 index 000000000..f8ce371d8 --- /dev/null +++ b/files/bin/private-dns-name @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +set -o errexit +set -o nounset +set -o xtrace + +# Retrieves the PrivateDnsName from EC2 for this instance, waiting until +# it is available if necessary (due to eventual consistency). 
+ +function log { + echo >&2 "$(date '+%Y-%m-%dT%H:%M:%S%z')" "[private-dns-name]" "$@" +} + +INSTANCE_ID=$(imds /latest/meta-data/instance-id) + +# the AWS CLI currently constructs the wrong endpoint URL on localzones (the availability zone group will be used instead of the parent region) +# more info: https://github.com/aws/aws-cli/issues/7043 +REGION=$(imds /latest/meta-data/placement/region) + +# by default, wait for 120 seconds +PRIVATE_DNS_NAME_MAX_ATTEMPTS=${PRIVATE_DNS_NAME_MAX_ATTEMPTS:-20} +PRIVATE_DNS_NAME_ATTEMPT_INTERVAL=${PRIVATE_DNS_NAME_ATTEMPT_INTERVAL:-6} + +log "will make up to ${PRIVATE_DNS_NAME_MAX_ATTEMPTS} attempt(s) every ${PRIVATE_DNS_NAME_ATTEMPT_INTERVAL} second(s)" + +ATTEMPT=0 +while true; do + PRIVATE_DNS_NAME=$(aws ec2 describe-instances --region $REGION --instance-ids $INSTANCE_ID | jq -r '.Reservations[].Instances[].PrivateDnsName') + if [ ! "${PRIVATE_DNS_NAME}" = "" ] || [ ${ATTEMPT} -ge ${PRIVATE_DNS_NAME_MAX_ATTEMPTS} ]; then + break + fi + ATTEMPT=$((ATTEMPT + 1)) + log "WARN: PrivateDnsName is not available, waiting for ${PRIVATE_DNS_NAME_ATTEMPT_INTERVAL} seconds..." + sleep ${PRIVATE_DNS_NAME_ATTEMPT_INTERVAL} +done + +if [ "${PRIVATE_DNS_NAME}" = "" ]; then + log "ERROR: failed to retrieve PrivateDnsName after ${ATTEMPT} attempts!" 
+ exit 1 +else + log "INFO: retrieved PrivateDnsName: ${PRIVATE_DNS_NAME}" + echo "${PRIVATE_DNS_NAME}" + exit 0 +fi diff --git a/files/bin/provider-id b/files/bin/provider-id new file mode 100755 index 000000000..7cced7f3a --- /dev/null +++ b/files/bin/provider-id @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -o errexit +set -o nounset + +AVAILABILITY_ZONE=$(imds '/latest/meta-data/placement/availability-zone') +INSTANCE_ID=$(imds '/latest/meta-data/instance-id') + +echo "aws:///$AVAILABILITY_ZONE/$INSTANCE_ID" diff --git a/files/bin/setup-local-disks b/files/bin/setup-local-disks new file mode 100644 index 000000000..9cdb18dae --- /dev/null +++ b/files/bin/setup-local-disks @@ -0,0 +1,220 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +err_report() { + echo "Exited with error on line $1" +} +trap 'err_report $LINENO' ERR + +print_help() { + echo "usage: $0 " + echo "Sets up Amazon EC2 Instance Store NVMe disks" + echo "" + echo "-d, --dir directory to mount the filesystem(s) (default: /mnt/k8s-disks/)" + echo "-h, --help print this help" +} + +# Sets up a RAID-0 of NVMe instance storage disks, moves +# the contents of /var/lib/kubelet and /var/lib/containerd +# to the new mounted RAID, and bind mounts the kubelet and +# containerd state directories. +maybe_raid0() { + local md_name="kubernetes" + local md_device="/dev/md/${md_name}" + local md_config="/.aws/mdadm.conf" + local array_mount_point="${MNT_DIR}/0" + mkdir -p "$(dirname "${md_config}")" + + if [[ ! -s "${md_config}" ]]; then + mdadm --create --force --verbose \ + "${md_device}" \ + --level=0 \ + --name="${md_name}" \ + --raid-devices="${#EPHEMERAL_DISKS[@]}" \ + "${EPHEMERAL_DISKS[@]}" + while [ -n "$(mdadm --detail "${md_device}" | grep -ioE 'State :.*resyncing')" ]; do + echo "Raid is resyncing..." 
+ sleep 1 + done + mdadm --detail --scan > "${md_config}" + fi + + ## Check if the device symlink has changed on reboot to include a homehost identifier + local current_md_device=$(find /dev/md/ -type l -regex ".*/${md_name}_?[0-9a-z]*$" | tail -n1) + if [[ ! -z ${current_md_device} ]]; then + md_device="${current_md_device}" + fi + + # Format the array if not already formatted. + if [[ -z "$(lsblk "${md_device}" -o fstype --noheadings)" ]]; then + ## By default, mkfs tries to use the stripe unit of the array (512k), + ## for the log stripe unit, but the max log stripe unit is 256k. + ## So instead, we use 32k (8 blocks) to avoid a warning of breaching the max. + ## mkfs.xfs defaults to 32k after logging the warning since the default log buffer size is 32k. + mkfs.xfs -l su=8b "${md_device}" + fi + + ## Create the mount directory + mkdir -p "${array_mount_point}" + + local dev_uuid=$(blkid -s UUID -o value "${md_device}") + local mount_unit_name="$(systemd-escape --path --suffix=mount "${array_mount_point}")" + cat > "/etc/systemd/system/${mount_unit_name}" << EOF + [Unit] + Description=Mount EC2 Instance Store NVMe disk RAID0 + [Mount] + What=UUID=${dev_uuid} + Where=${array_mount_point} + Type=xfs + Options=defaults,noatime + [Install] + WantedBy=multi-user.target +EOF + systemd-analyze verify "${mount_unit_name}" + systemctl enable "${mount_unit_name}" --now + + prev_running="" + needs_linked="" + for unit in "kubelet" "containerd"; do + ## Check if the bind mount from the RAID already exists + if [[ "$(systemctl is-active var-lib-${unit}.mount)" != "active" ]]; then + # Check if components that depend on the RAID are running and, if so, stop them + if systemctl is-active "${unit}" > /dev/null 2>&1; then + prev_running+=" ${unit}" + fi + needs_linked+=" /var/lib/${unit}" + fi + done + + ## Check if /var/log/pods has been bind mounted and make sure kubelet is stopped + if [[ "$(systemctl is-active var-log-pods.mount)" != "active" ]]; then + if systemctl is-active 
"kubelet" > /dev/null 2>&1; then + prev_running+=" ${unit}" + fi + needs_linked+=" /var/log/pods" + fi + + if [[ ! -z "${prev_running}" ]]; then + systemctl stop ${prev_running} + fi + + # Transfer state directories to the array, if they exist. + for mount_point in ${needs_linked}; do + local unit="$(basename "${mount_point}")" + local array_mount_point_unit="${array_mount_point}/${unit}" + mkdir -p "${mount_point}" + echo "Copying ${mount_point}/ to ${array_mount_point_unit}/" + cp -a "${mount_point}/" "${array_mount_point_unit}/" + local mount_unit_name="$(systemd-escape --path --suffix=mount "${mount_point}")" + cat > "/etc/systemd/system/${mount_unit_name}" << EOF + [Unit] + Description=Mount ${unit} on EC2 Instance Store NVMe RAID0 + [Mount] + What=${array_mount_point_unit} + Where=${mount_point} + Type=none + Options=bind + [Install] + WantedBy=multi-user.target +EOF + systemd-analyze verify "${mount_unit_name}" + systemctl enable "${mount_unit_name}" --now + done + + if [[ ! -z "${prev_running}" ]]; then + systemctl start ${prev_running} + fi +} + +# Mounts and creates xfs file systems on all EC2 instance store NVMe disks +# without existing file systems. Mounts in /mnt/k8s-disks/{1..} by default +maybe_mount() { + idx=1 + for dev in "${EPHEMERAL_DISKS[@]}"; do + if [[ -z "$(lsblk "${dev}" -o fstype --noheadings)" ]]; then + mkfs.xfs -l su=8b "${dev}" + fi + if [[ ! -z "$(lsblk "${dev}" -o MOUNTPOINT --noheadings)" ]]; then + echo "${dev} is already mounted." 
+ continue + fi + local mount_point="${MNT_DIR}/${idx}" + local mount_unit_name="$(systemd-escape --path --suffix=mount "${mount_point}")" + mkdir -p "${mount_point}" + cat > "/etc/systemd/system/${mount_unit_name}" << EOF + [Unit] + Description=Mount EC2 Instance Store NVMe disk ${idx} + [Mount] + What=${dev} + Where=${mount_point} + Type=xfs + Options=defaults,noatime + [Install] + WantedBy=multi-user.target +EOF + systemd-analyze verify "${mount_unit_name}" + systemctl enable "${mount_unit_name}" --now + idx=$((idx + 1)) + done +} + +## Main logic +MNT_DIR="/mnt/k8s-disks" + +while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -h | --help) + print_help + exit 0 + ;; + -d | --dir) + MNT_DIR="$2" + shift + shift + ;; + *) # unknown option + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; + esac +done + +set +u +set -- "${POSITIONAL[@]}" # restore positional parameters +DISK_SETUP="$1" +set -u + +if [[ "${DISK_SETUP}" != "raid0" && "${DISK_SETUP}" != "mount" ]]; then + echo "Valid disk setup options are: raid0 or mount" + exit 1 +fi + +disks=($(find -L /dev/disk/by-id/ -xtype l -name '*NVMe_Instance_Storage_*')) +## Bail early if there are no ephemeral disks to setup +if [[ "${#disks[@]}" -eq 0 ]]; then + echo "no ephemeral disks found, skipping disk setup" + exit 0 +fi + +if [ "$(id --user)" -ne 0 ]; then + echo "Must be run as root" + exit 1 +fi + +## Get devices of NVMe instance storage ephemeral disks +EPHEMERAL_DISKS=($(realpath "${disks[@]}" | sort -u)) + +case "${DISK_SETUP}" in + "raid0") + maybe_raid0 + echo "Successfully setup RAID-0 consisting of ${EPHEMERAL_DISKS[@]}" + ;; + "mount") + maybe_mount + echo "Successfully setup disk mounts consisting of ${EPHEMERAL_DISKS[@]}" + ;; +esac diff --git a/files/bin/vercmp b/files/bin/vercmp new file mode 100755 index 000000000..5bb467854 --- /dev/null +++ b/files/bin/vercmp @@ -0,0 +1,93 @@ +#!/usr/bin/env bash + +# Comparison expressions for semantic versions. 
+# only supports semver standard MAJOR.MINOR.PATCH syntax; +# pre-release or build-metadata extensions have undefined behavior. + +set -o errexit +set -o pipefail + +function usage() { + echo "Comparison expressions for semantic versions." + echo + echo "usage: vercmp VERSION_A OPERATOR VERSION_B" + echo + echo "OPERATORS" + echo + echo " lt - Less than" + echo " lteq - Less than or equal to" + echo " eq - Equal to" + echo " gteq - Grater than or equal to" + echo " gt - Greater than" + echo +} + +if [ "$#" -ne 3 ]; then + usage + exit 1 +fi + +LEFT="$1" +OPERATOR="$2" +RIGHT="$3" + +if [ "$LEFT" = "$RIGHT" ]; then + COMPARISON=0 +else + SORTED=($(for VER in "$LEFT" "$RIGHT"; do echo "$VER"; done | sort -V)) + if [ "${SORTED[0]}" = "$LEFT" ]; then + COMPARISON=-1 + else + COMPARISON=1 + fi +fi + +OUTCOME=false + +case $OPERATOR in + lt) + if [ "$COMPARISON" -eq -1 ]; then + OUTCOME=true + fi + ;; + + lteq) + if [ "$COMPARISON" -lt 1 ]; then + OUTCOME=true + fi + ;; + + eq) + if [ "$COMPARISON" -eq 0 ]; then + OUTCOME=true + fi + ;; + + gteq) + if [ "$COMPARISON" -gt -1 ]; then + OUTCOME=true + fi + ;; + + gt) + if [ "$COMPARISON" -eq 1 ]; then + OUTCOME=true + fi + ;; + + *) + usage + exit 1 + ;; +esac + +VERCMP_QUIET="${VERCMP_QUIET:-false}" +if [ ! "$VERCMP_QUIET" = "true" ]; then + echo "$OUTCOME" +fi + +if [ "$OUTCOME" = "true" ]; then + exit 0 +else + exit 1 +fi diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 10b9b403f..42567a495 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -5,85 +5,169 @@ set -o nounset set -o errexit err_report() { - echo "Exited with error on line $1" + echo "Exited with error on line $1" } trap 'err_report $LINENO' ERR IFS=$'\n\t' +# mute stdout from vercmp +export VERCMP_QUIET=true + function print_help { - echo "usage: $0 [options] " - echo "Bootstraps an instance into an EKS cluster" - echo "" - echo "-h,--help print this help" - echo "--use-max-pods Sets --max-pods for the kubelet when true. 
(default: true)" - echo "--b64-cluster-ca The base64 encoded cluster CA content. Only valid when used with --apiserver-endpoint. Bypasses calling \"aws eks describe-cluster\"" - echo "--apiserver-endpoint The EKS cluster API Server endpoint. Only valid when used with --b64-cluster-ca. Bypasses calling \"aws eks describe-cluster\"" - echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints." - echo "--enable-docker-bridge Restores the docker default bridge network. (default: false)" - echo "--aws-api-retry-attempts Number of retry attempts for AWS API call (DescribeCluster) (default: 3)" - echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" + echo "usage: $0 [options] " + echo "Bootstraps an instance into an EKS cluster" + echo "" + echo "-h,--help print this help" + echo + echo "--apiserver-endpoint The EKS cluster API Server endpoint. Only valid when used with --b64-cluster-ca. Bypasses calling \"aws eks describe-cluster\"" + echo "--aws-api-retry-attempts Number of retry attempts for AWS API call (DescribeCluster) (default: 3)" + echo "--b64-cluster-ca The base64 encoded cluster CA content. Only valid when used with --apiserver-endpoint. Bypasses calling \"aws eks describe-cluster\"" + echo "--cluster-id Specify the id of EKS cluster" + echo "--container-runtime Specify a container runtime. For Kubernetes 1.23 and below, possible values are [dockerd, containerd] and the default value is dockerd. For Kubernetes 1.24 and above, containerd is the only valid value. This flag is deprecated and will be removed in a future release." + echo "--containerd-config-file File containing the containerd configuration to be used in place of AMI defaults." + echo "--dns-cluster-ip Overrides the IP address to use for DNS queries within the cluster. 
Defaults to 10.100.0.10 or 172.20.0.10 based on the IP address of the primary interface" + echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" + echo "--enable-docker-bridge Restores the docker default bridge network. (default: false)" + echo "--enable-local-outpost Enable support for worker nodes to communicate with the local control plane when running on a disconnected Outpost. (true or false)" + echo "--ip-family Specify ip family of the cluster" + echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints." + echo "--local-disks Setup instance storage NVMe disks in raid0 or mount the individual disks for use by pods [mount | raid0]" + echo "--mount-bpf-fs Mount a bpffs at /sys/fs/bpf (default: true)" + echo "--pause-container-account The AWS account (number) to pull the pause container from" + echo "--pause-container-version The tag of the pause container" + echo "--service-ipv6-cidr ipv6 cidr range of the cluster" + echo "--use-max-pods Sets --max-pods for the kubelet when true. (default: true)" +} + +function log { + echo >&2 "$(date '+%Y-%m-%dT%H:%M:%S%z')" "[eks-bootstrap]" "$@" } +log "INFO: starting..." 
+ POSITIONAL=() while [[ $# -gt 0 ]]; do - key="$1" - case $key in - -h|--help) - print_help - exit 1 - ;; - --use-max-pods) - USE_MAX_PODS="$2" - shift - shift - ;; - --b64-cluster-ca) - B64_CLUSTER_CA=$2 - shift - shift - ;; - --apiserver-endpoint) - APISERVER_ENDPOINT=$2 - shift - shift - ;; - --kubelet-extra-args) - KUBELET_EXTRA_ARGS=$2 - shift - shift - ;; - --enable-docker-bridge) - ENABLE_DOCKER_BRIDGE=$2 - shift - shift - ;; - --aws-api-retry-attempts) - API_RETRY_ATTEMPTS=$2 - shift - shift - ;; - --docker-config-json) - DOCKER_CONFIG_JSON=$2 - shift - shift - ;; - --pause-container-account) - PAUSE_CONTAINER_ACCOUNT=$2 - shift - shift - ;; - --pause-container-version) - PAUSE_CONTAINER_VERSION=$2 - shift - shift - ;; - *) # unknown option - POSITIONAL+=("$1") # save it in an array for later - shift # past argument - ;; - esac + key="$1" + case $key in + -h | --help) + print_help + exit 1 + ;; + --use-max-pods) + USE_MAX_PODS="$2" + log "INFO: --use-max-pods='${USE_MAX_PODS}'" + shift + shift + ;; + --b64-cluster-ca) + B64_CLUSTER_CA=$2 + log "INFO: --b64-cluster-ca='${B64_CLUSTER_CA}'" + shift + shift + ;; + --apiserver-endpoint) + APISERVER_ENDPOINT=$2 + log "INFO: --apiserver-endpoint='${APISERVER_ENDPOINT}'" + shift + shift + ;; + --kubelet-extra-args) + KUBELET_EXTRA_ARGS=$2 + log "INFO: --kubelet-extra-args='${KUBELET_EXTRA_ARGS}'" + shift + shift + ;; + --enable-docker-bridge) + ENABLE_DOCKER_BRIDGE=$2 + log "INFO: --enable-docker-bridge='${ENABLE_DOCKER_BRIDGE}'" + shift + shift + ;; + --aws-api-retry-attempts) + API_RETRY_ATTEMPTS=$2 + log "INFO: --aws-api-retry-attempts='${API_RETRY_ATTEMPTS}'" + shift + shift + ;; + --docker-config-json) + DOCKER_CONFIG_JSON=$2 + log "INFO: --docker-config-json='${DOCKER_CONFIG_JSON}'" + shift + shift + ;; + --containerd-config-file) + CONTAINERD_CONFIG_FILE=$2 + log "INFO: --containerd-config-file='${CONTAINERD_CONFIG_FILE}'" + shift + shift + ;; + --pause-container-account) + PAUSE_CONTAINER_ACCOUNT=$2 + log 
"INFO: --pause-container-account='${PAUSE_CONTAINER_ACCOUNT}'" + shift + shift + ;; + --pause-container-version) + PAUSE_CONTAINER_VERSION=$2 + log "INFO: --pause-container-version='${PAUSE_CONTAINER_VERSION}'" + shift + shift + ;; + --dns-cluster-ip) + DNS_CLUSTER_IP=$2 + log "INFO: --dns-cluster-ip='${DNS_CLUSTER_IP}'" + shift + shift + ;; + --container-runtime) + CONTAINER_RUNTIME=$2 + log "INFO: --container-runtime='${CONTAINER_RUNTIME}'" + shift + shift + ;; + --ip-family) + IP_FAMILY=$2 + log "INFO: --ip-family='${IP_FAMILY}'" + shift + shift + ;; + --service-ipv6-cidr) + SERVICE_IPV6_CIDR=$2 + log "INFO: --service-ipv6-cidr='${SERVICE_IPV6_CIDR}'" + shift + shift + ;; + --enable-local-outpost) + ENABLE_LOCAL_OUTPOST=$2 + log "INFO: --enable-local-outpost='${ENABLE_LOCAL_OUTPOST}'" + shift + shift + ;; + --cluster-id) + CLUSTER_ID=$2 + log "INFO: --cluster-id='${CLUSTER_ID}'" + shift + shift + ;; + --mount-bpf-fs) + MOUNT_BPF_FS=$2 + log "INFO: --mount-bpf-fs='${MOUNT_BPF_FS}'" + shift + shift + ;; + --local-disks) + LOCAL_DISKS=$2 + log "INFO: --local-disks='${LOCAL_DISKS}'" + shift + shift + ;; + *) # unknown option + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; + esac done set +u @@ -91,138 +175,495 @@ set -- "${POSITIONAL[@]}" # restore positional parameters CLUSTER_NAME="$1" set -u +export IMDS_TOKEN=$(imds token) + +KUBELET_VERSION=$(kubelet --version | grep -Eo '[0-9]\.[0-9]+\.[0-9]+') +log "INFO: Using kubelet version $KUBELET_VERSION" + +# ecr-credential-provider only implements credentialprovider.kubelet.k8s.io/v1alpha1 prior to 1.27.1: https://github.com/kubernetes/cloud-provider-aws/pull/597 +# TODO: remove this when 1.26 is EOL +if vercmp "$KUBELET_VERSION" lt "1.27.0"; then + IMAGE_CREDENTIAL_PROVIDER_CONFIG=/etc/eks/image-credential-provider/config.json + echo "$(jq '.apiVersion = "kubelet.config.k8s.io/v1alpha1"' $IMAGE_CREDENTIAL_PROVIDER_CONFIG)" > $IMAGE_CREDENTIAL_PROVIDER_CONFIG + echo "$(jq 
'.providers[].apiVersion = "credentialprovider.kubelet.k8s.io/v1alpha1"' $IMAGE_CREDENTIAL_PROVIDER_CONFIG)" > $IMAGE_CREDENTIAL_PROVIDER_CONFIG +fi + +# Set container runtime related variables +DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" +ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" + +# As of Kubernetes version 1.24, we will start defaulting the container runtime to containerd +# and no longer support docker as a container runtime. +DEFAULT_CONTAINER_RUNTIME=dockerd +if vercmp "$KUBELET_VERSION" gteq "1.24.0"; then + DEFAULT_CONTAINER_RUNTIME=containerd +fi +CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-$DEFAULT_CONTAINER_RUNTIME}" + +log "INFO: Using $CONTAINER_RUNTIME as the container runtime" + +if vercmp "$KUBELET_VERSION" gteq "1.24.0" && [ $CONTAINER_RUNTIME != "containerd" ]; then + log "ERROR: containerd is the only supported container runtime as of Kubernetes version 1.24" + exit 1 +fi + USE_MAX_PODS="${USE_MAX_PODS:-true}" B64_CLUSTER_CA="${B64_CLUSTER_CA:-}" APISERVER_ENDPOINT="${APISERVER_ENDPOINT:-}" +SERVICE_IPV4_CIDR="${SERVICE_IPV4_CIDR:-}" +DNS_CLUSTER_IP="${DNS_CLUSTER_IP:-}" KUBELET_EXTRA_ARGS="${KUBELET_EXTRA_ARGS:-}" -ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" API_RETRY_ATTEMPTS="${API_RETRY_ATTEMPTS:-3}" -DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" -PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.1}" - -function get_pause_container_account_for_region () { - local region="$1" - case "${region}" in - ap-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-800184023465}";; - me-south-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-558608220178}";; - *) - echo "${PAUSE_CONTAINER_ACCOUNT:-602401143452}";; - esac +CONTAINERD_CONFIG_FILE="${CONTAINERD_CONFIG_FILE:-}" +PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.5}" +IP_FAMILY="${IP_FAMILY:-}" +SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" +ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" +CLUSTER_ID="${CLUSTER_ID:-}" +LOCAL_DISKS="${LOCAL_DISKS:-}" + +##allow --reserved-cpus options via 
kubelet arg directly. Disable default reserved cgroup option in such cases +USE_RESERVED_CGROUPS=true +if [[ ${KUBELET_EXTRA_ARGS} == *'--reserved-cpus'* ]]; then + USE_RESERVED_CGROUPS=false + log "INFO: --kubelet-extra-args includes --reserved-cpus, so kube/system-reserved cgroups will not be used." +fi + +if [[ ! -z ${LOCAL_DISKS} ]]; then + setup-local-disks "${LOCAL_DISKS}" +fi + +MOUNT_BPF_FS="${MOUNT_BPF_FS:-true}" + +# Helper function which calculates the amount of the given resource (either CPU or memory) +# to reserve in a given resource range, specified by a start and end of the range and a percentage +# of the resource to reserve. Note that we return zero if the start of the resource range is +# greater than the total resource capacity on the node. Additionally, if the end range exceeds the total +# resource capacity of the node, we use the total resource capacity as the end of the range. +# Args: +# $1 total available resource on the worker node in input unit (either millicores for CPU or Mi for memory) +# $2 start of the resource range in input unit +# $3 end of the resource range in input unit +# $4 percentage of range to reserve in percent*100 (to allow for two decimal digits) +# Return: +# amount of resource to reserve in input unit +get_resource_to_reserve_in_range() { + local total_resource_on_instance=$1 + local start_range=$2 + local end_range=$3 + local percentage=$4 + resources_to_reserve="0" + if (($total_resource_on_instance > $start_range)); then + resources_to_reserve=$(((($total_resource_on_instance < $end_range ? $total_resource_on_instance : $end_range) - $start_range) * $percentage / 100 / 100)) + fi + echo $resources_to_reserve +} + +# Calculates the amount of memory to reserve for kubeReserved in mebibytes. KubeReserved is a function of pod +# density so we are calculating the amount of memory to reserve for Kubernetes systems daemons by +# considering the maximum number of pods this instance type supports. 
+# Args: +# $1 the max number of pods per instance type (MAX_PODS) based on values from /etc/eks/eni-max-pods.txt +# Return: +# memory to reserve in Mi for the kubelet +get_memory_mebibytes_to_reserve() { + local max_num_pods=$1 + memory_to_reserve=$((11 * $max_num_pods + 255)) + echo $memory_to_reserve +} + +# Calculates the amount of CPU to reserve for kubeReserved in millicores from the total number of vCPUs available on the instance. +# From the total core capacity of this worker node, we calculate the CPU resources to reserve by reserving a percentage +# of the available cores in each range up to the total number of cores available on the instance. +# We are using these CPU ranges from GKE (https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-architecture#node_allocatable): +# 6% of the first core +# 1% of the next core (up to 2 cores) +# 0.5% of the next 2 cores (up to 4 cores) +# 0.25% of any cores above 4 cores +# Return: +# CPU resources to reserve in millicores (m) +get_cpu_millicores_to_reserve() { + local total_cpu_on_instance=$(($(nproc) * 1000)) + local cpu_ranges=(0 1000 2000 4000 $total_cpu_on_instance) + local cpu_percentage_reserved_for_ranges=(600 100 50 25) + cpu_to_reserve="0" + for i in "${!cpu_percentage_reserved_for_ranges[@]}"; do + local start_range=${cpu_ranges[$i]} + local end_range=${cpu_ranges[(($i + 1))]} + local percentage_to_reserve_for_range=${cpu_percentage_reserved_for_ranges[$i]} + cpu_to_reserve=$(($cpu_to_reserve + $(get_resource_to_reserve_in_range $total_cpu_on_instance $start_range $end_range $percentage_to_reserve_for_range))) + done + echo $cpu_to_reserve } if [ -z "$CLUSTER_NAME" ]; then - echo "CLUSTER_NAME is not defined" - exit 1 + log "ERROR: cluster name is not defined!" + exit 1 +fi + +if [[ ! -z "${IP_FAMILY}" ]]; then + IP_FAMILY="$(tr [A-Z] [a-z] <<< "$IP_FAMILY")" + if [[ "${IP_FAMILY}" != "ipv4" ]] && [[ "${IP_FAMILY}" != "ipv6" ]]; then + log "ERROR: Invalid --ip-family. 
Only ipv4 or ipv6 are allowed" + exit 1 + fi +fi + +if [[ ! -z "${SERVICE_IPV6_CIDR}" ]]; then + if [[ "${IP_FAMILY}" == "ipv4" ]]; then + log "ERROR: --ip-family should be ipv6 when --service-ipv6-cidr is specified" + exit 1 + fi + IP_FAMILY="ipv6" fi -ZONE=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone) -AWS_DEFAULT_REGION=$(echo $ZONE | awk '{print substr($0, 1, length($0)-1)}') +AWS_DEFAULT_REGION=$(imds 'latest/dynamic/instance-identity/document' | jq .region -r) +AWS_SERVICES_DOMAIN=$(imds 'latest/meta-data/services/domain') MACHINE=$(uname -m) -if [ "$MACHINE" == "x86_64" ]; then - ARCH="amd64" -elif [ "$MACHINE" == "aarch64" ]; then - ARCH="arm64" -else - echo "Unknown machine architecture '$MACHINE'" >&2 - exit 1 +if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then + log "ERROR: Unknown machine architecture: '$MACHINE'" + exit 1 +fi + +if [ "$MOUNT_BPF_FS" = "true" ]; then + mount-bpf-fs fi +cp -v /etc/eks/configure-clocksource.service /etc/systemd/system/configure-clocksource.service +chown root:root /etc/systemd/system/configure-clocksource.service +systemctl daemon-reload +systemctl enable --now configure-clocksource + +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${AWS_DEFAULT_REGION}" "${AWS_SERVICES_DOMAIN}" "${PAUSE_CONTAINER_ACCOUNT:-}") +PAUSE_CONTAINER_IMAGE=${PAUSE_CONTAINER_IMAGE:-$ECR_URI/eks/pause} +PAUSE_CONTAINER="$PAUSE_CONTAINER_IMAGE:$PAUSE_CONTAINER_VERSION" + ### kubelet kubeconfig CA_CERTIFICATE_DIRECTORY=/etc/kubernetes/pki CA_CERTIFICATE_FILE_PATH=$CA_CERTIFICATE_DIRECTORY/ca.crt mkdir -p $CA_CERTIFICATE_DIRECTORY -if [[ -z "${B64_CLUSTER_CA}" ]] && [[ -z "${APISERVER_ENDPOINT}" ]]; then - DESCRIBE_CLUSTER_RESULT="/tmp/describe_cluster_result.txt" +if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then + log "INFO: --b64-cluster-ca or --apiserver-endpoint is not defined, describing cluster..." 
+ DESCRIBE_CLUSTER_RESULT="/tmp/describe_cluster_result.txt" + + # Retry the DescribeCluster API for API_RETRY_ATTEMPTS + for attempt in $(seq 0 $API_RETRY_ATTEMPTS); do rc=0 - # Retry the DescribleCluster API for API_RETRY_ATTEMPTS - for attempt in `seq 0 $API_RETRY_ATTEMPTS`; do - if [[ $attempt -gt 0 ]]; then - echo "Attempt $attempt of $API_RETRY_ATTEMPTS" - fi - - aws eks wait cluster-active \ - --region=${AWS_DEFAULT_REGION} \ - --name=${CLUSTER_NAME} - - aws eks describe-cluster \ - --region=${AWS_DEFAULT_REGION} \ - --name=${CLUSTER_NAME} \ - --output=text \ - --query 'cluster.{certificateAuthorityData: certificateAuthority.data, endpoint: endpoint}' > $DESCRIBE_CLUSTER_RESULT || rc=$? - if [[ $rc -eq 0 ]]; then - break - fi - if [[ $attempt -eq $API_RETRY_ATTEMPTS ]]; then - exit $rc - fi - jitter=$((1 + RANDOM % 10)) - sleep_sec="$(( $(( 5 << $((1+$attempt)) )) + $jitter))" - sleep $sleep_sec - done - B64_CLUSTER_CA=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $1}') - APISERVER_ENDPOINT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $2}') -fi - -echo $B64_CLUSTER_CA | base64 -d > $CA_CERTIFICATE_FILE_PATH - -sed -i s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig + if [[ $attempt -gt 0 ]]; then + log "INFO: Attempt $attempt of $API_RETRY_ATTEMPTS" + fi + + aws eks wait cluster-active \ + --region=${AWS_DEFAULT_REGION} \ + --name=${CLUSTER_NAME} + + aws eks describe-cluster \ + --region=${AWS_DEFAULT_REGION} \ + --name=${CLUSTER_NAME} \ + --output=text \ + --query 'cluster.{certificateAuthorityData: certificateAuthority.data, endpoint: endpoint, serviceIpv4Cidr: kubernetesNetworkConfig.serviceIpv4Cidr, serviceIpv6Cidr: kubernetesNetworkConfig.serviceIpv6Cidr, clusterIpFamily: kubernetesNetworkConfig.ipFamily, outpostArn: outpostConfig.outpostArns[0], id: id}' > $DESCRIBE_CLUSTER_RESULT || rc=$? + if [[ $rc -eq 0 ]]; then + break + fi + if [[ $attempt -eq $API_RETRY_ATTEMPTS ]]; then + log "ERROR: Exhausted retries while describing cluster!" 
+ exit $rc + fi + jitter=$((1 + RANDOM % 10)) + sleep_sec="$(($((5 << $((1 + $attempt)))) + $jitter))" + sleep $sleep_sec + done + B64_CLUSTER_CA=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $1}') + APISERVER_ENDPOINT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $3}') + CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $4}') + OUTPOST_ARN=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $5}') + SERVICE_IPV4_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $6}') + SERVICE_IPV6_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $7}') + + if [[ -z "${IP_FAMILY}" ]]; then + IP_FAMILY=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $2}') + fi + + # Automatically detect local cluster in outpost + if [[ -z "${OUTPOST_ARN}" ]] || [[ "${OUTPOST_ARN}" == "None" ]]; then + IS_LOCAL_OUTPOST_DETECTED=false + else + IS_LOCAL_OUTPOST_DETECTED=true + fi + + # If the cluster id is returned from describe cluster, let us use it no matter whether cluster id is passed from option + if [[ ! -z "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" ]] && [[ "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" != "None" ]]; then + CLUSTER_ID=${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT} + fi +fi + +if [[ -z "${IP_FAMILY}" ]] || [[ "${IP_FAMILY}" == "None" ]]; then + ### this can happen when the ipFamily field is not found in describeCluster response + ### or B64_CLUSTER_CA and APISERVER_ENDPOINT are defined but IPFamily isn't + IP_FAMILY="ipv4" +fi + +log "INFO: Using IP family: ${IP_FAMILY}" + +echo "$B64_CLUSTER_CA" | base64 -d > $CA_CERTIFICATE_FILE_PATH + sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig +sed -i s,AWS_REGION,$AWS_DEFAULT_REGION,g /var/lib/kubelet/kubeconfig + +if [[ -z "$ENABLE_LOCAL_OUTPOST" ]]; then + # Only when "--enable-local-outpost" option is not set explicitly on calling bootstrap.sh, it will be assigned with + # - the result of auto-detection through describe-cluster + # - or "false" when describe-cluster is bypassed.
+ # This also means if "--enable-local-outpost" option is set explicitly, it will override auto-detection result + ENABLE_LOCAL_OUTPOST="${IS_LOCAL_OUTPOST_DETECTED:-false}" +fi + +### To support worker nodes to continue to communicate and connect to local cluster even when the Outpost +### is disconnected from the parent AWS Region, the following specific setup is required: +### - append entries to /etc/hosts with the mappings of control plane host IP address and API server +### domain name. So that the domain name can be resolved to IP addresses locally. +### - use aws-iam-authenticator as bootstrap auth for kubelet TLS bootstrapping which downloads client +### X.509 certificate and generate kubelet kubeconfig file which uses the client cert. So that the +### worker node can be authenticated through X.509 certificate which works for both connected and +### disconnected state. +if [[ "${ENABLE_LOCAL_OUTPOST}" == "true" ]]; then + ### append to /etc/hosts file with shuffled mappings of "IP address to API server domain name" + DOMAIN_NAME=$(echo "$APISERVER_ENDPOINT" | awk -F/ '{print $3}' | awk -F: '{print $1}') + getent hosts "$DOMAIN_NAME" | shuf >> /etc/hosts + + ### kubelet bootstrap kubeconfig uses aws-iam-authenticator with cluster id to authenticate to cluster + ### - if "aws eks describe-cluster" is bypassed, for local outpost, the value of CLUSTER_NAME parameter will be cluster id. + ### - otherwise, the cluster id will use the id returned by "aws eks describe-cluster".
+ if [[ -z "${CLUSTER_ID}" ]]; then + log "ERROR: Cluster ID is required when local outpost support is enabled" + exit 1 + else + sed -i s,CLUSTER_NAME,$CLUSTER_ID,g /var/lib/kubelet/kubeconfig + + ### use aws-iam-authenticator as bootstrap auth and download X.509 cert used in kubelet kubeconfig + mv /var/lib/kubelet/kubeconfig /var/lib/kubelet/bootstrap-kubeconfig + KUBELET_EXTRA_ARGS="--bootstrap-kubeconfig /var/lib/kubelet/bootstrap-kubeconfig $KUBELET_EXTRA_ARGS" + fi +else + sed -i s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig +fi + ### kubelet.service configuration -MAC=$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/ -s | head -n 1 | sed 's/\/$//') -TEN_RANGE=$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks | grep -c '^10\..*' || true ) -DNS_CLUSTER_IP=10.100.0.10 -if [[ "$TEN_RANGE" != "0" ]] ; then - DNS_CLUSTER_IP=172.20.0.10; +MAC=$(imds 'latest/meta-data/mac') + +if [[ -z "${DNS_CLUSTER_IP}" ]]; then + if [[ "${IP_FAMILY}" == "ipv6" ]]; then + if [[ -z "${SERVICE_IPV6_CIDR}" ]]; then + log "ERROR: One of --service-ipv6-cidr or --dns-cluster-ip must be provided when --ip-family is ipv6" + exit 1 + fi + DNS_CLUSTER_IP=$(awk -F/ '{print $1}' <<< $SERVICE_IPV6_CIDR)a + fi + + if [[ "${IP_FAMILY}" == "ipv4" ]]; then + if [[ ! -z "${SERVICE_IPV4_CIDR}" ]] && [[ "${SERVICE_IPV4_CIDR}" != "None" ]]; then + #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. 
(x.y.z.10) + DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 + else + TEN_RANGE=$(imds "latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks" | grep -c '^10\..*' || true) + DNS_CLUSTER_IP=10.100.0.10 + if [[ "$TEN_RANGE" != "0" ]]; then + DNS_CLUSTER_IP=172.20.0.10 + fi + fi + fi +else + DNS_CLUSTER_IP="${DNS_CLUSTER_IP}" fi KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json echo "$(jq ".clusterDNS=[\"$DNS_CLUSTER_IP\"]" $KUBELET_CONFIG)" > $KUBELET_CONFIG -INTERNAL_IP=$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4) -INSTANCE_TYPE=$(curl -s http://169.254.169.254/latest/meta-data/instance-type) +if [[ "${IP_FAMILY}" == "ipv4" ]]; then + INTERNAL_IP=$(imds 'latest/meta-data/local-ipv4') +else + INTERNAL_IP_URI=latest/meta-data/network/interfaces/macs/$MAC/ipv6s + INTERNAL_IP=$(imds $INTERNAL_IP_URI) +fi +INSTANCE_TYPE=$(imds 'latest/meta-data/instance-type') -if [[ "$USE_MAX_PODS" = "true" ]]; then - MAX_PODS_FILE="/etc/eks/eni-max-pods.txt" - set +o pipefail - MAX_PODS=$(grep ^$INSTANCE_TYPE $MAX_PODS_FILE | awk '{print $2}') - set -o pipefail - if [[ -n "$MAX_PODS" ]]; then - echo "$(jq ".maxPods=$MAX_PODS" $KUBELET_CONFIG)" > $KUBELET_CONFIG - else - echo "No entry for $INSTANCE_TYPE in $MAX_PODS_FILE. 
Not setting max pods for kubelet" - fi +if vercmp "$KUBELET_VERSION" gteq "1.22.0" && vercmp "$KUBELET_VERSION" lt "1.27.0"; then + # for K8s versions that support API Priority & Fairness, increase our API server QPS + # in 1.27, the default is already increased to 50/100, so use the higher defaults + echo $(jq ".kubeAPIQPS=( .kubeAPIQPS // 10)|.kubeAPIBurst=( .kubeAPIBurst // 20)" $KUBELET_CONFIG) > $KUBELET_CONFIG fi -cat < /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf -[Service] -Environment='KUBELET_ARGS=--node-ip=$INTERNAL_IP --pod-infra-container-image=$(get_pause_container_account_for_region "${AWS_DEFAULT_REGION}").dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/eks/pause-${ARCH}:$PAUSE_CONTAINER_VERSION' -EOF +# Sets kubeReserved and evictionHard in /etc/kubernetes/kubelet/kubelet-config.json for worker nodes. The following two function +# calls calculate the CPU and memory resources to reserve for kubeReserved based on the instance type of the worker node. +# Note that allocatable memory and CPU resources on worker nodes are calculated by the Kubernetes scheduler +# with this formula when scheduling pods: Allocatable = Capacity - Reserved - Eviction Threshold. -if [[ -n "$KUBELET_EXTRA_ARGS" ]]; then - cat < /etc/systemd/system/kubelet.service.d/30-kubelet-extra-args.conf -[Service] -Environment='KUBELET_EXTRA_ARGS=$KUBELET_EXTRA_ARGS' -EOF +# calculate the max number of pods per instance type +MAX_PODS_FILE="/etc/eks/eni-max-pods.txt" +set +o pipefail +MAX_PODS=$(cat $MAX_PODS_FILE | awk "/^${INSTANCE_TYPE:-unset}/"' { print $2 }') +set -o pipefail +if [ -z "$MAX_PODS" ] || [ -z "$INSTANCE_TYPE" ]; then + log "INFO: No entry for type '$INSTANCE_TYPE' in $MAX_PODS_FILE. Will attempt to auto-discover value." + # When determining the value of maxPods, we're using the legacy calculation by default since it's more restrictive than + # the PrefixDelegation based alternative and is likely to be in-use by more customers.
+ # The legacy numbers also maintain backwards compatibility when used to calculate `kubeReserved.memory` + MAX_PODS=$(/etc/eks/max-pods-calculator.sh --instance-type-from-imds --cni-version 1.10.0 --show-max-allowed) fi -# Replace with custom docker config contents. -if [[ -n "$DOCKER_CONFIG_JSON" ]]; then - echo "$DOCKER_CONFIG_JSON" > /etc/docker/daemon.json - systemctl restart docker +# calculates the amount of each resource to reserve +mebibytes_to_reserve=$(get_memory_mebibytes_to_reserve $MAX_PODS) +cpu_millicores_to_reserve=$(get_cpu_millicores_to_reserve) +# writes kubeReserved and evictionHard to the kubelet-config using the amount of CPU and memory to be reserved +echo "$(jq '. += {"evictionHard": {"memory.available": "100Mi", "nodefs.available": "10%", "nodefs.inodesFree": "5%"}}' $KUBELET_CONFIG)" > $KUBELET_CONFIG +echo "$(jq --arg mebibytes_to_reserve "${mebibytes_to_reserve}Mi" --arg cpu_millicores_to_reserve "${cpu_millicores_to_reserve}m" \ + '. += {kubeReserved: {"cpu": $cpu_millicores_to_reserve, "ephemeral-storage": "1Gi", "memory": $mebibytes_to_reserve}}' $KUBELET_CONFIG)" > $KUBELET_CONFIG + +if [[ "$USE_MAX_PODS" = "true" ]]; then + echo "$(jq ".maxPods=$MAX_PODS" $KUBELET_CONFIG)" > $KUBELET_CONFIG +fi + +KUBELET_ARGS="--node-ip=$INTERNAL_IP --pod-infra-container-image=$PAUSE_CONTAINER --v=2" + +if vercmp "$KUBELET_VERSION" lt "1.26.0"; then + # TODO: remove this when 1.25 is EOL + KUBELET_CLOUD_PROVIDER="aws" +else + KUBELET_CLOUD_PROVIDER="external" + echo "$(jq ".providerID=\"$(provider-id)\"" $KUBELET_CONFIG)" > $KUBELET_CONFIG + # When the external cloud provider is used, kubelet will use /etc/hostname as the name of the Node object. + # If the VPC has a custom `domain-name` in its DHCP options set, and the VPC has `enableDnsHostnames` set to `true`, + # then /etc/hostname is not the same as EC2's PrivateDnsName. 
+ # The name of the Node object must be equal to EC2's PrivateDnsName for the aws-iam-authenticator to allow this kubelet to manage it. + KUBELET_ARGS="$KUBELET_ARGS --hostname-override=$(private-dns-name)" fi -if [[ "$ENABLE_DOCKER_BRIDGE" = "true" ]]; then +KUBELET_ARGS="$KUBELET_ARGS --cloud-provider=$KUBELET_CLOUD_PROVIDER" + +mkdir -p /etc/systemd/system + +if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then + if $ENABLE_DOCKER_BRIDGE; then + log "WARNING: Flag --enable-docker-bridge was set but will be ignored as it's not relevant to containerd" + fi + + if [ ! -z "$DOCKER_CONFIG_JSON" ]; then + log "WARNING: Flag --docker-config-json was set but will be ignored as it's not relevant to containerd" + fi + + sudo mkdir -p /etc/containerd + sudo mkdir -p /etc/cni/net.d + + if [[ -n "${CONTAINERD_CONFIG_FILE}" ]]; then + sudo cp -v "${CONTAINERD_CONFIG_FILE}" /etc/eks/containerd/containerd-config.toml + fi + + sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml + + echo "$(jq '.cgroupDriver="systemd"' "${KUBELET_CONFIG}")" > "${KUBELET_CONFIG}" + ##allow --reserved-cpus options via kubelet arg directly. Disable default reserved cgroup option in such cases + if [[ "${USE_RESERVED_CGROUPS}" = true ]]; then + echo "$(jq '.systemReservedCgroup="/system"' "${KUBELET_CONFIG}")" > "${KUBELET_CONFIG}" + echo "$(jq '.kubeReservedCgroup="/runtime"' "${KUBELET_CONFIG}")" > "${KUBELET_CONFIG}" + fi + + # Check if the containerd config file is the same as the one used in the image build. + # If different, then restart containerd w/ proper config + if ! 
cmp -s /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml; then + sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml + sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service + sudo chown root:root /etc/systemd/system/sandbox-image.service + systemctl daemon-reload + systemctl enable containerd sandbox-image + systemctl restart sandbox-image containerd + fi + sudo cp -v /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service + sudo chown root:root /etc/systemd/system/kubelet.service + # Validate containerd config + sudo containerd config dump > /dev/null + + # --container-runtime flag is gone in 1.27+ + # TODO: remove this when 1.26 is EOL + if vercmp "$KUBELET_VERSION" lt "1.27.0"; then + KUBELET_ARGS="$KUBELET_ARGS --container-runtime=remote" + fi +elif [[ "$CONTAINER_RUNTIME" = "dockerd" ]]; then + mkdir -p /etc/docker + bash -c "/sbin/iptables-save > /etc/sysconfig/iptables" + cp -v /etc/eks/iptables-restore.service /etc/systemd/system/iptables-restore.service + sudo chown root:root /etc/systemd/system/iptables-restore.service + systemctl daemon-reload + systemctl enable iptables-restore + + if [[ -n "$DOCKER_CONFIG_JSON" ]]; then + echo "$DOCKER_CONFIG_JSON" > /etc/docker/daemon.json + fi + if [[ "$ENABLE_DOCKER_BRIDGE" = "true" ]]; then # Enabling the docker bridge network. 
We have to disable live-restore as it # prevents docker from recreating the default bridge network on restart echo "$(jq '.bridge="docker0" | ."live-restore"=false' /etc/docker/daemon.json)" > /etc/docker/daemon.json - systemctl restart docker + fi + systemctl daemon-reload + systemctl enable docker + systemctl restart docker +else + log "ERROR: unsupported container runtime: '${CONTAINER_RUNTIME}'" + exit 1 +fi + +mkdir -p /etc/systemd/system/kubelet.service.d + +cat << EOF > /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf +[Service] +Environment='KUBELET_ARGS=$KUBELET_ARGS' +EOF + +if [[ -n "$KUBELET_EXTRA_ARGS" ]]; then + cat << EOF > /etc/systemd/system/kubelet.service.d/30-kubelet-extra-args.conf +[Service] +Environment='KUBELET_EXTRA_ARGS=$KUBELET_EXTRA_ARGS' +EOF fi systemctl daemon-reload systemctl enable kubelet systemctl start kubelet + +# gpu boost clock +if command -v nvidia-smi &> /dev/null; then + log "INFO: nvidia-smi found" + + nvidia-smi -q > /tmp/nvidia-smi-check + if [[ "$?" == "0" ]]; then + sudo nvidia-smi -pm 1 # set persistence mode + sudo nvidia-smi --auto-boost-default=0 + + GPUNAME=$(nvidia-smi -L | head -n1) + log "INFO: GPU name: $GPUNAME" + + # set application clock to maximum + if [[ $GPUNAME == *"A100"* ]]; then + nvidia-smi -ac 1215,1410 + elif [[ $GPUNAME == *"V100"* ]]; then + nvidia-smi -ac 877,1530 + elif [[ $GPUNAME == *"K80"* ]]; then + nvidia-smi -ac 2505,875 + elif [[ $GPUNAME == *"T4"* ]]; then + nvidia-smi -ac 5001,1590 + elif [[ $GPUNAME == *"M60"* ]]; then + nvidia-smi -ac 2505,1177 + elif [[ $GPUNAME == *"H100"* ]]; then + nvidia-smi -ac 2619,1980 + else + echo "unsupported gpu" + fi + else + log "ERROR: nvidia-smi check failed!" + cat /tmp/nvidia-smi-check + fi +fi + +log "INFO: complete!" 
diff --git a/files/configure-clocksource.service b/files/configure-clocksource.service new file mode 100644 index 000000000..5274ca041 --- /dev/null +++ b/files/configure-clocksource.service @@ -0,0 +1,8 @@ +[Unit] +Description=Configure kernel clocksource + +[Service] +ExecStart=/usr/bin/configure-clocksource + +[Install] +WantedBy=multi-user.target diff --git a/files/containerd-config.toml b/files/containerd-config.toml new file mode 100644 index 000000000..42458568f --- /dev/null +++ b/files/containerd-config.toml @@ -0,0 +1,26 @@ +version = 2 +root = "/var/lib/containerd" +state = "/run/containerd" + +[grpc] +address = "/run/containerd/containerd.sock" + +[plugins."io.containerd.grpc.v1.cri".containerd] +default_runtime_name = "runc" +discard_unpacked_layers = true + +[plugins."io.containerd.grpc.v1.cri"] +sandbox_image = "SANDBOX_IMAGE" + +[plugins."io.containerd.grpc.v1.cri".registry] +config_path = "/etc/containerd/certs.d:/etc/docker/certs.d" + +[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] +runtime_type = "io.containerd.runc.v2" + +[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] +SystemdCgroup = true + +[plugins."io.containerd.grpc.v1.cri".cni] +bin_dir = "/opt/cni/bin" +conf_dir = "/etc/cni/net.d" diff --git a/files/docker-daemon.json b/files/docker-daemon.json index 55e395721..cf5459f51 100644 --- a/files/docker-daemon.json +++ b/files/docker-daemon.json @@ -6,5 +6,12 @@ "max-file": "10" }, "live-restore": true, - "max-concurrent-downloads": 10 + "max-concurrent-downloads": 10, + "default-ulimits": { + "memlock": { + "Hard": -1, + "Name": "memlock", + "Soft": -1 + } + } } diff --git a/files/ecr-credential-provider-config.json b/files/ecr-credential-provider-config.json new file mode 100644 index 000000000..6b251d69c --- /dev/null +++ b/files/ecr-credential-provider-config.json @@ -0,0 +1,18 @@ +{ + "apiVersion": "kubelet.config.k8s.io/v1", + "kind": "CredentialProviderConfig", + "providers": [ + { + "name": 
"ecr-credential-provider", + "matchImages": [ + "*.dkr.ecr.*.amazonaws.com", + "*.dkr.ecr.*.amazonaws.com.cn", + "*.dkr.ecr-fips.*.amazonaws.com", + "*.dkr.ecr.*.c2s.ic.gov", + "*.dkr.ecr.*.sc2s.sgov.gov" + ], + "defaultCacheDuration": "12h", + "apiVersion": "credentialprovider.kubelet.k8s.io/v1" + } + ] +} diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 7ad015812..70f9a59bc 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -1,228 +1,820 @@ -# Mapping is calculated from AWS ENI documentation, with the following modifications: +# Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may +# not use this file except in compliance with the License. A copy of the +# License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. 
+# +# The regions queried were: +# - ap-northeast-1 +# - ap-northeast-2 +# - ap-northeast-3 +# - ap-south-1 +# - ap-southeast-1 +# - ap-southeast-2 +# - ca-central-1 +# - eu-central-1 +# - eu-north-1 +# - eu-west-1 +# - eu-west-2 +# - eu-west-3 +# - sa-east-1 +# - us-east-1 +# - us-east-2 +# - us-west-1 +# - us-west-2 +# +# Mapping is calculated from AWS EC2 API using the following formula: # * First IP on each ENI is not used for pods -# * 2 additional host-networking pods (AWS ENI and kube-proxy) are accounted for +# * +2 for the pods that use host-networking (AWS CNI and kube-proxy) # -# # of ENI * (# of IPv4 per ENI - 1) + 2 +# # of ENI * (# of IPv4 per ENI - 1) + 2 # # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-eni.html#AvailableIpPerENI # -# If f1.16xlarge, g3.16xlarge, h1.16xlarge, i3.16xlarge, and r4.16xlarge -# instances use more than 31 IPv4 or IPv6 addresses per interface, they cannot -# access the instance metadata, VPC DNS, and Time Sync services from the 32nd IP -# address onwards. If access to these services is needed from all IP addresses -# on the interface, we recommend using a maximum of 31 IP addresses per interface. 
-a1.medium 8 -a1.large 29 -a1.xlarge 58 a1.2xlarge 58 a1.4xlarge 234 +a1.large 29 +a1.medium 8 +a1.metal 234 +a1.xlarge 58 +bmn-sf1.metal 737 c1.medium 12 c1.xlarge 58 -c3.large 29 -c3.xlarge 58 c3.2xlarge 58 c3.4xlarge 234 c3.8xlarge 234 -c4.large 29 -c4.xlarge 58 +c3.large 29 +c3.xlarge 58 c4.2xlarge 58 c4.4xlarge 234 c4.8xlarge 234 -c5.large 29 -c5.xlarge 58 -c5.2xlarge 58 -c5.4xlarge 234 -c5.9xlarge 234 +c4.large 29 +c4.xlarge 58 c5.12xlarge 234 c5.18xlarge 737 c5.24xlarge 737 +c5.2xlarge 58 +c5.4xlarge 234 +c5.9xlarge 234 +c5.large 29 c5.metal 737 -c5d.large 29 -c5d.xlarge 58 +c5.xlarge 58 +c5a.12xlarge 234 +c5a.16xlarge 737 +c5a.24xlarge 737 +c5a.2xlarge 58 +c5a.4xlarge 234 +c5a.8xlarge 234 +c5a.large 29 +c5a.metal 737 +c5a.xlarge 58 +c5ad.12xlarge 234 +c5ad.16xlarge 737 +c5ad.24xlarge 737 +c5ad.2xlarge 58 +c5ad.4xlarge 234 +c5ad.8xlarge 234 +c5ad.large 29 +c5ad.metal 737 +c5ad.xlarge 58 +c5d.12xlarge 234 +c5d.18xlarge 737 +c5d.24xlarge 737 c5d.2xlarge 58 c5d.4xlarge 234 c5d.9xlarge 234 -c5d.18xlarge 737 -c5n.large 29 -c5n.xlarge 58 +c5d.large 29 +c5d.metal 737 +c5d.xlarge 58 +c5n.18xlarge 737 c5n.2xlarge 58 c5n.4xlarge 234 c5n.9xlarge 234 -c5n.18xlarge 737 -cc2.8xlarge 234 +c5n.large 29 +c5n.metal 737 +c5n.xlarge 58 +c6a.12xlarge 234 +c6a.16xlarge 737 +c6a.24xlarge 737 +c6a.2xlarge 58 +c6a.32xlarge 737 +c6a.48xlarge 737 +c6a.4xlarge 234 +c6a.8xlarge 234 +c6a.large 29 +c6a.metal 737 +c6a.xlarge 58 +c6g.12xlarge 234 +c6g.16xlarge 737 +c6g.2xlarge 58 +c6g.4xlarge 234 +c6g.8xlarge 234 +c6g.large 29 +c6g.medium 8 +c6g.metal 737 +c6g.xlarge 58 +c6gd.12xlarge 234 +c6gd.16xlarge 737 +c6gd.2xlarge 58 +c6gd.4xlarge 234 +c6gd.8xlarge 234 +c6gd.large 29 +c6gd.medium 8 +c6gd.metal 737 +c6gd.xlarge 58 +c6gn.12xlarge 234 +c6gn.16xlarge 737 +c6gn.2xlarge 58 +c6gn.4xlarge 234 +c6gn.8xlarge 234 +c6gn.large 29 +c6gn.medium 8 +c6gn.xlarge 58 +c6i.12xlarge 234 +c6i.16xlarge 737 +c6i.24xlarge 737 +c6i.2xlarge 58 +c6i.32xlarge 737 +c6i.4xlarge 234 +c6i.8xlarge 234 +c6i.large 29 
+c6i.metal 737 +c6i.xlarge 58 +c6id.12xlarge 234 +c6id.16xlarge 737 +c6id.24xlarge 737 +c6id.2xlarge 58 +c6id.32xlarge 737 +c6id.4xlarge 234 +c6id.8xlarge 234 +c6id.large 29 +c6id.metal 737 +c6id.xlarge 58 +c6in.12xlarge 234 +c6in.16xlarge 737 +c6in.24xlarge 737 +c6in.2xlarge 58 +c6in.32xlarge 345 +c6in.4xlarge 234 +c6in.8xlarge 234 +c6in.large 29 +c6in.metal 345 +c6in.xlarge 58 +c7a.12xlarge 234 +c7a.16xlarge 737 +c7a.24xlarge 737 +c7a.2xlarge 58 +c7a.32xlarge 737 +c7a.48xlarge 737 +c7a.4xlarge 234 +c7a.8xlarge 234 +c7a.large 29 +c7a.medium 8 +c7a.metal-48xl 737 +c7a.xlarge 58 +c7g.12xlarge 234 +c7g.16xlarge 737 +c7g.2xlarge 58 +c7g.4xlarge 234 +c7g.8xlarge 234 +c7g.large 29 +c7g.medium 8 +c7g.metal 737 +c7g.xlarge 58 +c7gd.12xlarge 234 +c7gd.16xlarge 737 +c7gd.2xlarge 58 +c7gd.4xlarge 234 +c7gd.8xlarge 234 +c7gd.large 29 +c7gd.medium 8 +c7gd.xlarge 58 +c7gn.12xlarge 234 +c7gn.16xlarge 737 +c7gn.2xlarge 58 +c7gn.4xlarge 234 +c7gn.8xlarge 234 +c7gn.large 29 +c7gn.medium 8 +c7gn.xlarge 58 +c7i.12xlarge 234 +c7i.16xlarge 737 +c7i.24xlarge 737 +c7i.2xlarge 58 +c7i.48xlarge 737 +c7i.4xlarge 234 +c7i.8xlarge 234 +c7i.large 29 +c7i.metal-24xl 737 +c7i.metal-48xl 737 +c7i.xlarge 58 cr1.8xlarge 234 -d2.xlarge 58 d2.2xlarge 58 d2.4xlarge 234 d2.8xlarge 234 +d2.xlarge 58 +d3.2xlarge 18 +d3.4xlarge 38 +d3.8xlarge 59 +d3.xlarge 10 +d3en.12xlarge 89 +d3en.2xlarge 18 +d3en.4xlarge 38 +d3en.6xlarge 58 +d3en.8xlarge 78 +d3en.xlarge 10 +dl1.24xlarge 737 +dl2q.24xlarge 737 +f1.16xlarge 394 f1.2xlarge 58 f1.4xlarge 234 -f1.16xlarge 242 -g2.2xlarge 58 -g2.8xlarge 234 -g3s.xlarge 58 +g3.16xlarge 737 g3.4xlarge 234 g3.8xlarge 234 -g3.16xlarge 452 +g3s.xlarge 58 +g4ad.16xlarge 234 +g4ad.2xlarge 8 +g4ad.4xlarge 29 +g4ad.8xlarge 58 +g4ad.xlarge 8 +g4dn.12xlarge 234 +g4dn.16xlarge 58 +g4dn.2xlarge 29 +g4dn.4xlarge 29 +g4dn.8xlarge 58 +g4dn.metal 737 +g4dn.xlarge 29 +g5.12xlarge 737 +g5.16xlarge 234 +g5.24xlarge 737 +g5.2xlarge 58 +g5.48xlarge 345 +g5.4xlarge 234 +g5.8xlarge 234 +g5.xlarge 
58 +g5g.16xlarge 737 +g5g.2xlarge 58 +g5g.4xlarge 234 +g5g.8xlarge 234 +g5g.metal 737 +g5g.xlarge 58 +h1.16xlarge 737 h1.2xlarge 58 h1.4xlarge 234 h1.8xlarge 234 -h1.16xlarge 452 +hpc6a.48xlarge 100 +hpc6id.32xlarge 51 +hpc7a.12xlarge 100 +hpc7a.24xlarge 100 +hpc7a.48xlarge 100 +hpc7a.96xlarge 100 +hpc7g.16xlarge 198 +hpc7g.4xlarge 198 +hpc7g.8xlarge 198 hs1.8xlarge 234 -i2.xlarge 58 i2.2xlarge 58 i2.4xlarge 234 i2.8xlarge 234 -i3.large 29 -i3.xlarge 58 +i2.xlarge 58 +i3.16xlarge 737 i3.2xlarge 58 i3.4xlarge 234 i3.8xlarge 234 -i3.16xlarge 452 +i3.large 29 i3.metal 737 -i3en.large 29 -i3en.xlarge 58 +i3.xlarge 58 +i3en.12xlarge 234 +i3en.24xlarge 737 i3en.2xlarge 58 i3en.3xlarge 58 i3en.6xlarge 234 -i3en.12xlarge 234 -i3en.24xlarge 737 -m1.small 8 -m1.medium 12 +i3en.large 29 +i3en.metal 737 +i3en.xlarge 58 +i4g.16xlarge 737 +i4g.2xlarge 58 +i4g.4xlarge 234 +i4g.8xlarge 234 +i4g.large 29 +i4g.xlarge 58 +i4i.12xlarge 234 +i4i.16xlarge 737 +i4i.24xlarge 437 +i4i.2xlarge 58 +i4i.32xlarge 737 +i4i.4xlarge 234 +i4i.8xlarge 234 +i4i.large 29 +i4i.metal 737 +i4i.xlarge 58 +im4gn.16xlarge 737 +im4gn.2xlarge 58 +im4gn.4xlarge 234 +im4gn.8xlarge 234 +im4gn.large 29 +im4gn.xlarge 58 +inf1.24xlarge 321 +inf1.2xlarge 38 +inf1.6xlarge 234 +inf1.xlarge 38 +inf2.24xlarge 737 +inf2.48xlarge 737 +inf2.8xlarge 234 +inf2.xlarge 58 +is4gen.2xlarge 58 +is4gen.4xlarge 234 +is4gen.8xlarge 234 +is4gen.large 29 +is4gen.medium 8 +is4gen.xlarge 58 m1.large 29 +m1.medium 12 +m1.small 8 m1.xlarge 58 -m2.xlarge 58 m2.2xlarge 118 m2.4xlarge 234 -m3.medium 12 +m2.xlarge 58 +m3.2xlarge 118 m3.large 29 +m3.medium 12 m3.xlarge 58 -m3.2xlarge 118 -m4.large 20 -m4.xlarge 58 -m4.2xlarge 58 -m4.4xlarge 234 m4.10xlarge 234 m4.16xlarge 234 -m5.large 29 -m5.xlarge 58 -m5.2xlarge 58 -m5.4xlarge 234 -m5.8xlarge 234 +m4.2xlarge 58 +m4.4xlarge 234 +m4.large 20 +m4.xlarge 58 m5.12xlarge 234 m5.16xlarge 737 m5.24xlarge 737 +m5.2xlarge 58 +m5.4xlarge 234 +m5.8xlarge 234 +m5.large 29 m5.metal 737 -m5a.large 29 
-m5a.xlarge 58 -m5a.2xlarge 58 -m5a.4xlarge 234 -m5a.8xlarge 234 +m5.xlarge 58 m5a.12xlarge 234 m5a.16xlarge 737 m5a.24xlarge 737 -m5ad.large 29 -m5ad.xlarge 58 -m5ad.2xlarge 58 -m5ad.4xlarge 234 +m5a.2xlarge 58 +m5a.4xlarge 234 +m5a.8xlarge 234 +m5a.large 29 +m5a.xlarge 58 m5ad.12xlarge 234 +m5ad.16xlarge 737 m5ad.24xlarge 737 -m5d.large 29 -m5d.xlarge 58 -m5d.2xlarge 58 -m5d.4xlarge 234 -m5d.8xlarge 234 +m5ad.2xlarge 58 +m5ad.4xlarge 234 +m5ad.8xlarge 234 +m5ad.large 29 +m5ad.xlarge 58 m5d.12xlarge 234 m5d.16xlarge 737 m5d.24xlarge 737 +m5d.2xlarge 58 +m5d.4xlarge 234 +m5d.8xlarge 234 +m5d.large 29 m5d.metal 737 -p2.xlarge 58 -p2.8xlarge 234 +m5d.xlarge 58 +m5dn.12xlarge 234 +m5dn.16xlarge 737 +m5dn.24xlarge 737 +m5dn.2xlarge 58 +m5dn.4xlarge 234 +m5dn.8xlarge 234 +m5dn.large 29 +m5dn.metal 737 +m5dn.xlarge 58 +m5n.12xlarge 234 +m5n.16xlarge 737 +m5n.24xlarge 737 +m5n.2xlarge 58 +m5n.4xlarge 234 +m5n.8xlarge 234 +m5n.large 29 +m5n.metal 737 +m5n.xlarge 58 +m5zn.12xlarge 737 +m5zn.2xlarge 58 +m5zn.3xlarge 234 +m5zn.6xlarge 234 +m5zn.large 29 +m5zn.metal 737 +m5zn.xlarge 58 +m6a.12xlarge 234 +m6a.16xlarge 737 +m6a.24xlarge 737 +m6a.2xlarge 58 +m6a.32xlarge 737 +m6a.48xlarge 737 +m6a.4xlarge 234 +m6a.8xlarge 234 +m6a.large 29 +m6a.metal 737 +m6a.xlarge 58 +m6g.12xlarge 234 +m6g.16xlarge 737 +m6g.2xlarge 58 +m6g.4xlarge 234 +m6g.8xlarge 234 +m6g.large 29 +m6g.medium 8 +m6g.metal 737 +m6g.xlarge 58 +m6gd.12xlarge 234 +m6gd.16xlarge 737 +m6gd.2xlarge 58 +m6gd.4xlarge 234 +m6gd.8xlarge 234 +m6gd.large 29 +m6gd.medium 8 +m6gd.metal 737 +m6gd.xlarge 58 +m6i.12xlarge 234 +m6i.16xlarge 737 +m6i.24xlarge 737 +m6i.2xlarge 58 +m6i.32xlarge 737 +m6i.4xlarge 234 +m6i.8xlarge 234 +m6i.large 29 +m6i.metal 737 +m6i.xlarge 58 +m6id.12xlarge 234 +m6id.16xlarge 737 +m6id.24xlarge 737 +m6id.2xlarge 58 +m6id.32xlarge 737 +m6id.4xlarge 234 +m6id.8xlarge 234 +m6id.large 29 +m6id.metal 737 +m6id.xlarge 58 +m6idn.12xlarge 234 +m6idn.16xlarge 737 +m6idn.24xlarge 737 +m6idn.2xlarge 58 
+m6idn.32xlarge 345 +m6idn.4xlarge 234 +m6idn.8xlarge 234 +m6idn.large 29 +m6idn.metal 345 +m6idn.xlarge 58 +m6in.12xlarge 234 +m6in.16xlarge 737 +m6in.24xlarge 737 +m6in.2xlarge 58 +m6in.32xlarge 345 +m6in.4xlarge 234 +m6in.8xlarge 234 +m6in.large 29 +m6in.metal 345 +m6in.xlarge 58 +m7a.12xlarge 234 +m7a.16xlarge 737 +m7a.24xlarge 737 +m7a.2xlarge 58 +m7a.32xlarge 737 +m7a.48xlarge 737 +m7a.4xlarge 234 +m7a.8xlarge 234 +m7a.large 29 +m7a.medium 8 +m7a.metal-48xl 737 +m7a.xlarge 58 +m7g.12xlarge 234 +m7g.16xlarge 737 +m7g.2xlarge 58 +m7g.4xlarge 234 +m7g.8xlarge 234 +m7g.large 29 +m7g.medium 8 +m7g.metal 737 +m7g.xlarge 58 +m7gd.12xlarge 234 +m7gd.16xlarge 737 +m7gd.2xlarge 58 +m7gd.4xlarge 234 +m7gd.8xlarge 234 +m7gd.large 29 +m7gd.medium 8 +m7gd.xlarge 58 +m7i-flex.2xlarge 58 +m7i-flex.4xlarge 234 +m7i-flex.8xlarge 234 +m7i-flex.large 29 +m7i-flex.xlarge 58 +m7i.12xlarge 234 +m7i.16xlarge 737 +m7i.24xlarge 737 +m7i.2xlarge 58 +m7i.48xlarge 737 +m7i.4xlarge 234 +m7i.8xlarge 234 +m7i.large 29 +m7i.metal-24xl 737 +m7i.metal-48xl 737 +m7i.xlarge 58 +mac1.metal 234 +mac2-m2.metal 234 +mac2-m2pro.metal 234 +mac2.metal 234 p2.16xlarge 234 +p2.8xlarge 234 +p2.xlarge 58 +p3.16xlarge 234 p3.2xlarge 58 p3.8xlarge 234 -p3.16xlarge 234 p3dn.24xlarge 737 -r3.large 29 -r3.xlarge 58 +p4d.24xlarge 737 +p4de.24xlarge 737 +p5.48xlarge 100 r3.2xlarge 58 r3.4xlarge 234 r3.8xlarge 234 -r4.large 29 -r4.xlarge 58 +r3.large 29 +r3.xlarge 58 +r4.16xlarge 737 r4.2xlarge 58 r4.4xlarge 234 r4.8xlarge 234 -r4.16xlarge 452 -r5.large 29 -r5.xlarge 58 -r5.2xlarge 58 -r5.4xlarge 234 -r5.8xlarge 234 +r4.large 29 +r4.xlarge 58 r5.12xlarge 234 r5.16xlarge 737 r5.24xlarge 737 +r5.2xlarge 58 +r5.4xlarge 234 +r5.8xlarge 234 +r5.large 29 r5.metal 737 -r5a.large 29 -r5a.xlarge 58 -r5a.2xlarge 58 -r5a.4xlarge 234 -r5a.8xlarge 234 +r5.xlarge 58 r5a.12xlarge 234 r5a.16xlarge 737 r5a.24xlarge 737 -r5ad.large 29 -r5ad.xlarge 58 -r5ad.2xlarge 58 -r5ad.4xlarge 234 +r5a.2xlarge 58 +r5a.4xlarge 234 +r5a.8xlarge 
234 +r5a.large 29 +r5a.xlarge 58 r5ad.12xlarge 234 +r5ad.16xlarge 737 r5ad.24xlarge 737 -r5d.large 29 -r5d.xlarge 58 -r5d.2xlarge 58 -r5d.4xlarge 234 -r5d.8xlarge 234 +r5ad.2xlarge 58 +r5ad.4xlarge 234 +r5ad.8xlarge 234 +r5ad.large 29 +r5ad.xlarge 58 +r5b.12xlarge 234 +r5b.16xlarge 737 +r5b.24xlarge 737 +r5b.2xlarge 58 +r5b.4xlarge 234 +r5b.8xlarge 234 +r5b.large 29 +r5b.metal 737 +r5b.xlarge 58 r5d.12xlarge 234 r5d.16xlarge 737 r5d.24xlarge 737 +r5d.2xlarge 58 +r5d.4xlarge 234 +r5d.8xlarge 234 +r5d.large 29 r5d.metal 737 +r5d.xlarge 58 +r5dn.12xlarge 234 +r5dn.16xlarge 737 +r5dn.24xlarge 737 +r5dn.2xlarge 58 +r5dn.4xlarge 234 +r5dn.8xlarge 234 +r5dn.large 29 +r5dn.metal 737 +r5dn.xlarge 58 +r5n.12xlarge 234 +r5n.16xlarge 737 +r5n.24xlarge 737 +r5n.2xlarge 58 +r5n.4xlarge 234 +r5n.8xlarge 234 +r5n.large 29 +r5n.metal 737 +r5n.xlarge 58 +r6a.12xlarge 234 +r6a.16xlarge 737 +r6a.24xlarge 737 +r6a.2xlarge 58 +r6a.32xlarge 737 +r6a.48xlarge 737 +r6a.4xlarge 234 +r6a.8xlarge 234 +r6a.large 29 +r6a.metal 737 +r6a.xlarge 58 +r6g.12xlarge 234 +r6g.16xlarge 737 +r6g.2xlarge 58 +r6g.4xlarge 234 +r6g.8xlarge 234 +r6g.large 29 +r6g.medium 8 +r6g.metal 737 +r6g.xlarge 58 +r6gd.12xlarge 234 +r6gd.16xlarge 737 +r6gd.2xlarge 58 +r6gd.4xlarge 234 +r6gd.8xlarge 234 +r6gd.large 29 +r6gd.medium 8 +r6gd.metal 737 +r6gd.xlarge 58 +r6i.12xlarge 234 +r6i.16xlarge 737 +r6i.24xlarge 737 +r6i.2xlarge 58 +r6i.32xlarge 737 +r6i.4xlarge 234 +r6i.8xlarge 234 +r6i.large 29 +r6i.metal 737 +r6i.xlarge 58 +r6id.12xlarge 234 +r6id.16xlarge 737 +r6id.24xlarge 737 +r6id.2xlarge 58 +r6id.32xlarge 737 +r6id.4xlarge 234 +r6id.8xlarge 234 +r6id.large 29 +r6id.metal 737 +r6id.xlarge 58 +r6idn.12xlarge 234 +r6idn.16xlarge 737 +r6idn.24xlarge 737 +r6idn.2xlarge 58 +r6idn.32xlarge 345 +r6idn.4xlarge 234 +r6idn.8xlarge 234 +r6idn.large 29 +r6idn.metal 345 +r6idn.xlarge 58 +r6in.12xlarge 234 +r6in.16xlarge 737 +r6in.24xlarge 737 +r6in.2xlarge 58 +r6in.32xlarge 345 +r6in.4xlarge 234 +r6in.8xlarge 234 +r6in.large 
29 +r6in.metal 345 +r6in.xlarge 58 +r7a.12xlarge 234 +r7a.16xlarge 737 +r7a.24xlarge 737 +r7a.2xlarge 58 +r7a.32xlarge 737 +r7a.48xlarge 737 +r7a.4xlarge 234 +r7a.8xlarge 234 +r7a.large 29 +r7a.medium 8 +r7a.metal-48xl 737 +r7a.xlarge 58 +r7g.12xlarge 234 +r7g.16xlarge 737 +r7g.2xlarge 58 +r7g.4xlarge 234 +r7g.8xlarge 234 +r7g.large 29 +r7g.medium 8 +r7g.metal 737 +r7g.xlarge 58 +r7gd.12xlarge 234 +r7gd.16xlarge 737 +r7gd.2xlarge 58 +r7gd.4xlarge 234 +r7gd.8xlarge 234 +r7gd.large 29 +r7gd.medium 8 +r7gd.xlarge 58 +r7i.12xlarge 234 +r7i.16xlarge 737 +r7i.24xlarge 737 +r7i.2xlarge 58 +r7i.48xlarge 737 +r7i.4xlarge 234 +r7i.8xlarge 234 +r7i.large 29 +r7i.metal-24xl 737 +r7i.metal-48xl 737 +r7i.xlarge 58 +r7iz.12xlarge 234 +r7iz.16xlarge 737 +r7iz.2xlarge 58 +r7iz.32xlarge 737 +r7iz.4xlarge 234 +r7iz.8xlarge 234 +r7iz.large 29 +r7iz.metal-16xl 737 +r7iz.metal-32xl 737 +r7iz.xlarge 58 t1.micro 4 -t2.nano 4 +t2.2xlarge 44 +t2.large 35 +t2.medium 17 t2.micro 4 +t2.nano 4 t2.small 11 -t2.medium 17 -t2.large 35 t2.xlarge 44 -t2.2xlarge 44 -t3.nano 4 +t3.2xlarge 58 +t3.large 35 +t3.medium 17 t3.micro 4 +t3.nano 4 t3.small 11 -t3.medium 17 -t3.large 35 t3.xlarge 58 -t3.2xlarge 58 -t3a.nano 4 +t3a.2xlarge 58 +t3a.large 35 +t3a.medium 17 t3a.micro 4 +t3a.nano 4 t3a.small 8 -t3a.medium 17 -t3a.large 35 t3a.xlarge 58 -t3a.2xlarge 58 +t4g.2xlarge 58 +t4g.large 35 +t4g.medium 17 +t4g.micro 4 +t4g.nano 4 +t4g.small 11 +t4g.xlarge 58 +trn1.2xlarge 58 +trn1.32xlarge 247 +trn1n.32xlarge 247 +u-12tb1.112xlarge 737 +u-12tb1.metal 147 +u-18tb1.112xlarge 737 +u-18tb1.metal 737 +u-24tb1.112xlarge 737 +u-24tb1.metal 737 +u-3tb1.56xlarge 234 +u-6tb1.112xlarge 737 +u-6tb1.56xlarge 737 u-6tb1.metal 147 +u-9tb1.112xlarge 737 u-9tb1.metal 147 -u-12tb1.metal 147 +vt1.24xlarge 737 +vt1.3xlarge 58 +vt1.6xlarge 234 x1.16xlarge 234 x1.32xlarge 234 -x1e.xlarge 29 +x1e.16xlarge 234 x1e.2xlarge 58 +x1e.32xlarge 234 x1e.4xlarge 58 x1e.8xlarge 58 -x1e.16xlarge 234 -x1e.32xlarge 234 -z1d.large 29 
-z1d.xlarge 58 +x1e.xlarge 29 +x2gd.12xlarge 234 +x2gd.16xlarge 737 +x2gd.2xlarge 58 +x2gd.4xlarge 234 +x2gd.8xlarge 234 +x2gd.large 29 +x2gd.medium 8 +x2gd.metal 737 +x2gd.xlarge 58 +x2idn.16xlarge 737 +x2idn.24xlarge 737 +x2idn.32xlarge 737 +x2idn.metal 737 +x2iedn.16xlarge 737 +x2iedn.24xlarge 737 +x2iedn.2xlarge 58 +x2iedn.32xlarge 737 +x2iedn.4xlarge 234 +x2iedn.8xlarge 234 +x2iedn.metal 737 +x2iedn.xlarge 58 +x2iezn.12xlarge 737 +x2iezn.2xlarge 58 +x2iezn.4xlarge 234 +x2iezn.6xlarge 234 +x2iezn.8xlarge 234 +x2iezn.metal 737 +z1d.12xlarge 737 z1d.2xlarge 58 z1d.3xlarge 234 z1d.6xlarge 234 -z1d.12xlarge 737 +z1d.large 29 z1d.metal 737 +z1d.xlarge 58 diff --git a/files/get-ecr-uri.sh b/files/get-ecr-uri.sh new file mode 100755 index 000000000..3dc56523a --- /dev/null +++ b/files/get-ecr-uri.sh @@ -0,0 +1,127 @@ +#!/usr/bin/env bash +set -euo pipefail + +# More details about the mappings in this file can be found here https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html + +region=$1 +aws_domain=$2 +if [[ $# -eq 3 ]] && [[ ! 
-z $3 ]]; then + acct=$3 +else + case "${region}" in + ap-east-1) + acct="800184023465" + ;; + me-south-1) + acct="558608220178" + ;; + cn-north-1) + acct="918309763551" + ;; + cn-northwest-1) + acct="961992271922" + ;; + us-gov-west-1) + acct="013241004608" + ;; + us-gov-east-1) + acct="151742754352" + ;; + us-iso-west-1) + acct="608367168043" + ;; + us-iso-east-1) + acct="725322719131" + ;; + us-isob-east-1) + acct="187977181151" + ;; + af-south-1) + acct="877085696533" + ;; + ap-southeast-3) + acct="296578399912" + ;; + me-central-1) + acct="759879836304" + ;; + eu-south-1) + acct="590381155156" + ;; + eu-south-2) + acct="455263428931" + ;; + eu-central-2) + acct="900612956339" + ;; + ap-south-2) + acct="900889452093" + ;; + ap-southeast-4) + acct="491585149902" + ;; + il-central-1) + acct="066635153087" + ;; + ca-west-1) + acct="761377655185" + ;; + # This section includes all commercial non-opt-in regions, which use + # the same account for ECR pause container images, but still have in-region + # registries. + ap-northeast-1 | \ + ap-northeast-2 | \ + ap-northeast-3 | \ + ap-south-1 | \ + ap-southeast-1 | \ + ap-southeast-2 | \ + ca-central-1 | \ + eu-central-1 | \ + eu-north-1 | \ + eu-west-1 | \ + eu-west-2 | \ + eu-west-3 | \ + sa-east-1 | \ + us-east-1 | \ + us-east-2 | \ + us-west-1 | \ + us-west-2) + acct="602401143452" + ;; + # If the region is not mapped to an account, let's try to choose another region + # in that partition. 
+ us-gov-*) + acct="013241004608" + region="us-gov-west-1" + ;; + cn-*) + acct="961992271922" + region="cn-northwest-1" + ;; + us-iso-*) + acct="725322719131" + region="us-iso-east-1" + ;; + us-isob-*) + acct="187977181151" + region="us-isob-east-1" + ;; + *) + acct="602401143452" + region="us-west-2" + ;; + esac # end region check +fi + +ECR_DOMAIN="${acct}.dkr.ecr.${region}.${aws_domain}" + +# if FIPS is enabled on the machine, use the FIPS endpoint if it's available +if [[ "$(sysctl -n crypto.fips_enabled)" == 1 ]]; then + ECR_FIPS_DOMAIN="${acct}.dkr.ecr-fips.${region}.${aws_domain}" + if [ $(getent hosts "$ECR_FIPS_DOMAIN" | wc -l) -gt 0 ]; then + echo "$ECR_FIPS_DOMAIN" + exit 0 + fi +fi + +echo "$ECR_DOMAIN" diff --git a/files/kubelet-config-with-secret-polling.json b/files/kubelet-config-with-secret-polling.json deleted file mode 100644 index 08f7127f5..000000000 --- a/files/kubelet-config-with-secret-polling.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "kind": "KubeletConfiguration", - "apiVersion": "kubelet.config.k8s.io/v1beta1", - "address": "0.0.0.0", - "authentication": { - "anonymous": { - "enabled": false - }, - "webhook": { - "cacheTTL": "2m0s", - "enabled": true - }, - "x509": { - "clientCAFile": "/etc/kubernetes/pki/ca.crt" - } - }, - "authorization": { - "mode": "Webhook", - "webhook": { - "cacheAuthorizedTTL": "5m0s", - "cacheUnauthorizedTTL": "30s" - } - }, - "clusterDomain": "cluster.local", - "hairpinMode": "hairpin-veth", - "cgroupDriver": "cgroupfs", - "cgroupRoot": "/", - "featureGates": { - "RotateKubeletServerCertificate": true - }, - "serializeImagePulls": false, - "serverTLSBootstrap": true, - "configMapAndSecretChangeDetectionStrategy": "Cache", - "tlsCipherSuites": ["TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256"] -} diff --git a/files/kubelet-config.json b/files/kubelet-config.json index 3a41c2a95..b78510c6a 100644 --- a/files/kubelet-config.json +++ b/files/kubelet-config.json @@ -23,11 +23,14 @@ }, "clusterDomain": "cluster.local", 
"hairpinMode": "hairpin-veth", + "readOnlyPort": 0, "cgroupDriver": "cgroupfs", "cgroupRoot": "/", "featureGates": { "RotateKubeletServerCertificate": true }, + "protectKernelDefaults": true, "serializeImagePulls": false, - "serverTLSBootstrap": true + "serverTLSBootstrap": true, + "tlsCipherSuites": ["TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256", "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305", "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305", "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", "TLS_RSA_WITH_AES_256_GCM_SHA384", "TLS_RSA_WITH_AES_128_GCM_SHA256"] } diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service new file mode 100644 index 000000000..946fb1c28 --- /dev/null +++ b/files/kubelet-containerd.service @@ -0,0 +1,27 @@ +[Unit] +Description=Kubernetes Kubelet +Documentation=https://github.com/kubernetes/kubernetes +After=containerd.service sandbox-image.service +Requires=containerd.service sandbox-image.service + +[Service] +Slice=runtime.slice +ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 +ExecStart=/usr/bin/kubelet \ + --config /etc/kubernetes/kubelet/kubelet-config.json \ + --kubeconfig /var/lib/kubelet/kubeconfig \ + --container-runtime-endpoint unix:///run/containerd/containerd.sock \ + --image-credential-provider-config /etc/eks/image-credential-provider/config.json \ + --image-credential-provider-bin-dir /etc/eks/image-credential-provider \ + $KUBELET_ARGS \ + $KUBELET_EXTRA_ARGS + +Restart=on-failure +RestartForceExitStatus=SIGPIPE +RestartSec=5 +KillMode=process +CPUAccounting=true +MemoryAccounting=true + +[Install] +WantedBy=multi-user.target diff --git a/files/kubelet-kubeconfig b/files/kubelet-kubeconfig index 8c1436883..e8ad7efd4 100644 --- a/files/kubelet-kubeconfig +++ b/files/kubelet-kubeconfig @@ -15,9 +15,11 @@ users: - name: kubelet user: exec: - apiVersion: client.authentication.k8s.io/v1alpha1 + apiVersion: client.authentication.k8s.io/v1beta1 
command: /usr/bin/aws-iam-authenticator args: - "token" - "-i" - "CLUSTER_NAME" + - --region + - "AWS_REGION" diff --git a/files/kubelet.service b/files/kubelet.service index 125e06798..08c746504 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -1,20 +1,22 @@ [Unit] Description=Kubernetes Kubelet Documentation=https://github.com/kubernetes/kubernetes -After=docker.service +After=docker.service iptables-restore.service Requires=docker.service [Service] -ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -ExecStart=/usr/bin/kubelet --cloud-provider aws \ +ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 +ExecStart=/usr/bin/kubelet \ --config /etc/kubernetes/kubelet/kubelet-config.json \ - --allow-privileged=true \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ - --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS + --network-plugin cni \ + --image-credential-provider-config /etc/eks/image-credential-provider/config.json \ + --image-credential-provider-bin-dir /etc/eks/image-credential-provider \ + $KUBELET_ARGS \ + $KUBELET_EXTRA_ARGS -Restart=on-failure -RestartForceExitStatus=SIGPIPE +Restart=always RestartSec=5 KillMode=process diff --git a/files/logrotate.conf b/files/logrotate.conf new file mode 100644 index 000000000..13af94c03 --- /dev/null +++ b/files/logrotate.conf @@ -0,0 +1,31 @@ +# see "man logrotate" for details +# rotate log files weekly +weekly + +# keep 4 weeks worth of backlogs +rotate 4 + +# create new (empty) log files after rotating old ones +create + +# use date as a suffix of the rotated file +dateext + +compress + +# RPM packages drop log rotation information into this directory +include /etc/logrotate.d + +/var/log/wtmp { + monthly + create 0664 root utmp + minsize 1M + rotate 1 +} + +/var/log/btmp { + missingok + monthly + create 0600 root utmp + rotate 1 +} diff --git a/files/max-pods-calculator.sh b/files/max-pods-calculator.sh new file mode 100755 index 000000000..c52c5d8d5 --- /dev/null +++ 
b/files/max-pods-calculator.sh @@ -0,0 +1,159 @@ +#!/usr/bin/env bash + +set -o pipefail +set -o nounset +set -o errexit + +err_report() { + echo "Exited with error on line $1" +} +trap 'err_report $LINENO' ERR + +function print_help { + echo "usage: $0 [options]" + echo "Calculates maxPods value to be used when starting up the kubelet." + echo "-h,--help print this help." + echo "--instance-type Specify the instance type to calculate max pods value." + echo "--instance-type-from-imds Use this flag if the instance type should be fetched from IMDS." + echo "--cni-version Specify the version of the CNI (example - 1.7.5)." + echo "--cni-custom-networking-enabled Use this flag to indicate if CNI custom networking mode has been enabled." + echo "--cni-prefix-delegation-enabled Use this flag to indicate if CNI prefix delegation has been enabled." + echo "--cni-max-eni specify how many ENIs should be used for prefix delegation. Defaults to using all ENIs per instance." + echo "--show-max-allowed Use this flag to show max number of Pods allowed to run in Worker Node. 
Otherwise the script will show the recommended value" +} + +POSITIONAL=() + +while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -h | --help) + print_help + exit 1 + ;; + --instance-type) + INSTANCE_TYPE=$2 + shift + shift + ;; + --instance-type-from-imds) + INSTANCE_TYPE_FROM_IMDS=true + shift + ;; + --cni-version) + CNI_VERSION=$2 + shift + shift + ;; + --cni-custom-networking-enabled) + CNI_CUSTOM_NETWORKING_ENABLED=true + shift + ;; + --cni-prefix-delegation-enabled) + CNI_PREFIX_DELEGATION_ENABLED=true + shift + ;; + --cni-max-eni) + CNI_MAX_ENI=$2 + shift + shift + ;; + --show-max-allowed) + SHOW_MAX_ALLOWED=true + shift + ;; + *) # unknown option + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; + esac +done + +CNI_VERSION="${CNI_VERSION:-}" +CNI_CUSTOM_NETWORKING_ENABLED="${CNI_CUSTOM_NETWORKING_ENABLED:-false}" +CNI_PREFIX_DELEGATION_ENABLED="${CNI_PREFIX_DELEGATION_ENABLED:-false}" +CNI_MAX_ENI="${CNI_MAX_ENI:-}" +INSTANCE_TYPE="${INSTANCE_TYPE:-}" +INSTANCE_TYPE_FROM_IMDS="${INSTANCE_TYPE_FROM_IMDS:-false}" +SHOW_MAX_ALLOWED="${SHOW_MAX_ALLOWED:-false}" + +PREFIX_DELEGATION_SUPPORTED=false +IPS_PER_PREFIX=16 + +if [ "$INSTANCE_TYPE_FROM_IMDS" = true ]; then + export AWS_DEFAULT_REGION=$(imds /latest/dynamic/instance-identity/document | jq .region -r) + INSTANCE_TYPE=$(imds /latest/meta-data/instance-type) +elif [ -z "$INSTANCE_TYPE" ]; then # There's no reasonable default for an instanceType so force one to be provided to the script. + echo "You must specify an instance type to calculate max pods value." + exit 1 +fi + +if [ -z "$CNI_VERSION" ]; then + echo "You must specify a CNI Version to use. 
Example - 1.7.5" + exit 1 +fi + +calculate_max_ip_addresses_prefix_delegation() { + enis=$1 + instance_max_eni_ips=$2 + echo $(($enis * (($instance_max_eni_ips - 1) * $IPS_PER_PREFIX) + 2)) +} + +calculate_max_ip_addresses_secondary_ips() { + enis=$1 + instance_max_eni_ips=$2 + echo $(($enis * ($instance_max_eni_ips - 1) + 2)) +} + +min_number() { + printf "%s\n" "$@" | sort -g | head -n1 +} + +VERSION_SPLIT=(${CNI_VERSION//./ }) +CNI_MAJOR_VERSION="${VERSION_SPLIT[0]}" +CNI_MINOR_VERSION="${VERSION_SPLIT[1]}" +if [[ "$CNI_MAJOR_VERSION" -gt 1 ]] || ([[ "$CNI_MAJOR_VERSION" = 1 ]] && [[ "$CNI_MINOR_VERSION" -gt 8 ]]); then + PREFIX_DELEGATION_SUPPORTED=true +fi + +DESCRIBE_INSTANCES_RESULT=$(aws ec2 describe-instance-types --instance-type "${INSTANCE_TYPE}" --query 'InstanceTypes[0].{Hypervisor: Hypervisor, EniCount: NetworkInfo.MaximumNetworkInterfaces, PodsPerEniCount: NetworkInfo.Ipv4AddressesPerInterface, CpuCount: VCpuInfo.DefaultVCpus}' --output json) + +HYPERVISOR_TYPE=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.Hypervisor') +IS_NITRO=false +if [[ "$HYPERVISOR_TYPE" == "nitro" ]]; then + IS_NITRO=true +fi +INSTANCE_MAX_ENIS=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.EniCount') +INSTANCE_MAX_ENIS_IPS=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.PodsPerEniCount') + +if [ -z "$CNI_MAX_ENI" ]; then + enis_for_pods=$INSTANCE_MAX_ENIS +else + enis_for_pods="$(min_number $CNI_MAX_ENI $INSTANCE_MAX_ENIS)" +fi + +if [ "$CNI_CUSTOM_NETWORKING_ENABLED" = true ]; then + enis_for_pods=$((enis_for_pods - 1)) +fi + +if [ "$IS_NITRO" = true ] && [ "$CNI_PREFIX_DELEGATION_ENABLED" = true ] && [ "$PREFIX_DELEGATION_SUPPORTED" = true ]; then + max_pods=$(calculate_max_ip_addresses_prefix_delegation $enis_for_pods $INSTANCE_MAX_ENIS_IPS) +else + max_pods=$(calculate_max_ip_addresses_secondary_ips $enis_for_pods $INSTANCE_MAX_ENIS_IPS) +fi + +# Limit the total number of pods that can be launched on any instance type based on the vCPUs on that instance type. 
+MAX_POD_CEILING_FOR_LOW_CPU=110 +MAX_POD_CEILING_FOR_HIGH_CPU=250 +CPU_COUNT=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.CpuCount') + +if [ "$SHOW_MAX_ALLOWED" = true ]; then + echo $max_pods + exit 0 +fi + +if [ "$CPU_COUNT" -gt 30 ]; then + echo $(min_number $MAX_POD_CEILING_FOR_HIGH_CPU $max_pods) +else + echo $(min_number $MAX_POD_CEILING_FOR_LOW_CPU $max_pods) +fi diff --git a/files/pull-image.sh b/files/pull-image.sh new file mode 100755 index 000000000..ca89e2550 --- /dev/null +++ b/files/pull-image.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +img=$1 +region=$(echo "${img}" | cut -f4 -d ".") +MAX_RETRIES=3 + +function retry() { + local rc=0 + for attempt in $(seq 0 $MAX_RETRIES); do + rc=0 + [[ $attempt -gt 0 ]] && echo "Attempt $attempt of $MAX_RETRIES" 1>&2 + "$@" + rc=$? + [[ $rc -eq 0 ]] && break + [[ $attempt -eq $MAX_RETRIES ]] && exit $rc + local jitter=$((1 + RANDOM % 10)) + local sleep_sec="$(($((5 << $((1 + $attempt)))) + $jitter))" + sleep $sleep_sec + done +} + +ecr_password=$(retry aws ecr get-login-password --region $region) +if [[ -z ${ecr_password} ]]; then + echo >&2 "Unable to retrieve the ECR password." + exit 1 +fi +retry sudo ctr --namespace k8s.io content fetch "${img}" --user AWS:${ecr_password} diff --git a/files/pull-sandbox-image.sh b/files/pull-sandbox-image.sh new file mode 100644 index 000000000..5610c09b1 --- /dev/null +++ b/files/pull-sandbox-image.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +source <(grep "sandbox_image" /etc/containerd/config.toml | tr -d ' ') + +### skip if we don't have a sandbox_image set in config.toml +if [[ -z ${sandbox_image:-} ]]; then + echo >&2 "Skipping ... missing sandbox_image from /etc/containerd/config.toml" + exit 0 +fi + +### Short-circuit fetching sandbox image if its already present +if [[ -n $(sudo ctr --namespace k8s.io image ls | grep "${sandbox_image}") ]]; then + echo >&2 "Skipping ... 
sandbox_image '${sandbox_image}' is already present" + exit 0 +fi + +# use the region that the sandbox image comes from for the ecr authentication, +# also mitigating the localzone issue: https://github.com/aws/aws-cli/issues/7043 +region=$(echo "${sandbox_image}" | cut -f4 -d ".") + +MAX_RETRIES=3 + +function retry() { + local rc=0 + for attempt in $(seq 0 $MAX_RETRIES); do + rc=0 + [[ $attempt -gt 0 ]] && echo "Attempt $attempt of $MAX_RETRIES" 1>&2 + "$@" + rc=$? + [[ $rc -eq 0 ]] && break + [[ $attempt -eq $MAX_RETRIES ]] && exit $rc + local jitter=$((1 + RANDOM % 10)) + local sleep_sec="$(($((5 << $((1 + $attempt)))) + $jitter))" + sleep $sleep_sec + done +} + +# for public, non-ecr repositories even if this fails to get ECR credentials the image will pull +ecr_password=$(retry aws ecr get-login-password --region "${region}") +if [[ -z ${ecr_password} ]]; then + echo >&2 "Unable to retrieve the ECR password. Image pull may not be properly authenticated." +fi +retry sudo crictl pull --creds "AWS:${ecr_password}" "${sandbox_image}" diff --git a/files/runtime.slice b/files/runtime.slice new file mode 100644 index 000000000..5e189639a --- /dev/null +++ b/files/runtime.slice @@ -0,0 +1,4 @@ +[Unit] +Description=Kubernetes and container runtime slice +Documentation=man:systemd.special(7) +Before=slices.target diff --git a/files/sandbox-image.service b/files/sandbox-image.service new file mode 100644 index 000000000..6694e29ba --- /dev/null +++ b/files/sandbox-image.service @@ -0,0 +1,12 @@ +[Unit] +Description=pull sandbox image defined in containerd config.toml +# pulls sandbox image using ctr tool +After=containerd.service +Requires=containerd.service + +[Service] +Type=oneshot +ExecStart=/etc/eks/containerd/pull-sandbox-image.sh + +[Install] +WantedBy=multi-user.target diff --git a/hack/generate-template-variable-doc.py b/hack/generate-template-variable-doc.py new file mode 100755 index 000000000..3f08fcb7a --- /dev/null +++ 
b/hack/generate-template-variable-doc.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 + +import json +import os +import re + +whereami = os.path.abspath(__file__) +os.chdir(os.path.dirname(whereami)) + +template = {} +with open('../eks-worker-al2.json') as template_file: + template = json.load(template_file) + +default_vars = {} +with open('../eks-worker-al2-variables.json') as default_var_file: + default_vars = json.load(default_var_file) + +all_vars = {} + +for var in template['variables']: + all_vars[var] = None +for var, default_val in default_vars.items(): + all_vars[var] = default_val + +doc_file_name = '../doc/USER_GUIDE.md' +doc = None +with open(doc_file_name) as doc_file: + doc = doc_file.read() + +table_boundary = '' +existing_table_pattern = f"{table_boundary}([\S\s]*){table_boundary}" +existing_table_matches = re.search(existing_table_pattern, doc) +existing_table_lines = existing_table_matches.group(1).splitlines() + +new_table = f"{table_boundary}\n" +new_table += f"{existing_table_lines[1]}\n" +new_table += f"{existing_table_lines[2]}\n" + +existing_descriptions = {} +for line in existing_table_lines[3:]: + columns = line.split('|') + var = columns[1].strip(" `") + existing_descriptions[var] = columns[3].strip(" `") + +for var, val in all_vars.items(): + if val is not None: + if val == "": + val = f"`\"\"`" + else: + val = f"```{val}```" + else: + val = "*None*" + description = "" + if var in existing_descriptions: + description = existing_descriptions[var] + new_table += f"| `{var}` | {val} | {description} |\n" + +new_table += table_boundary + +replace_doc_pattern = f"{table_boundary}[\S\s]*{table_boundary}" +new_doc = re.sub(replace_doc_pattern, new_table, doc) + +with open(doc_file_name, 'w') as doc_file: + doc_file.write(new_doc) diff --git a/hack/latest-binaries.sh b/hack/latest-binaries.sh new file mode 100755 index 000000000..246fc8dd8 --- /dev/null +++ b/hack/latest-binaries.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +set -o errexit +set -o 
pipefail +set -o nounset + +if [ "$#" -ne 1 ]; then + echo "usage: $0 KUBERNETES_MINOR_VERSION" + exit 1 +fi + +MINOR_VERSION="${1}" + +# retrieve the available "VERSION/BUILD_DATE" prefixes (e.g. "1.28.1/2023-09-14") +# from the binary object keys, sorted in descending semver order, and pick the first one +LATEST_BINARIES=$(aws s3api list-objects-v2 --bucket amazon-eks --prefix "${MINOR_VERSION}" --query 'Contents[*].[Key]' --output text | cut -d'/' -f-2 | sort -Vru | head -n1) + +if [ "${LATEST_BINARIES}" == "None" ]; then + echo >&2 "No binaries available for minor version: ${MINOR_VERSION}" + exit 1 +fi + +LATEST_VERSION=$(echo "${LATEST_BINARIES}" | cut -d'/' -f1) +LATEST_BUILD_DATE=$(echo "${LATEST_BINARIES}" | cut -d'/' -f2) + +echo "kubernetes_version=${LATEST_VERSION} kubernetes_build_date=${LATEST_BUILD_DATE}" diff --git a/hack/lint-docs.sh b/hack/lint-docs.sh new file mode 100755 index 000000000..24ef64720 --- /dev/null +++ b/hack/lint-docs.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +set -o errexit +cd $(dirname $0) +./generate-template-variable-doc.py +if ! git diff --exit-code ../doc/USER_GUIDE.md; then + echo "ERROR: doc/USER_GUIDE.md is out of date. Please run hack/generate-template-variable-doc.py and commit the changes." + exit 1 +fi +./mkdocs.sh build --strict diff --git a/hack/lint-space-errors.sh b/hack/lint-space-errors.sh new file mode 100755 index 000000000..6c0f84a73 --- /dev/null +++ b/hack/lint-space-errors.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +cd $(dirname $0)/.. 
+ +# `git apply|diff` can check for space errors, with the core implementation being `git diff-tree` +# this tool compares two trees, generally used to find errors in proposed changes +# we want to check the entire existing tree, so we compare HEAD against an empty tree +git diff-tree --check $(git hash-object -t tree /dev/null) HEAD diff --git a/hack/mkdocs.Dockerfile b/hack/mkdocs.Dockerfile new file mode 100644 index 000000000..0f02dedce --- /dev/null +++ b/hack/mkdocs.Dockerfile @@ -0,0 +1,4 @@ +FROM python:3.9 +RUN pip install mkdocs mkdocs-material +WORKDIR /workdir +ENTRYPOINT ["mkdocs"] \ No newline at end of file diff --git a/hack/mkdocs.sh b/hack/mkdocs.sh new file mode 100755 index 000000000..4f7c93b95 --- /dev/null +++ b/hack/mkdocs.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +set -o errexit + +cd $(dirname $0) + +IMAGE_ID=$(docker build --file mkdocs.Dockerfile --quiet .) +cd .. + +if [[ "$*" =~ "serve" ]]; then + EXTRA_ARGS="${EXTRA_ARGS} -a 0.0.0.0:8000" +fi + +docker run --rm -v ${PWD}:/workdir -p 8000:8000 ${IMAGE_ID} "${@}" ${EXTRA_ARGS} diff --git a/hack/transform-al2-to-al2023.sh b/hack/transform-al2-to-al2023.sh new file mode 100755 index 000000000..7a5c0bb69 --- /dev/null +++ b/hack/transform-al2-to-al2023.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +set -o pipefail +set -o nounset +set -o errexit + +if [[ -z "${PACKER_TEMPLATE_FILE:-}" ]]; then + echo "PACKER_TEMPLATE_FILE must be set." >&2 + exit 1 +fi +if [[ -z "${PACKER_DEFAULT_VARIABLE_FILE:-}" ]]; then + echo "PACKER_DEFAULT_VARIABLE_FILE must be set." 
>&2 + exit 1 +fi + +# rsa keys are not supported in al2023, switch to ed25519 +# delete the upgrade kernel provisioner as we don't need it for al2023 +cat "${PACKER_TEMPLATE_FILE}" \ + | jq '._comment = "All template variables are enumerated here; and most variables have a default value defined in eks-worker-al2023-variables.json"' \ + | jq '.variables.temporary_key_pair_type = "ed25519"' \ + | jq 'del(.provisioners[5])' \ + | jq 'del(.provisioners[5])' \ + | jq 'del(.provisioners[5])' \ + > "${PACKER_TEMPLATE_FILE/al2/al2023}" + +# use newer versions of containerd and runc, do not install docker +# use al2023 6.1 minimal image +cat "${PACKER_DEFAULT_VARIABLE_FILE}" \ + | jq '.ami_component_description = "(k8s: {{ user `kubernetes_version` }}, containerd: {{ user `containerd_version` }})"' \ + | jq '.ami_description = "EKS-optimized Kubernetes node based on Amazon Linux 2023"' \ + | jq '.containerd_version = "*" | .runc_version = "*" | .docker_version = "" ' \ + | jq '.source_ami_filter_name = "al2023-ami-minimal-2023.*-kernel-6.1-x86_64"' \ + | jq '.volume_type = "gp3"' \ + > "${PACKER_DEFAULT_VARIABLE_FILE/al2/al2023}" diff --git a/install-worker.sh b/install-worker.sh deleted file mode 100644 index 8b4bbe9df..000000000 --- a/install-worker.sh +++ /dev/null @@ -1,264 +0,0 @@ -#!/usr/bin/env bash - -set -o pipefail -set -o nounset -set -o errexit -IFS=$'\n\t' - -TEMPLATE_DIR=${TEMPLATE_DIR:-/tmp/worker} - -################################################################################ -### Validate Required Arguments ################################################ -################################################################################ -validate_env_set() { - ( - set +o nounset - - if [ -z "${!1}" ]; then - echo "Packer variable '$1' was not set. 
Aborting" - exit 1 - fi - ) -} - -validate_env_set BINARY_BUCKET_NAME -validate_env_set BINARY_BUCKET_REGION -validate_env_set DOCKER_VERSION -validate_env_set CNI_VERSION -validate_env_set CNI_PLUGIN_VERSION -validate_env_set KUBERNETES_VERSION -validate_env_set KUBERNETES_BUILD_DATE - -################################################################################ -### Machine Architecture ####################################################### -################################################################################ - -MACHINE=$(uname -m) -if [ "$MACHINE" == "x86_64" ]; then - ARCH="amd64" -elif [ "$MACHINE" == "aarch64" ]; then - ARCH="arm64" -else - echo "Unknown machine architecture '$MACHINE'" >&2 - exit 1 -fi - -################################################################################ -### Packages ################################################################### -################################################################################ - -# Update the OS to begin with to catch up to the latest packages. -sudo yum update -y - -# Install necessary packages -sudo yum install -y \ - aws-cfn-bootstrap \ - awscli \ - chrony \ - conntrack \ - curl \ - jq \ - ec2-instance-connect \ - nfs-utils \ - socat \ - unzip \ - wget - -################################################################################ -### Time ####################################################################### -################################################################################ - -# Make sure Amazon Time Sync Service starts on boot. -sudo chkconfig chronyd on - -# Make sure that chronyd syncs RTC clock to the kernel. 
-cat < /etc/sysconfig/iptables" - -sudo mv $TEMPLATE_DIR/iptables-restore.service /etc/systemd/system/iptables-restore.service - -sudo systemctl daemon-reload -sudo systemctl enable iptables-restore - -################################################################################ -### Docker ##################################################################### -################################################################################ - -sudo yum install -y yum-utils device-mapper-persistent-data lvm2 - -INSTALL_DOCKER="${INSTALL_DOCKER:-true}" -if [[ "$INSTALL_DOCKER" == "true" ]]; then - sudo amazon-linux-extras enable docker - sudo yum install -y docker-${DOCKER_VERSION}* - sudo usermod -aG docker $USER - - # Remove all options from sysconfig docker. - sudo sed -i '/OPTIONS/d' /etc/sysconfig/docker - - sudo mkdir -p /etc/docker - sudo mv $TEMPLATE_DIR/docker-daemon.json /etc/docker/daemon.json - sudo chown root:root /etc/docker/daemon.json - - # Enable docker daemon to start on boot. - sudo systemctl daemon-reload - sudo systemctl enable docker -fi - -################################################################################ -### Logrotate ################################################################## -################################################################################ - -# kubelet uses journald which has built-in rotation and capped size. 
-# See man 5 journald.conf -sudo mv $TEMPLATE_DIR/logrotate-kube-proxy /etc/logrotate.d/kube-proxy -sudo chown root:root /etc/logrotate.d/kube-proxy -sudo mkdir -p /var/log/journal - -################################################################################ -### Kubernetes ################################################################# -################################################################################ - -sudo mkdir -p /etc/kubernetes/manifests -sudo mkdir -p /var/lib/kubernetes -sudo mkdir -p /var/lib/kubelet -sudo mkdir -p /opt/cni/bin - -wget https://github.com/containernetworking/cni/releases/download/${CNI_VERSION}/cni-${ARCH}-${CNI_VERSION}.tgz -wget https://github.com/containernetworking/cni/releases/download/${CNI_VERSION}/cni-${ARCH}-${CNI_VERSION}.tgz.sha512 -sudo sha512sum -c cni-${ARCH}-${CNI_VERSION}.tgz.sha512 -sudo tar -xvf cni-${ARCH}-${CNI_VERSION}.tgz -C /opt/cni/bin -rm cni-${ARCH}-${CNI_VERSION}.tgz cni-${ARCH}-${CNI_VERSION}.tgz.sha512 - -wget https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGIN_VERSION}/cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz -wget https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGIN_VERSION}/cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz.sha512 -sudo sha512sum -c cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz.sha512 -sudo tar -xvf cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz -C /opt/cni/bin -rm cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz cni-plugins-${ARCH}-${CNI_PLUGIN_VERSION}.tgz.sha512 - -echo "Downloading binaries from: s3://$BINARY_BUCKET_NAME" -S3_DOMAIN="s3-$BINARY_BUCKET_REGION" -if [ "$BINARY_BUCKET_REGION" = "us-east-1" ]; then - S3_DOMAIN="s3" -fi -S3_URL_BASE="https://$BINARY_BUCKET_NAME.$S3_DOMAIN.amazonaws.com/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/$ARCH" -S3_PATH="s3://$BINARY_BUCKET_NAME/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/$ARCH" - -BINARIES=( - kubelet - aws-iam-authenticator -) -for binary in 
${BINARIES[*]} ; do - if [[ ! -z "$AWS_ACCESS_KEY_ID" ]]; then - echo "AWS cli present - using it to copy binaries from s3." - aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$binary . - aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$binary.sha256 . - else - echo "AWS cli missing - using wget to fetch binaries from s3. Note: This won't work for private bucket." - sudo wget $S3_URL_BASE/$binary - sudo wget $S3_URL_BASE/$binary.sha256 - fi - sudo sha256sum -c $binary.sha256 - sudo chmod +x $binary - sudo mv $binary /usr/bin/ -done -sudo rm *.sha256 - -KUBELET_CONFIG="" -KUBERNETES_MINOR_VERSION=${KUBERNETES_VERSION%.*} -if [ "$KUBERNETES_MINOR_VERSION" = "1.10" ] || [ "$KUBERNETES_MINOR_VERSION" = "1.11" ]; then - KUBELET_CONFIG=kubelet-config.json -else - # For newer versions use this config to fix https://github.com/kubernetes/kubernetes/issues/74412. - KUBELET_CONFIG=kubelet-config-with-secret-polling.json -fi - -sudo mkdir -p /etc/kubernetes/kubelet -sudo mkdir -p /etc/systemd/system/kubelet.service.d -sudo mv $TEMPLATE_DIR/kubelet-kubeconfig /var/lib/kubelet/kubeconfig -sudo chown root:root /var/lib/kubelet/kubeconfig -if [ "$KUBERNETES_MINOR_VERSION" = "1.14" ]; then - sudo mv $TEMPLATE_DIR/1.14/kubelet.service /etc/systemd/system/kubelet.service -else - sudo mv $TEMPLATE_DIR/kubelet.service /etc/systemd/system/kubelet.service -fi -sudo chown root:root /etc/systemd/system/kubelet.service -sudo mv $TEMPLATE_DIR/$KUBELET_CONFIG /etc/kubernetes/kubelet/kubelet-config.json -sudo chown root:root /etc/kubernetes/kubelet/kubelet-config.json - - -sudo systemctl daemon-reload -# Disable the kubelet until the proper dropins have been configured -sudo systemctl disable kubelet - -################################################################################ -### EKS ######################################################################## -################################################################################ - -sudo mkdir -p /etc/eks -sudo mv 
$TEMPLATE_DIR/eni-max-pods.txt /etc/eks/eni-max-pods.txt -sudo mv $TEMPLATE_DIR/bootstrap.sh /etc/eks/bootstrap.sh -sudo chmod +x /etc/eks/bootstrap.sh - -################################################################################ -### AMI Metadata ############################################################### -################################################################################ - -BASE_AMI_ID=$(curl -s http://169.254.169.254/latest/meta-data/ami-id) -cat < /tmp/release -BASE_AMI_ID="$BASE_AMI_ID" -BUILD_TIME="$(date)" -BUILD_KERNEL="$(uname -r)" -ARCH="$(uname -m)" -EOF -sudo mv /tmp/release /etc/eks/release -sudo chown root:root /etc/eks/* - -################################################################################ -### Cleanup #################################################################### -################################################################################ - -# Clean up yum caches to reduce the image size -sudo yum clean all -sudo rm -rf \ - $TEMPLATE_DIR \ - /var/cache/yum - -# Clean up files to reduce confusion during debug -sudo rm -rf \ - /etc/hostname \ - /etc/machine-id \ - /etc/resolv.conf \ - /etc/ssh/ssh_host* \ - /home/ec2-user/.ssh/authorized_keys \ - /root/.ssh/authorized_keys \ - /var/lib/cloud/data \ - /var/lib/cloud/instance \ - /var/lib/cloud/instances \ - /var/lib/cloud/sem \ - /var/lib/dhclient/* \ - /var/lib/dhcp/dhclient.* \ - /var/lib/yum/history \ - /var/log/cloud-init-output.log \ - /var/log/cloud-init.log \ - /var/log/secure \ - /var/log/wtmp - -sudo touch /etc/machine-id diff --git a/log-collector-script/README.md b/log-collector-script/README.md index 7f0d529f8..eda832408 100644 --- a/log-collector-script/README.md +++ b/log-collector-script/README.md @@ -1,110 +1,3 @@ -### EKS Logs Collector - -This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. 
- -#### Usage -* Collect EKS logs using SSM agent, jump to below [section](#collect-eks-logs-using-ssm-agent) _(or)_ - -* Run this project as the root user: -``` -curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/eks-log-collector.sh -sudo bash eks-log-collector.sh -``` - -Confirm if the tarball file was successfully created (it can be .tgz or .tar.gz) - -#### Retrieving the logs -Download the tarball using your favourite Secure Copy tool. - -#### Example output -The project can be used in normal or enable_debug(**Caution: enable_debug will prompt to confirm if we can restart Docker daemon which would kill running containers**). - -``` -# sudo bash eks-log-collector.sh --help -USAGE: eks-log-collector --help [ --mode=collect|enable_debug --ignore_introspection=true|false --ignore_metrics=true|false ] - -OPTIONS: - --mode Has two parameters 1) collect or 2) enable_debug,: - collect Gathers basic operating system, Docker daemon, and - Amazon EKS related config files and logs. This is the default mode. - enable_debug Enables debug mode for the Docker daemon(Not for production use) - - --ignore_introspection To ignore introspection of IPAMD; Pass this flag if DISABLE_INTROSPECTION is enabled on CNI - - --ignore_metrics To ignore prometheus metrics collection; Pass this flag if DISABLE_METRICS enabled on CNI - - --help Show this help message. - -Example to Ignore IPAMD introspection: -sudo bash eks-log-collector.sh --ignore_introspection=true - -Example to Ignore IPAMD Prometheus metrics collection: -sudo bash eks-log-collector.sh --ignore_metrics=true - -Example to Ignore IPAMD introspection and Prometheus metrics collection: -sudo bash eks-log-collector.sh --ignore_introspection=true --ignore_metrics=true -``` -#### Example output in normal mode -The following output shows this project running in normal mode. - -``` -sudo bash eks-log-collector.sh - - This is version 0.5.0. 
New versions can be found at https://github.com/awslabs/amazon-eks-ami - -Trying to collect common operating system logs... -Trying to collect kernel logs... -Trying to collect mount points and volume information... -Trying to collect SELinux status... -Trying to collect iptables information... -Trying to collect installed packages... -Trying to collect active system services... -Trying to collect Docker daemon information... -Trying to collect kubelet information... -Trying to collect L-IPAMD information... -Trying to collect sysctls information... -Trying to collect networking infomation... -Trying to collect CNI configuration information... -Trying to collect running Docker containers and gather container data... -Trying to collect Docker daemon logs... -Trying to archive gathered information... - - Done... your bundled logs are located in /opt/log-collector/eks_i-0717c9d54b6cfaa19_2019-02-02_0103-UTC_0.0.4.tar.gz -``` - - -### Collect EKS logs using SSM agent -#### To run EKS log collector script on Worker Node(s) and upload the bundle(tar) to a S3 Bucket using SSM agent, please follow below steps - -##### *Prerequisites*: - -* Configure AWS CLI on the system where you will run the below commands. The IAM entity (User/Role) should have permissions to run/invoke `aws ssm send-command` and `get-command-invocation` commands. - -* SSM agent should be installed and running on Worker Node(s). [How to Install SSM Agent link](https://docs.aws.amazon.com/systems-manager/latest/userguide/sysman-manual-agent-install.html) - -* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. 
- -        *Note:* For more granular control of the IAM permission check [AWS Systems Manager Permissions link ](https://docs.aws.amazon.com/systems-manager/latest/userguide/auth-and-access-control-permissions-reference.html) - -* A S3 bucket location is required which is taken as an input parameter to `aws ssm send-command` command, to which the logs should be pushed. - - -#### *To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s):* - -1. Create the SSM document named "EKSLogCollector" using the following command:
-``` -aws ssm create-document --name "EKSLogCollector" --document-type "Command" --content https://raw.githubusercontent.com/nithu0115/eks-logs-collector/master/eks-ssm-content.json -``` -2. To execute the bash script in the SSM document and to collect the logs from worker, run the following command:
-``` -aws ssm send-command --instance-ids --document-name "EKSLogCollector" --parameters "bucketName=" --output json -``` -3. To check the status of SSM command submitted in previous step use the command
-``` -aws ssm get-command-invocation --command-id "" --instance-id "" --output text -``` -    `SSM command ID`One of the response parameters after running `aws ssm send-command` in step2
-    `EC2 Instance ID`The EC2 Instance ID provided in the `aws ssm send-command` in step2 - -4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. +### EKS Logs Collector +This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. diff --git a/log-collector-script/eks-log-collector.sh b/log-collector-script/eks-log-collector.sh deleted file mode 100644 index 44ae0d7d5..000000000 --- a/log-collector-script/eks-log-collector.sh +++ /dev/null @@ -1,570 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You may -# not use this file except in compliance with the License. A copy of the -# License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. -# -# This script generates a file in go with the license contents as a constant - -# Set language to C to make sorting consistent among different environments. 
- -export LANG="C" -export LC_ALL="C" - -# Global options -readonly PROGRAM_VERSION="0.5.1" -readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" -readonly PROGRAM_NAME="$(basename "$0" .sh)" -readonly PROGRAM_DIR="/opt/log-collector" -readonly COLLECT_DIR="/tmp/${PROGRAM_NAME}" -readonly DAYS_10=$(date -d "-10 days" '+%Y-%m-%d %H:%M') -INSTANCE_ID="" -INIT_TYPE="" -PACKAGE_TYPE="" - -# Script run defaults -mode='collect' -ignore_introspection='false' -ignore_metrics='false' - -REQUIRED_UTILS=( - timeout - curl - tar - date - mkdir - iptables - iptables-save - grep - awk - df - sysctl -) - -COMMON_DIRECTORIES=( - kernel - system - docker - storage - var_log - networking - ipamd # eks - sysctls # eks - kubelet # eks - cni # eks -) - -COMMON_LOGS=( - syslog - messages - aws-routed-eni # eks - containers # eks - pods # eks - cloud-init.log - cloud-init-output.log - kube-proxy.log -) - -# L-IPAMD introspection data points -IPAMD_DATA=( - enis - pods - networkutils-env-settings - ipamd-env-settings - eni-configs -) - -help() { - echo "" - echo "USAGE: ${PROGRAM_NAME} --help [ --mode=collect|enable_debug --ignore_introspection=true|false --ignore_metrics=true|false ]" - echo "" - echo "OPTIONS:" - echo " --mode Has two parameters 1) collect or 2) enable_debug,:" - echo " collect Gathers basic operating system, Docker daemon, and" - echo " Amazon EKS related config files and logs. This is the default mode." - echo " enable_debug Enables debug mode for the Docker daemon(Not for production use)" - echo "" - echo " --ignore_introspection To ignore introspection of IPAMD; Pass this flag if DISABLE_INTROSPECTION is enabled on CNI" - echo "" - echo " --ignore_metrics Variable To ignore prometheus metrics collection; Pass this flag if DISABLE_METRICS enabled on CNI" - echo "" - echo " --help Show this help message." 
- echo "" -} - -parse_options() { - local count="$#" - - for i in $(seq "${count}"); do - eval arg="\$$i" - param="$(echo "${arg}" | awk -F '=' '{print $1}' | sed -e 's|--||')" - val="$(echo "${arg}" | awk -F '=' '{print $2}')" - - case "${param}" in - mode) - eval "${param}"="${val}" - ;; - ignore_introspection) - eval "${param}"="${val}" - ;; - ignore_metrics) - eval "${param}"="${val}" - ;; - help) - help && exit 0 - ;; - *) - echo "Parameter not found: '$param'" - help && exit 1 - ;; - esac - done -} - -ok() { - echo -} - -try() { - local action=$* - echo -n "Trying to $action... " -} - -warning() { - local reason=$* - echo -e "\n\n\tWarning: $reason " -} - -die() { - echo -e "\n\tFatal Error! $* Exiting!\n" - exit 1 -} - -is_root() { - if [[ "$(id -u)" -ne 0 ]]; then - die "This script must be run as root!" - fi -} - -check_required_utils() { - for utils in ${REQUIRED_UTILS[*]}; do - # if exit code of "command -v" not equal to 0, fail - if ! command -v "${utils}" >/dev/null 2>&1; then - die "Application \"${utils}\" is missing, please install \"${utils}\" as this script requires it, and will not function without it." - fi - done -} - -version_output() { - echo -e "\n\tThis is version ${PROGRAM_VERSION}. New versions can be found at ${PROGRAM_SOURCE}\n" -} - -log_parameters() { - echo mode: "${mode}" >> "${COLLECT_DIR}"/system/script-params.txt - echo ignore_introspection: "${ignore_introspection}" >> "${COLLECT_DIR}"/system/script-params.txt - echo ignore_metrics: "${ignore_metrics}" >> "${COLLECT_DIR}"/system/script-params.txt -} - -systemd_check() { - if command -v systemctl >/dev/null 2>&1; then - INIT_TYPE="systemd" - else - INIT_TYPE="other" - fi -} - -create_directories() { - # Make sure the directory the script lives in is there. Not an issue if - # the EKS AMI is used, as it will have it. 
- mkdir --parents "${PROGRAM_DIR}" - - # Common directors creation - for directory in ${COMMON_DIRECTORIES[*]}; do - mkdir --parents "${COLLECT_DIR}"/"${directory}" - done -} - -get_instance_metadata() { - readonly INSTANCE_ID=$(curl --max-time 3 --silent http://169.254.169.254/latest/meta-data/instance-id 2>/dev/null) - echo "${INSTANCE_ID}" > "${COLLECT_DIR}"/system/instance-id.txt -} - -is_diskfull() { - local threshold - local result - - # 1.5GB in KB - threshold=1500000 - result=$(df / | grep --invert-match "Filesystem" | awk '{ print $4 }') - - # If "result" is less than or equal to "threshold", fail. - if [[ "${result}" -le "${threshold}" ]]; then - die "Free space on root volume is less than or equal to $((threshold>>10))MB, please ensure adequate disk space to collect and store the log files." - fi -} - -cleanup() { - rm --recursive --force "${COLLECT_DIR}" >/dev/null 2>&1 -} - -init() { - check_required_utils - version_output - create_directories - # Log parameters passed when this script is invoked - log_parameters - is_root - systemd_check - get_pkgtype -} - -collect() { - init - is_diskfull - get_instance_metadata - get_common_logs - get_kernel_info - get_mounts_info - get_selinux_info - get_iptables_info - get_pkglist - get_system_services - get_docker_info - get_k8s_info - get_ipamd_info - get_sysctls_info - get_networking_info - get_cni_config - get_docker_logs -} - -enable_debug() { - init - enable_docker_debug -} - -pack() { - try "archive gathered information" - - tar --create --verbose --gzip --file "${PROGRAM_DIR}"/eks_"${INSTANCE_ID}"_"$(date --utc +%Y-%m-%d_%H%M-%Z)"_"${PROGRAM_VERSION}".tar.gz --directory="${COLLECT_DIR}" . > /dev/null 2>&1 - - ok -} - -finished() { - if [[ "${mode}" == "collect" ]]; then - cleanup - echo -e "\n\tDone... 
your bundled logs are located in ${PROGRAM_DIR}/eks_${INSTANCE_ID}_$(date --utc +%Y-%m-%d_%H%M-%Z)_${PROGRAM_VERSION}.tar.gz\n" - fi -} - -get_mounts_info() { - try "collect mount points and volume information" - mount > "${COLLECT_DIR}"/storage/mounts.txt - echo >> "${COLLECT_DIR}"/storage/mounts.txt - df --human-readable >> "${COLLECT_DIR}"/storage/mounts.txt - lsblk > "${COLLECT_DIR}"/storage/lsblk.txt - lvs > "${COLLECT_DIR}"/storage/lvs.txt - pvs > "${COLLECT_DIR}"/storage/pvs.txt - vgs > "${COLLECT_DIR}"/storage/vgs.txt - - ok -} - -get_selinux_info() { - try "collect SELinux status" - - if ! command -v getenforce >/dev/null 2>&1; then - echo -e "SELinux mode:\n\t Not installed" > "${COLLECT_DIR}"/system/selinux.txt - else - echo -e "SELinux mode:\n\t $(getenforce)" > "${COLLECT_DIR}"/system/selinux.txt - fi - - ok -} - -get_iptables_info() { - try "collect iptables information" - - iptables --wait 1 --numeric --verbose --list --table mangle > "${COLLECT_DIR}"/networking/iptables-mangle.txt - iptables --wait 1 --numeric --verbose --list --table filter > "${COLLECT_DIR}"/networking/iptables-filter.txt - iptables --wait 1 --numeric --verbose --list --table nat > "${COLLECT_DIR}"/networking/iptables-nat.txt - iptables --wait 1 --numeric --verbose --list > "${COLLECT_DIR}"/networking/iptables.txt - iptables-save > "${COLLECT_DIR}"/networking/iptables-save.txt - - ok -} - -get_common_logs() { - try "collect common operating system logs" - - for entry in ${COMMON_LOGS[*]}; do - if [[ -e "/var/log/${entry}" ]]; then - if [[ "${entry}" == "messages" ]]; then - tail -c 10M /var/log/messages > "${COLLECT_DIR}"/var_log/messages - continue - fi - cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/var_log/ - fi - done - - ok -} - -get_kernel_info() { - try "collect kernel logs" - - if [[ -e "/var/log/dmesg" ]]; then - cp --force /var/log/dmesg "${COLLECT_DIR}/kernel/dmesg.boot" - fi - dmesg > "${COLLECT_DIR}/kernel/dmesg.current" - dmesg --ctime > 
"${COLLECT_DIR}/kernel/dmesg.human.current" - uname -a > "${COLLECT_DIR}/kernel/uname.txt" - - ok -} - -get_docker_logs() { - try "collect Docker daemon logs" - - case "${INIT_TYPE}" in - systemd) - journalctl --unit=docker --since "${DAYS_10}" > "${COLLECT_DIR}"/docker/docker.log - ;; - other) - for entry in docker upstart/docker; do - if [[ -e "/var/log/${entry}" ]]; then - cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/docker/ - fi - done - ;; - *) - warning "The current operating system is not supported." - ;; - esac - - ok -} - -get_k8s_info() { - try "collect kubelet information" - - if [[ -n "${KUBECONFIG:-}" ]]; then - command -v kubectl > /dev/null && kubectl get --kubeconfig=${KUBECONFIG} svc > "${COLLECT_DIR}"/kubelet/svc.log - kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml - - elif [[ -f /etc/eksctl/kubeconfig.yaml ]]; then - KUBECONFIG="/etc/eksctl/kubeconfig.yaml" - command -v kubectl > /dev/null && kubectl get --kubeconfig=${KUBECONFIG} svc > "${COLLECT_DIR}"/kubelet/svc.log - kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml - - elif [[ -f /etc/systemd/system/kubelet.service ]]; then - KUBECONFIG=`grep kubeconfig /etc/systemd/system/kubelet.service | awk '{print $2}'` - command -v kubectl > /dev/null && kubectl get --kubeconfig=${KUBECONFIG} svc > "${COLLECT_DIR}"/kubelet/svc.log - kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml - - else - echo "======== Unable to find KUBECONFIG, IGNORING POD DATA =========" >> "${COLLECT_DIR}"/kubelet/svc.log - fi - - case "${INIT_TYPE}" in - systemd) - timeout 75 journalctl --unit=kubelet --since "${DAYS_10}" > "${COLLECT_DIR}"/kubelet/kubelet.log - timeout 75 journalctl --unit=kubeproxy --since "${DAYS_10}" > "${COLLECT_DIR}"/kubelet/kubeproxy.log - - for entry in kubelet kube-proxy; do - systemctl cat "${entry}" > 
"${COLLECT_DIR}"/kubelet/"${entry}"_service.txt 2>&1 - done - ;; - *) - warning "The current operating system is not supported." - ;; - esac - - ok -} - -get_ipamd_info() { - if [[ "${ignore_introspection}" == "false" ]]; then - try "collect L-IPAMD introspectioon information" - for entry in ${IPAMD_DATA[*]}; do - curl --max-time 3 --silent http://localhost:61679/v1/"${entry}" >> "${COLLECT_DIR}"/ipamd/"${entry}".txt - done - else - echo "Ignoring IPAM introspection stats as mentioned"| tee -a "${COLLECT_DIR}"/ipamd/ipam_introspection_ignore.txt - - fi - - if [[ "${ignore_metrics}" == "false" ]]; then - try "collect L-IPAMD prometheus metrics" - curl --max-time 3 --silent http://localhost:61678/metrics > "${COLLECT_DIR}"/ipamd/metrics.txt 2>&1 - else - echo "Ignoring Prometheus Metrics collection as mentioned"| tee -a "${COLLECT_DIR}"/ipamd/ipam_metrics_ignore.txt - fi - - ok -} - -get_sysctls_info() { - try "collect sysctls information" - # dump all sysctls - sysctl --all >> "${COLLECT_DIR}"/sysctls/sysctl_all.txt 2>/dev/null - - ok -} - -get_networking_info() { - try "collect networking infomation" - - # ifconfig - timeout 75 ifconfig > "${COLLECT_DIR}"/networking/ifconfig.txt - - # ip rule show - timeout 75 ip rule show > "${COLLECT_DIR}"/networking/iprule.txt - timeout 75 ip route show table all >> "${COLLECT_DIR}"/networking/iproute.txt - - ok -} - -get_cni_config() { - try "collect CNI configuration information" - - if [[ -e "/etc/cni/net.d/" ]]; then - cp --force --recursive --dereference /etc/cni/net.d/* "${COLLECT_DIR}"/cni/ - fi - - ok -} - -get_pkgtype() { - if [[ "$(command -v rpm )" ]]; then - PACKAGE_TYPE=rpm - elif [[ "$(command -v deb )" ]]; then - PACKAGE_TYPE=deb - else - PACKAGE_TYPE='unknown' - fi -} - -get_pkglist() { - try "collect installed packages" - - case "${PACKAGE_TYPE}" in - rpm) - rpm -qa > "${COLLECT_DIR}"/system/pkglist.txt 2>&1 - ;; - deb) - dpkg --list > "${COLLECT_DIR}"/system/pkglist.txt 2>&1 - ;; - *) - warning "Unknown package 
type." - ;; - esac - - ok -} - -get_system_services() { - try "collect active system services" - - case "${INIT_TYPE}" in - systemd) - systemctl list-units > "${COLLECT_DIR}"/system/services.txt 2>&1 - ;; - other) - initctl list | awk '{ print $1 }' | xargs -n1 initctl show-config > "${COLLECT_DIR}"/system/services.txt 2>&1 - printf "\n\n\n\n" >> "${COLLECT_DIR}"/system/services.txt 2>&1 - service --status-all >> "${COLLECT_DIR}"/system/services.txt 2>&1 - ;; - *) - warning "Unable to determine active services." - ;; - esac - - timeout 75 top -b -n 1 > "${COLLECT_DIR}"/system/top.txt 2>&1 - timeout 75 ps fauxwww > "${COLLECT_DIR}"/system/ps.txt 2>&1 - timeout 75 netstat -plant > "${COLLECT_DIR}"/system/netstat.txt 2>&1 - - ok -} - -get_docker_info() { - try "collect Docker daemon information" - - if [[ "$(pgrep dockerd)" -ne 0 ]]; then - timeout 75 docker info > "${COLLECT_DIR}"/docker/docker-info.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker info output \" " - timeout 75 docker ps --all --no-trunc > "${COLLECT_DIR}"/docker/docker-ps.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker ps --all --no-truc output \" " - timeout 75 docker images > "${COLLECT_DIR}"/docker/docker-images.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker images output \" " - timeout 75 docker version > "${COLLECT_DIR}"/docker/docker-version.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker version output \" " - else - warning "The Docker daemon is not running." - fi - - ok -} - -enable_docker_debug() { - try "enable debug mode for the Docker daemon" - - case "${PACKAGE_TYPE}" in - rpm) - - if [[ -e /etc/sysconfig/docker ]] && grep -q "^\s*OPTIONS=\"-D" /etc/sysconfig/docker - then - echo "Debug mode is already enabled." 
- ok - else - if [[ -e /etc/sysconfig/docker ]]; then - echo "OPTIONS=\"-D \$OPTIONS\"" >> /etc/sysconfig/docker - - try "restart Docker daemon to enable debug mode" - service docker restart - ok - fi - fi - ;; - *) - warning "The current operating system is not supported." - - ok - ;; - esac -} - -confirm_enable_docker_debug() { - read -r -p "${1:-Enabled Docker Debug will restart the Docker Daemon and restart all running container. Are you sure? [y/N]} " USER_INPUT - case "$USER_INPUT" in - [yY][eE][sS]|[yY]) - enable_docker_debug - ;; - *) - die "\"No\" was selected." - ;; - esac -} - -parse_options "$@" - -case "${mode}" in - collect) - collect - pack - finished - ;; - enable_debug) - confirm_enable_docker_debug - finished - ;; - *) - help && exit 1 - ;; -esac diff --git a/log-collector-script/eks-ssm-content.json b/log-collector-script/eks-ssm-content.json deleted file mode 100644 index 8c237a30f..000000000 --- a/log-collector-script/eks-ssm-content.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "schemaVersion":"2.2", - "description":"EKS Log Collector", - "parameters":{ - "bucketName":{ - "type": "String", - "default": "Enabled" - } - }, - "mainSteps":[ - { - "action":"aws:runShellScript", - "name":"PatchLinux", - "precondition":{ - "StringEquals":[ - "platformType", - "Linux" - ] - }, - "inputs":{ - "runCommand":[ - "curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/eks-log-collector.sh", - "bash ./eks-log-collector.sh >/dev/null 2>&1", - "echo \"EKS logs collected\"", - "if [ -f /usr/local/bin/aws ]; then", - "echo \"AWS_already_installed\"", - "else", - "echo \"Installing AWSCLI\"", - "curl \"https://s3.amazonaws.com/aws-cli/awscli-bundle.zip\" -o \"awscli-bundle.zip\" >/dev/null 2>&1", - "yum install unzip -y >/dev/null 2>&1", - "unzip awscli-bundle.zip >/dev/null 2>&1", - "./awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws", - "echo \"AWSCLI version is\"", - "/usr/local/bin/aws --version", - "fi", - "echo 
\"Pushing to S3\"", - "/usr/local/bin/aws s3 cp --recursive /opt/log-collector/ s3://{{bucketName}}", - "echo \"Logs uploaded to S3\"" - ] - } - } - ] -} - diff --git a/log-collector-script/linux/README.md b/log-collector-script/linux/README.md new file mode 100644 index 000000000..9bdad98bd --- /dev/null +++ b/log-collector-script/linux/README.md @@ -0,0 +1,140 @@ +### EKS Logs Collector (Linux) + +This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. + +#### Usage + +At a high level, you run this script on your Kubernetes node, and it will collect system information, configuration and logs that will assist in troubleshooting issues with your node. AWS support and service team engineers can use this information once provided via a customer support case. + +* Collect EKS logs using SSM agent, jump to below [section](#collect-eks-logs-using-ssm-agent) _(or)_ + +* Run this project as the root user + +``` +curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-log-collector.sh +sudo bash eks-log-collector.sh +``` + +Confirm if the tarball file was successfully created (it can be .tgz or .tar.gz) + +#### Retrieving the logs + +Download the tarball using your favorite Secure Copy tool. + +#### Example output + +The project can be used in normal or enable_debug (**Caution: enable_debug will prompt to confirm if we can restart Docker daemon which would kill running containers**). + +``` +$ sudo bash eks-log-collector.sh --help + +USAGE: eks-log-collector --help [ --ignore_introspection=true|false --ignore_metrics=true|false ] + +OPTIONS: + + --ignore_introspection To ignore introspection of IPAMD; Pass this flag if DISABLE_INTROSPECTION is enabled on CNI + + --ignore_metrics Variable To ignore prometheus metrics collection; Pass this flag if DISABLE_METRICS enabled on CNI + + --help Show this help message. 
+``` + +#### Example output in normal mode + +The following output shows this project running in normal mode. + +``` +$ sudo bash eks-log-collector.sh + + This is version 0.7.3. New versions can be found at https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/ + +Trying to collect common operating system logs... +Trying to collect kernel logs... +Trying to collect mount points and volume information... +Trying to collect SELinux status... +Trying to collect iptables information... +Trying to collect installed packages... +Trying to collect active system services... +Trying to Collect Containerd daemon information... +Trying to Collect Containerd running information... +Trying to Collect Docker daemon information... + + Warning: The Docker daemon is not running. + +Trying to collect kubelet information... +Trying to collect L-IPAMD introspection information... Trying to collect L-IPAMD prometheus metrics... Trying to collect L-IPAMD checkpoint... +Trying to collect Multus logs if they exist... +Trying to collect sysctls information... +Trying to collect networking infomation... conntrack v1.4.4 (conntrack-tools): 165 flow entries have been shown. + +Trying to collect CNI configuration information... +Trying to collect Docker daemon logs... +Trying to Collect sandbox-image daemon information... +Trying to Collect CPU Throttled Process Information... +Trying to Collect IO Throttled Process Information... +Trying to archive gathered information... + + Done... your bundled logs are located in /var/log/eks_i-XXXXXXXXXXXXXXXXX_2022-12-19_1639-UTC_0.7.3.tar.gz +``` + +### Collect EKS logs using SSM agent + +#### To run EKS log collector script on Worker Node(s) and upload the bundle(tar) to a S3 Bucket using SSM agent, please follow below steps + +##### Prerequisites + +* Configure AWS CLI on the system where you will run the below commands. 
The IAM entity (User/Role) should have permissions to run/invoke `aws ssm create-document`, `aws ssm send-command` and `aws ssm get-command-invocation` commands. + + * `ssm:CreateDocument` + * `ssm:GetCommandInvocation` + * `ssm:SendCommand` + +* SSM agent should be installed and running on Worker Node(s). [How to Install SSM Agent link](https://docs.aws.amazon.com/systems-manager/latest/userguide/sysman-manual-agent-install.html) + +* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonSSMManagedInstanceCore` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonSSMManagedInstanceCore` has `S3:PutObject` permission to all S3 resources. + +*Note:* For more granular control of the IAM permission check [Actions defined by AWS Systems Manager](https://docs.aws.amazon.com/IAM/latest/UserGuide/list_awssystemsmanager.html%23awssystemsmanager-actions-as-permissions) + +* A S3 bucket location is required which is taken as an input parameter to `aws ssm send-command` command, to which the logs should be pushed. + +#### To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s) + +1. Create the SSM document named "EKSLogCollector" using the following commands: + +``` +curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-ssm-content.json +aws ssm create-document \ + --name "EKSLogCollectorLinux" \ + --document-type "Command" \ + --content file://eks-ssm-content.json +``` + +2. To execute the bash script in the SSM document and to collect the logs from worker, run the following command: + +``` +aws ssm send-command \ + --instance-ids \ + --document-name "EKSLogCollectorLinux" \ + --parameters "bucketName=" \ + --output json +``` + +3. 
To check the status of SSM command submitted in previous step use the command + +``` +aws ssm get-command-invocation \ + --command-id "" \ + --instance-id "" \ + --output text +``` + +4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. + +### Collect User Data + +If collecting user data is required as apart of troubleshooting please use the commands below to retrieve data via IMDSv2: + +``` +TOKEN=`curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600"` \ +&& curl -H "X-aws-ec2-metadata-token: $TOKEN" -v http://169.254.169.254/latest/user-data +``` diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh new file mode 100644 index 000000000..72f6caf22 --- /dev/null +++ b/log-collector-script/linux/eks-log-collector.sh @@ -0,0 +1,780 @@ +#!/usr/bin/env bash +# Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may +# not use this file except in compliance with the License. A copy of the +# License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# This script generates a file in go with the license contents as a constant + +# Set language to C to make sorting consistent among different environments. 
+ +export LANG="C" +export LC_ALL="C" + +# Global options +readonly PROGRAM_VERSION="0.7.6" +readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" +readonly PROGRAM_NAME="$(basename "$0" .sh)" +readonly PROGRAM_DIR="/opt/log-collector" +readonly LOG_DIR="/var/log" +readonly COLLECT_DIR="/tmp/eks-log-collector" +readonly CURRENT_TIME=$(date --utc +%Y-%m-%d_%H%M-%Z) +readonly DAYS_10=$(date -d "-10 days" '+%Y-%m-%d %H:%M') +INSTANCE_ID="" +INIT_TYPE="" +PACKAGE_TYPE="" + +# Script run defaults +ignore_introspection='false' +ignore_metrics='false' + +REQUIRED_UTILS=( + timeout + curl + tar + date + mkdir + grep + awk + df + sysctl +) + +COMMON_DIRECTORIES=( + kernel + modinfo + system + docker + containerd + storage + var_log + networking + sandbox-image # eks + ipamd # eks + sysctls # eks + kubelet # eks + cni # eks +) + +COMMON_LOGS=( + syslog + messages + aws-routed-eni # eks + containers # eks + pods # eks + cloud-init.log + cloud-init-output.log + user-data.log + kube-proxy.log +) + +# L-IPAMD introspection data points +IPAMD_DATA=( + enis + pods + networkutils-env-settings + ipamd-env-settings + eni-configs +) + +help() { + echo "" + echo "USAGE: ${PROGRAM_NAME} --help [ --ignore_introspection=true|false --ignore_metrics=true|false ]" + echo "" + echo "OPTIONS:" + echo "" + echo " --ignore_introspection To ignore introspection of IPAMD; Pass this flag if DISABLE_INTROSPECTION is enabled on CNI" + echo "" + echo " --ignore_metrics Variable To ignore prometheus metrics collection; Pass this flag if DISABLE_METRICS enabled on CNI" + echo "" + echo " --help Show this help message." 
+ echo "" +} + +parse_options() { + local count="$#" + + for i in $(seq "${count}"); do + eval arg="\$$i" + param="$(echo "${arg}" | awk -F '=' '{print $1}' | sed -e 's|--||')" + val="$(echo "${arg}" | awk -F '=' '{print $2}')" + + case "${param}" in + ignore_introspection) + eval "${param}"="${val}" + ;; + ignore_metrics) + eval "${param}"="${val}" + ;; + help) + help && exit 0 + ;; + *) + echo "Parameter not found: '$param'" + help && exit 1 + ;; + esac + done +} + +ok() { + echo +} + +try() { + local action=$* + echo -n "Trying to $action... " +} + +warning() { + local reason=$* + echo -e "\n\n\tWarning: $reason " +} + +die() { + echo -e "\n\tFatal Error! $* Exiting!\n" + exit 1 +} + +is_root() { + if [[ "$(id -u)" -ne 0 ]]; then + die "This script must be run as root!" + fi +} + +check_required_utils() { + for utils in ${REQUIRED_UTILS[*]}; do + # If exit code of "command -v" not equal to 0, fail + if ! command -v "${utils}" > /dev/null 2>&1; then + echo -e "\nApplication \"${utils}\" is missing, please install \"${utils}\" as this script requires it." + fi + done +} + +version_output() { + echo -e "\n\tThis is version ${PROGRAM_VERSION}. New versions can be found at ${PROGRAM_SOURCE}\n" +} + +log_parameters() { + echo ignore_introspection: "${ignore_introspection}" >> "${COLLECT_DIR}"/system/script-params.txt + echo ignore_metrics: "${ignore_metrics}" >> "${COLLECT_DIR}"/system/script-params.txt +} + +systemd_check() { + if command -v systemctl > /dev/null 2>&1; then + INIT_TYPE="systemd" + if command -v snap > /dev/null 2>&1; then + INIT_TYPE="snap" + fi + else + INIT_TYPE="other" + fi +} + +# Get token for IMDSv2 calls +IMDS_TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 360") + +create_directories() { + # Make sure the directory the script lives in is there. Not an issue if + # the EKS AMI is used, as it will have it. 
+ mkdir -p "${PROGRAM_DIR}" + + # Common directories creation + for directory in ${COMMON_DIRECTORIES[*]}; do + mkdir -p "${COLLECT_DIR}"/"${directory}" + done +} + +get_instance_id() { + INSTANCE_ID_FILE="/var/lib/cloud/data/instance-id" + + if grep -q '^i-' "$INSTANCE_ID_FILE"; then + cp ${INSTANCE_ID_FILE} "${COLLECT_DIR}"/system/instance-id.txt + readonly INSTANCE_ID=$(cat "${COLLECT_DIR}"/system/instance-id.txt) + else + readonly INSTANCE_ID=$(curl -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/instance-id) + if [ 0 -eq $? ]; then # Check if previous command was successful. + echo "${INSTANCE_ID}" > "${COLLECT_DIR}"/system/instance-id.txt + else + warning "Unable to find EC2 Instance Id. Skipped Instance Id." + fi + fi +} + +get_region() { + if REGION=$(curl -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/placement/region); then + echo "${REGION}" > "${COLLECT_DIR}"/system/region.txt + else + warning "Unable to find EC2 Region, skipping." + fi + + if AZ=$(curl -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/placement/availability-zone); then + echo "${AZ}" > "${COLLECT_DIR}"/system/availability-zone.txt + else + warning "Unable to find EC2 AZ, skipping." + fi +} + +is_diskfull() { + local threshold + local result + + # 1.5GB in KB + threshold=1500000 + result=$(timeout 75 df / | grep --invert-match "Filesystem" | awk '{ print $4 }') + + # If "result" is less than or equal to "threshold", fail. + if [[ "${result}" -le "${threshold}" ]]; then + die "Free space on root volume is less than or equal to $((threshold >> 10))MB, please ensure adequate disk space to collect and store the log files." 
+ fi +} + +cleanup() { + #guard rails to avoid accidental deletion of unknown data + if [[ "${COLLECT_DIR}" == "/tmp/eks-log-collector" ]]; then + rm --recursive --force "${COLLECT_DIR}" > /dev/null 2>&1 + else + echo "Unable to Cleanup as {COLLECT_DIR} variable is modified. Please cleanup manually!" + fi +} + +init() { + check_required_utils + version_output + create_directories + # Log parameters passed when this script is invoked + log_parameters + is_root + systemd_check + get_pkgtype +} + +collect() { + init + is_diskfull + get_instance_id + get_region + get_common_logs + get_kernel_info + get_modinfo + get_mounts_info + get_selinux_info + get_iptables_info + get_iptables_legacy_info + get_pkglist + get_system_services + get_containerd_info + get_docker_info + get_k8s_info + get_ipamd_info + get_multus_info + get_sysctls_info + get_networking_info + get_cni_config + get_cni_configuration_variables + get_network_policy_ebpf_info + get_docker_logs + get_sandboxImage_info + get_cpu_throttled_processes + get_io_throttled_processes +} + +pack() { + try "archive gathered information" + + tar --create --verbose --gzip --file "${LOG_DIR}"/eks_"${INSTANCE_ID}"_"${CURRENT_TIME}"_"${PROGRAM_VERSION}".tar.gz --directory="${COLLECT_DIR}" . > /dev/null 2>&1 + + ok +} + +finished() { + cleanup + echo -e "\n\tDone... 
your bundled logs are located in ${LOG_DIR}/eks_${INSTANCE_ID}_${CURRENT_TIME}_${PROGRAM_VERSION}.tar.gz\n" +} + +get_mounts_info() { + try "collect mount points and volume information" + mount > "${COLLECT_DIR}"/storage/mounts.txt + echo >> "${COLLECT_DIR}"/storage/mounts.txt + timeout 75 df --human-readable >> "${COLLECT_DIR}"/storage/mounts.txt + timeout 75 df --inodes >> "${COLLECT_DIR}"/storage/inodes.txt + lsblk > "${COLLECT_DIR}"/storage/lsblk.txt + lvs > "${COLLECT_DIR}"/storage/lvs.txt + pvs > "${COLLECT_DIR}"/storage/pvs.txt + vgs > "${COLLECT_DIR}"/storage/vgs.txt + mount -t xfs | awk '{print $1}' | xargs -I{} -- sh -c "xfs_info {}; xfs_db -r -c 'freesp -s' {}" > "${COLLECT_DIR}"/storage/xfs.txt + mount | grep ^overlay | sed 's/.*upperdir=//' | sed 's/,.*//' | xargs -n 1 timeout 75 du -sh | grep -v ^0 > "${COLLECT_DIR}"/storage/pod_local_storage.txt + ok +} + +get_selinux_info() { + try "collect SELinux status" + + if ! command -v getenforce > /dev/null 2>&1; then + echo -e "SELinux mode:\n\t Not installed" > "${COLLECT_DIR}"/system/selinux.txt + else + echo -e "SELinux mode:\n\t $(getenforce)" > "${COLLECT_DIR}"/system/selinux.txt + fi + + ok +} + +get_iptables_info() { + if ! 
command -v iptables > /dev/null 2>&1; then + echo "IPtables not installed" | tee -a "${COLLECT_DIR}"/iptables.txt + else + try "collect iptables information" + iptables --wait 1 --numeric --verbose --list --table mangle | tee "${COLLECT_DIR}"/networking/iptables-mangle.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-mangle.txt + iptables --wait 1 --numeric --verbose --list --table filter | tee "${COLLECT_DIR}"/networking/iptables-filter.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-filter.txt + iptables --wait 1 --numeric --verbose --list --table nat | tee "${COLLECT_DIR}"/networking/iptables-nat.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-nat.txt + iptables --wait 1 --numeric --verbose --list | tee "${COLLECT_DIR}"/networking/iptables.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables.txt + iptables-save > "${COLLECT_DIR}"/networking/iptables-save.txt + fi + + ok +} + +get_iptables_legacy_info() { + if ! 
command -v iptables-legacy > /dev/null 2>&1; then + echo "IPtables-legacy not installed" | tee -a "${COLLECT_DIR}"/iptables-legacy.txt + else + try "collect iptables-legacy information" + iptables-legacy --wait 1 --numeric --verbose --list --table mangle | tee "${COLLECT_DIR}"/networking/iptables-legacy-mangle.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-legacy-mangle.txt + iptables-legacy --wait 1 --numeric --verbose --list --table filter | tee "${COLLECT_DIR}"/networking/iptables-legacy-filter.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-legacy-filter.txt + iptables-legacy --wait 1 --numeric --verbose --list --table nat | tee "${COLLECT_DIR}"/networking/iptables-legacy-nat.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-legacy-nat.txt + iptables-legacy --wait 1 --numeric --verbose --list | tee "${COLLECT_DIR}"/networking/iptables-legacy.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-legacy.txt + iptables-legacy-save > "${COLLECT_DIR}"/networking/iptables-legacy-save.txt + fi + + ok +} + +get_common_logs() { + try "collect common operating system logs" + + for entry in ${COMMON_LOGS[*]}; do + if [[ -e "/var/log/${entry}" ]]; then + if [[ "${entry}" == "messages" ]]; then + tail -c 100M /var/log/messages > "${COLLECT_DIR}"/var_log/messages + continue + fi + if [[ "${entry}" == "containers" ]]; then + cp --force --dereference --recursive /var/log/containers/aws-node* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference 
--recursive /var/log/containers/coredns-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/kube-proxy* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/ebs-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/efs-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/fsx-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/fsx-openzfs-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/file-cache-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/eks-pod-identity-agent* "${COLLECT_DIR}"/var_log/ 2> /dev/null + continue + fi + if [[ "${entry}" == "pods" ]]; then + cp --force --dereference --recursive /var/log/pods/kube-system_aws-node* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_coredns* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_kube-proxy* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_ebs-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_efs-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_fsx-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_fsx-openzfs-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_file-cache-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference 
--recursive /var/log/pods/kube-system_eks-pod-identity-agent* "${COLLECT_DIR}"/var_log/ 2> /dev/null + continue + fi + cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/var_log/ 2> /dev/null + fi + done + + ok +} + +get_kernel_info() { + try "collect kernel logs" + + if [[ -e "/var/log/dmesg" ]]; then + cp --force /var/log/dmesg "${COLLECT_DIR}/kernel/dmesg.boot" + fi + dmesg > "${COLLECT_DIR}/kernel/dmesg.current" + dmesg --ctime > "${COLLECT_DIR}/kernel/dmesg.human.current" + uname -a > "${COLLECT_DIR}/kernel/uname.txt" + + ok +} + +# collect modinfo on specific modules for debugging purposes +get_modinfo() { + try "collect modinfo" + modinfo lustre > "${COLLECT_DIR}/modinfo/lustre" + + ok +} + +get_docker_logs() { + try "collect Docker daemon logs" + + case "${INIT_TYPE}" in + systemd | snap) + journalctl --unit=docker --since "${DAYS_10}" > "${COLLECT_DIR}"/docker/docker.log + ;; + other) + for entry in docker upstart/docker; do + if [[ -e "/var/log/${entry}" ]]; then + cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/docker/ + fi + done + ;; + *) + warning "The current operating system is not supported." 
+ ;; + esac + + ok +} + +get_k8s_info() { + try "collect kubelet information" + + if [[ -n "${KUBECONFIG:-}" ]]; then + command -v kubectl > /dev/null && kubectl get --kubeconfig="${KUBECONFIG}" svc > "${COLLECT_DIR}"/kubelet/svc.log + command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + + elif [[ -f /etc/eksctl/kubeconfig.yaml ]]; then + KUBECONFIG="/etc/eksctl/kubeconfig.yaml" + command -v kubectl > /dev/null && kubectl get --kubeconfig="${KUBECONFIG}" svc > "${COLLECT_DIR}"/kubelet/svc.log + command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + + elif [[ -f /etc/systemd/system/kubelet.service ]]; then + KUBECONFIG=$(grep kubeconfig /etc/systemd/system/kubelet.service | awk '{print $2}') + command -v kubectl > /dev/null && kubectl get --kubeconfig="${KUBECONFIG}" svc > "${COLLECT_DIR}"/kubelet/svc.log + command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + + elif [[ -f /var/lib/kubelet/kubeconfig ]]; then + KUBECONFIG="/var/lib/kubelet/kubeconfig" + command -v kubectl > /dev/null && kubectl get --kubeconfig=${KUBECONFIG} svc > "${COLLECT_DIR}"/kubelet/svc.log + command -v kubectl > /dev/null && kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + + else + echo "======== Unable to find KUBECONFIG, IGNORING POD DATA =========" >> "${COLLECT_DIR}"/kubelet/svc.log + fi + + # Try to copy the kubeconfig file if kubectl command doesn't exist + [[ (! 
-f "${COLLECT_DIR}/kubelet/kubeconfig.yaml") && (-n ${KUBECONFIG}) ]] && cp ${KUBECONFIG} "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + + case "${INIT_TYPE}" in + systemd) + timeout 75 journalctl --unit=kubelet --since "${DAYS_10}" > "${COLLECT_DIR}"/kubelet/kubelet.log + + systemctl cat kubelet > "${COLLECT_DIR}"/kubelet/kubelet_service.txt 2>&1 + ;; + snap) + timeout 75 snap logs kubelet-eks -n all > "${COLLECT_DIR}"/kubelet/kubelet.log + + timeout 75 snap get kubelet-eks > "${COLLECT_DIR}"/kubelet/kubelet-eks_service.txt 2>&1 + ;; + *) + warning "The current operating system is not supported." + ;; + esac + + ok +} + +get_ipamd_info() { + if [[ "${ignore_introspection}" == "false" ]]; then + try "collect L-IPAMD introspection information" + for entry in ${IPAMD_DATA[*]}; do + curl --max-time 3 --silent http://localhost:61679/v1/"${entry}" >> "${COLLECT_DIR}"/ipamd/"${entry}".json + done + else + echo "Ignoring IPAM introspection stats as mentioned" | tee -a "${COLLECT_DIR}"/ipamd/ipam_introspection_ignore.txt + fi + + ok + + if [[ "${ignore_metrics}" == "false" ]]; then + try "collect L-IPAMD prometheus metrics" + curl --max-time 3 --silent http://localhost:61678/metrics > "${COLLECT_DIR}"/ipamd/metrics.json 2>&1 + else + echo "Ignoring Prometheus Metrics collection as mentioned" | tee -a "${COLLECT_DIR}"/ipamd/ipam_metrics_ignore.txt + fi + + ok + + try "collect L-IPAMD checkpoint" + cp /var/run/aws-node/ipam.json "${COLLECT_DIR}"/ipamd/ipam.json + + ok +} + +get_multus_info() { + try "collect Multus logs if they exist" + cp --force --dereference --recursive /var/log/pods/kube-system_kube-multus* "${COLLECT_DIR}"/var_log/ 2> /dev/null + + ok +} + +get_sysctls_info() { + try "collect sysctls information" + # dump all sysctls + sysctl --all >> "${COLLECT_DIR}"/sysctls/sysctl_all.txt 2> /dev/null + + ok +} + +get_network_policy_ebpf_info() { + try "collect network policy ebpf loaded data" + echo "*** EBPF loaded data ***" >> "${COLLECT_DIR}"/networking/ebpf-data.txt 
+ LOADED_EBPF=$(/opt/cni/bin/aws-eks-na-cli ebpf loaded-ebpfdata | tee -a "${COLLECT_DIR}"/networking/ebpf-data.txt) + + for mapid in $(echo "$LOADED_EBPF" | grep "Map ID:" | sed 's/Map ID: \+//' | sort | uniq); do + echo "*** EBPF Maps Data for Map ID $mapid ***" >> "${COLLECT_DIR}"/networking/ebpf-maps-data.txt + /opt/cni/bin/aws-eks-na-cli ebpf dump-maps $mapid >> "${COLLECT_DIR}"/networking/ebpf-maps-data.txt + done + ok +} + +get_networking_info() { + try "collect networking infomation" + + # conntrack info + echo "*** Output of conntrack -S *** " >> "${COLLECT_DIR}"/networking/conntrack.txt + timeout 75 conntrack -S >> "${COLLECT_DIR}"/networking/conntrack.txt + echo "*** Output of conntrack -L ***" >> "${COLLECT_DIR}"/networking/conntrack.txt + timeout 75 conntrack -L >> "${COLLECT_DIR}"/networking/conntrack.txt + + # ifconfig + timeout 75 ifconfig > "${COLLECT_DIR}"/networking/ifconfig.txt + + # ip rule show + timeout 75 ip rule show > "${COLLECT_DIR}"/networking/iprule.txt + timeout 75 ip route show table all >> "${COLLECT_DIR}"/networking/iproute.txt + + # configure-multicard-interfaces + timeout 75 journalctl -u configure-multicard-interfaces > "${COLLECT_DIR}"/networking/configure-multicard-interfaces.txt || echo -e "\tTimed out, ignoring \"configure-multicard-interfaces unit output \" " + + # test some network connectivity + timeout 75 ping -A -c 10 amazon.com > "${COLLECT_DIR}"/networking/ping_amazon.com.txt + timeout 75 ping -A -c 10 public.ecr.aws > "${COLLECT_DIR}"/networking/ping_public.ecr.aws.txt + + if [[ -e "${COLLECT_DIR}"/kubelet/kubeconfig.yaml ]]; then + API_SERVER=$(grep server: "${COLLECT_DIR}"/kubelet/kubeconfig.yaml | sed 's/.*server: //') + CA_CRT=$(grep certificate-authority: "${COLLECT_DIR}"/kubelet/kubeconfig.yaml | sed 's/.*certificate-authority: //') + for i in $(seq 5); do + echo -e "curling ${API_SERVER} ($i of 5) $(date --utc +%FT%T.%3N%Z)\n\n" >> ${COLLECT_DIR}"/networking/curl_api_server.txt" + timeout 75 curl -v 
--connect-timeout 3 --max-time 10 --noproxy '*' --cacert "${CA_CRT}" "${API_SERVER}"/livez?verbose >> ${COLLECT_DIR}"/networking/curl_api_server.txt" 2>&1 + done + fi + + cp /etc/resolv.conf "${COLLECT_DIR}"/networking/resolv.conf + + # collect ethtool -S for all interfaces + INTERFACES=$(ip -o a | awk '{print $2}' | sort -n | uniq) + for ifc in ${INTERFACES}; do + echo "Interface ${ifc}" >> "${COLLECT_DIR}"/networking/ethtool.txt + ethtool -S ${ifc} >> "${COLLECT_DIR}"/networking/ethtool.txt 2>&1 + echo -e "\n" >> "${COLLECT_DIR}"/networking/ethtool.txt + done + ok +} + +get_cni_config() { + try "collect CNI configuration information" + + if [[ -e "/etc/cni/net.d/" ]]; then + cp --force --recursive --dereference /etc/cni/net.d/* "${COLLECT_DIR}"/cni/ + fi + + ok +} + +get_cni_configuration_variables() { + # To get cni configuration variables, gather from the main container "amazon-k8s-cni" + # - https://github.com/aws/amazon-vpc-cni-k8s#cni-configuration-variables + try "collect CNI Configuration Variables from Docker" + + # "docker container list" will only show "RUNNING" containers. + # "docker container inspect" will generate plain text output. + if [[ "$(pgrep -o dockerd)" -ne 0 ]]; then + timeout 75 docker container list | awk '/amazon-k8s-cni/{print$NF}' | xargs -n 1 docker container inspect > "${COLLECT_DIR}"/cni/cni-configuration-variables-dockerd.txt 2>&1 || echo -e "\tTimed out, ignoring \"cni configuration variables output \" " + else + warning "The Docker daemon is not running." + fi + + try "collect CNI Configuration Variables from Containerd" + + # "ctr container list" will list down all containers, including stopped ones. + # "ctr container info" will generate JSON format output. + if ! 
command -v ctr > /dev/null 2>&1; then + warning "ctr not installed" + else + # "ctr --namespace k8s.io container list" will return two containers + # - amazon-k8s-cni:v1.xx.yy + # - amazon-k8s-cni-init:v1.xx.yy + timeout 75 ctr --namespace k8s.io container list | awk '/amazon-k8s-cni:v/{print$1}' | xargs -n 1 ctr --namespace k8s.io container info > "${COLLECT_DIR}"/cni/cni-configuration-variables-containerd.json 2>&1 || echo -e "\tTimed out, ignoring \"cni configuration variables output \" " + fi + + ok +} + +get_pkgtype() { + if [[ "$(command -v rpm)" ]]; then + PACKAGE_TYPE=rpm + elif [[ "$(command -v dpkg)" ]]; then + PACKAGE_TYPE=deb + else + PACKAGE_TYPE='unknown' + fi +} + +get_pkglist() { + try "collect installed packages" + + case "${PACKAGE_TYPE}" in + rpm) + rpm -qa > "${COLLECT_DIR}"/system/pkglist.txt 2>&1 + ;; + deb) + dpkg --list > "${COLLECT_DIR}"/system/pkglist.txt 2>&1 + ;; + *) + warning "Unknown package type." + ;; + esac + + ok +} + +get_system_services() { + try "collect active system services" + + case "${INIT_TYPE}" in + systemd | snap) + systemctl list-units > "${COLLECT_DIR}"/system/services.txt 2>&1 + ;; + other) + initctl list | awk '{ print $1 }' | xargs -n1 initctl show-config > "${COLLECT_DIR}"/system/services.txt 2>&1 + printf "\n\n\n\n" >> "${COLLECT_DIR}"/system/services.txt 2>&1 + service --status-all >> "${COLLECT_DIR}"/system/services.txt 2>&1 + ;; + *) + warning "Unable to determine active services." 
+ ;; + esac + + timeout 75 top -b -n 1 > "${COLLECT_DIR}"/system/top.txt 2>&1 + timeout 75 ps fauxwww --headers > "${COLLECT_DIR}"/system/ps.txt 2>&1 + timeout 75 ps -eTF --headers > "${COLLECT_DIR}"/system/ps-threads.txt 2>&1 + timeout 75 netstat -plant > "${COLLECT_DIR}"/system/netstat.txt 2>&1 + timeout 75 cat /proc/stat > "${COLLECT_DIR}"/system/procstat.txt 2>&1 + timeout 75 cat /proc/[0-9]*/stat > "${COLLECT_DIR}"/system/allprocstat.txt 2>&1 + + ok +} + +get_containerd_info() { + try "Collect Containerd daemon information" + + if [[ "$(pgrep -o containerd)" -ne 0 ]]; then + # force containerd to dump goroutines + timeout 75 killall -sUSR1 containerd + timeout 75 containerd config dump > "${COLLECT_DIR}"/containerd/containerd-config.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 journalctl -u containerd > "${COLLECT_DIR}"/containerd/containerd-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 cp -f /tmp/containerd.*.stacks.log "${COLLECT_DIR}"/containerd/ + else + warning "The Containerd daemon is not running." + fi + + ok + + try "Collect Containerd running information" + if ! 
command -v ctr > /dev/null 2>&1; then + warning "ctr not installed" + else + timeout 75 ctr version > "${COLLECT_DIR}"/containerd/containerd-version.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr namespaces list > "${COLLECT_DIR}"/containerd/containerd-namespaces.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io images list > "${COLLECT_DIR}"/containerd/containerd-images.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io containers list > "${COLLECT_DIR}"/containerd/containerd-containers.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io tasks list > "${COLLECT_DIR}"/containerd/containerd-tasks.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io plugins list > "${COLLECT_DIR}"/containerd/containerd-plugins.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + fi + + ok +} + +get_sandboxImage_info() { + try "Collect sandbox-image daemon information" + timeout 75 journalctl -u sandbox-image > "${COLLECT_DIR}"/sandbox-image/sandbox-image-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"sandbox-image info output \" " + ok +} + +get_docker_info() { + try "Collect Docker daemon information" + + if [[ "$(pgrep -o dockerd)" -ne 0 ]]; then + timeout 75 docker info > "${COLLECT_DIR}"/docker/docker-info.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker info output \" " + timeout 75 docker ps --all --no-trunc > "${COLLECT_DIR}"/docker/docker-ps.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker ps --all --no-truc output \" " + timeout 75 docker images > "${COLLECT_DIR}"/docker/docker-images.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker images output \" " + timeout 75 docker version > "${COLLECT_DIR}"/docker/docker-version.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker 
version output \" " + timeout 75 curl --unix-socket /var/run/docker.sock http://./debug/pprof/goroutine\?debug\=2 > "${COLLECT_DIR}"/docker/docker-trace.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker version output \" " + else + warning "The Docker daemon is not running." + fi + + ok +} + +get_cpu_throttled_processes() { + try "Collect CPU Throttled Process Information" + readonly THROTTLE_LOG="${COLLECT_DIR}"/system/cpu_throttling.txt + command find /sys/fs/cgroup -iname "cpu.stat" -print0 | while IFS= read -r -d '' cs; do + # look for a non-zero nr_throttled value + if grep -q "nr_throttled [1-9]" "${cs}"; then + pids=${cs/cpu.stat/cgroup.procs} + lines=$(wc -l < "${pids}") + # ignore if no PIDs are listed + if [ "${lines}" -eq "0" ]; then + continue + fi + + echo "$cs" >> "${THROTTLE_LOG}" + cat "${cs}" >> "${THROTTLE_LOG}" + while IFS= read -r pid; do + command ps ax | grep "^${pid}" >> "${THROTTLE_LOG}" + done < "${pids}" + echo "" >> "${THROTTLE_LOG}" + fi + done + if [ ! -e "${THROTTLE_LOG}" ]; then + echo "No CPU Throttling Found" >> "${THROTTLE_LOG}" + fi + ok +} + +get_io_throttled_processes() { + try "Collect IO Throttled Process Information" + readonly IO_THROTTLE_LOG="${COLLECT_DIR}"/system/io_throttling.txt + command echo -e "PID Name Block IO Delay (centisconds)" > ${IO_THROTTLE_LOG} + # column 42 is Aggregated block I/O delays, measured in centiseconds so we capture the non-zero block + # I/O delays. 
+ command cut -d" " -f 1,2,42 /proc/[0-9]*/stat | sort -n -k+3 -r | grep -v 0$ >> ${IO_THROTTLE_LOG} + ok +} + +# ----------------------------------------------------------------------------- +# Entrypoint +parse_options "$@" + +collect +pack +finished diff --git a/log-collector-script/linux/eks-ssm-content.json b/log-collector-script/linux/eks-ssm-content.json new file mode 100644 index 000000000..42caf4503 --- /dev/null +++ b/log-collector-script/linux/eks-ssm-content.json @@ -0,0 +1,43 @@ +{ + "schemaVersion": "2.2", + "description": "EKS Log Collector", + "parameters": { + "bucketName": { + "type": "String", + "default": "Enabled" + } + }, + "mainSteps": [ + { + "action": "aws:runShellScript", + "name": "PatchLinux", + "precondition": { + "StringEquals": ["platformType", "Linux"] + }, + "inputs": { + "runCommand": [ + "curl -s -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-log-collector.sh", + "echo \"* Cleaning old eks-log-collector files\"", + "rm -vf /var/log/eks_i*", + "bash ./eks-log-collector.sh >/dev/null 2>&1", + "echo \"* EKS logs collected\"", + "if [ -f /bin/aws ]; then", + "echo \"* AWS CLI v2 already installed\"", + "else", + "echo \"* Installing AWS CLI v2\"", + "AWSCLI_DIR=$(mktemp -d)", + "curl \"https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip\" -o \"${AWSCLI_DIR}/awscliv2.zip\" >/dev/null 2>&1", + "yum install unzip -y >/dev/null 2>&1", + "unzip -q ${AWSCLI_DIR}/awscliv2.zip -d ${AWSCLI_DIR} >/dev/null 2>&1", + "${AWSCLI_DIR}/aws/install --bin-dir /bin --update", + "/bin/aws --version", + "fi", + "echo \"* Bucket name: s3://{{bucketName}}\"", + "echo \"* Pushing to S3\"", + "/bin/aws s3 cp /var/log/eks_i* s3://{{bucketName}}", + "echo \"* Logs uploaded to S3\"" + ] + } + } + ] +} diff --git a/log-collector-script/windows/README.md b/log-collector-script/windows/README.md new file mode 100644 index 000000000..1bff2287b --- /dev/null +++ b/log-collector-script/windows/README.md @@ -0,0 
+1,132 @@ +### EKS Logs Collector (Windows) + +This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. + +#### Usage + +* Collect EKS logs using SSM agent, jump to below [section](#collect-eks-logs-using-ssm-agent) _(or)_ + +* Run this project as the Administrator user: + +``` +Invoke-WebRequest -OutFile eks-log-collector.ps1 https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-log-collector.ps1 +.\eks-log-collector.ps1 +``` + +#### Example output + +The project can be used in normal or Enable/Disable Debug(**Caution: Enable/Disable Debug will restart Docker daemon which would kill running containers**). + +``` +USAGE: .\eks-log-collector.ps1 +``` + +#### Example output in normal mode + +The following output shows this project running in normal mode. + +``` +.\eks-log-collector.ps1 +Running Default(Collect) Mode +Cleaning up directory +OK +Creating temporary directory +OK +Collecting System information +OK +Checking free disk space +C: drive has 58% free space +OK +Collecting System Logs +OK +Collecting Application Logs +OK +Collecting Volume info +OK +Collecting Windows Firewall info +Collecting Rules for Domain profile +Collecting Rules for Private profile +Collecting Rules for Public profile +OK +Collecting installed applications list +OK +Collecting Services list +OK +Collecting Docker daemon information +OK +Collecting Kubelet logs +OK +Collecting Kube-proxy logs +OK +Collecting kubelet information +OK +Collecting Docker daemon logs +OK +Collecting EKS logs +OK +Collecting network Information +OK +Archiving gathered data +Done... 
your bundled logs are located in C:\log-collector\eks_i-0b318f704c74b6ab2_20200101T0620179658Z.zip +``` + +### Collect EKS logs using SSM agent + +#### To run EKS log collector script on Worker Node(s) and upload the bundle(tar) to a S3 Bucket using SSM agent, please follow below steps + +##### Prerequisites + +* Configure AWS CLI on the system where you will run the below commands. The IAM entity (User/Role) should have permissions to run/invoke `aws ssm create-document`, `aws ssm send-command` and `aws ssm get-command-invocation` commands. + + * `ssm:CreateDocument` + * `ssm:GetCommandInvocation` + * `ssm:SendCommand` + +* SSM agent should be installed and running on Worker Node(s). [How to Install SSM Agent link](https://docs.aws.amazon.com/systems-manager/latest/userguide/sysman-manual-agent-install.html) + +* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonSSMManagedInstanceCore` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonSSMManagedInstanceCore` has `S3:PutObject` permission to all S3 resources. + +*Note:* For more granular control of the IAM permission check [Actions defined by AWS Systems Manager](https://docs.aws.amazon.com/IAM/latest/UserGuide/list_awssystemsmanager.html%23awssystemsmanager-actions-as-permissions) + +* A S3 bucket location is required which is taken as an input parameter to `aws ssm send-command` command, to which the logs should be pushed. + +#### To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s) + +1. Create the SSM document named "EKSLogCollector" using the following commands: + +``` +aws ssm create-document \ + --name "EKSLogCollectorWindows" \ + --document-type "Command" \ + --content https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-ssm-content.json +``` + +2. 
To execute the bash script in the SSM document and to collect the logs from worker, run the following command: + +``` +aws ssm send-command \ + --instance-ids \ + --document-name "EKSLogCollectorWindows" \ + --parameters "bucketName=" \ + --output json +``` + +3. To check the status of the SSM command submitted in the previous step, use the command: + +``` +aws ssm get-command-invocation \ + --command-id "" \ + --instance-id "" \ + --output text +``` + +4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. + +### Collect User Data + +If collecting user data is required as a part of troubleshooting, please use the commands below to retrieve the data via IMDSv2: + +``` +[string]$token = Invoke-RestMethod -Headers @{"X-aws-ec2-metadata-token-ttl-seconds" = "21600"} -Method PUT -Uri http://169.254.169.254/latest/api/token +Invoke-RestMethod -Headers @{"X-aws-ec2-metadata-token" = $token} -Method GET -Uri http://169.254.169.254/latest/user-data +``` diff --git a/log-collector-script/windows/eks-log-collector.ps1 b/log-collector-script/windows/eks-log-collector.ps1 new file mode 100644 index 000000000..4bb1e454e --- /dev/null +++ b/log-collector-script/windows/eks-log-collector.ps1 @@ -0,0 +1,406 @@ +<# + Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. + Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at + + http://aws.amazon.com/apache2.0/ + + or in the "license" file accompanying this file. + This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + +.SYNOPSIS + Collects EKS Logs +.DESCRIPTION + Run the script to gather basic operating system, Docker daemon, and kubelet logs. 
+ +.NOTES + You need to run this script with Elevated permissions to allow for the collection of the installed applications list +.EXAMPLE + eks-log-collector.ps1 + Gather basic operating system, Docker daemon, and kubelet logs. + +#> + +param( + [Parameter(Mandatory=$False)][string]$RunMode = "Collect" + ) + +# Common options +$basedir="C:\log-collector" +$token = Invoke-RestMethod -Headers @{"X-aws-ec2-metadata-token-ttl-seconds" = "5"} -Method PUT -Uri http://169.254.169.254/latest/api/token +$instanceId = Invoke-RestMethod -Headers @{"X-aws-ec2-metadata-token" = $token} -Method GET -Uri http://169.254.169.254/latest/meta-data/instance-id +$curtime = Get-Date -Format FileDateTimeUniversal +$outfilename = "eks_" + $instanceid + "_" + $curtime + ".zip" +$infodir="$basedir\collect" +$info_system="$infodir\system" + + +# Common functions +# --------------------------------------------------------------------------------------- + +Function is_elevated{ + If (-NOT ([Security.Principal.WindowsPrincipal] [Security.Principal.WindowsIdentity]::GetCurrent()).IsInRole(` + [Security.Principal.WindowsBuiltInRole] "Administrator")) { + Write-warning "This script requires elevated privileges to copy registry keys to the EKS logs collector folder." + Write-Host "Please re-launch as Administrator." 
-foreground "red" -background "black" + break + } +} + + +Function create_working_dir{ + try { + Write-Host "Creating temporary directory" + New-Item -type directory -path $info_system -Force >$null + New-Item -type directory -path $info_system\eks -Force >$null + New-Item -type directory -path $info_system\docker -Force >$null + New-Item -type directory -path $info_system\containerd -Force >$null + New-Item -type directory -path $info_system\firewall -Force >$null + New-Item -type directory -path $info_system\kubelet -Force >$null + New-Item -type directory -path $info_system\kube-proxy -Force >$null + New-Item -type directory -path $info_system\cni -Force >$null + New-Item -type directory -path $info_system\docker_log -Force >$null + New-Item -type directory -path $info_system\containerd_log -Force >$null + New-Item -type directory -path $info_system\network -Force >$null + New-Item -type directory -path $info_system\network\hns -Force >$null + Write-Host "OK" -ForegroundColor "green" + } + catch { + Write-Host "Unable to create temporary directory" + Write-Host "Please ensure you have enough permissions to create directories" + Write-Error "Failed to create temporary directory" + Break + } +} + +Function check_service_installed_and_running { + <# + .SYNOPSIS + This method checks if the specified service is installed and in running state. + #> + [CmdletBinding()] + Param ( + [Parameter(Mandatory=$true)] + [ValidateNotNullOrEmpty()] + [string]$ServiceName + ) + + Write-Host ("Checking status of service: {0}" -f $ServiceName) + try { + if (-not (Get-Service -Name $ServiceName -ErrorAction SilentlyContinue)) { + Write-Host ("Service {0} not found" -f $ServiceName) + return 0 + } + + if ((Get-Service -Name $ServiceName).Status -eq "Running") { + Write-Host ("Service {0} is running." -f $ServiceName) + return 1 + } + Write-Host ("Service {0} is not running." 
-f $ServiceName) + return 0 + } + catch { + Write-Error "Unable to check if service is installed and running" + break + } +} + +Function get_sysinfo{ + try { + Write-Host "Collecting System information" + systeminfo.exe > $info_system\sysinfo + Write-Host "OK" -ForegroundColor "green" + } + catch { + Write-Error "Unable to collect system information" + Break + } + +} + +Function is_diskfull{ + $threshold = 30 + try { + Write-Host "Checking free disk space" + $drive = Get-WmiObject Win32_LogicalDisk -Filter "DeviceID='C:'" + $percent = ([math]::round($drive.FreeSpace/1GB, 0) / ([math]::round($drive.Size/1GB, 0)) * 100) + Write-Host "C: drive has $percent% free space" + Write-Host "OK" -ForegroundColor "green" + } + catch { + Write-Error "Unable to Determine Free Disk Space" + Break + } + if ($percent -lt $threshold){ + Write-Error "C: drive only has $percent% free space, please ensure there is at least $threshold% free disk space to collect and store the log files" + Break + } +} + +Function get_system_logs{ + try { + Write-Host "Collecting System Logs" + Get-WinEvent -LogName System | Select-Object timecreated,leveldisplayname,machinename,message | export-csv -Path $info_system\system-eventlogs.csv + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to Collect System Logs" + break + } +} + +Function get_application_logs{ + try { + Write-Host "Collecting Application Logs" + Get-WinEvent -LogName Application | Select-Object timecreated,leveldisplayname,machinename,message | export-csv -Path $info_system\application-eventlogs.csv + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to Collect Application Logs" + break + } +} + +Function get_volumes_info{ + try { + Write-Host "Collecting Volume info" + Get-psdrive -PSProvider 'FileSystem' | Out-file $info_system\volumes + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to Collect Volume information" + break + } +} + +Function 
get_firewall_info{ + try { + Write-Host "Collecting Windows Firewall info" + $fw = Get-NetFirewallProfile + foreach ($f in $fw){ + if ($f.Enabled -eq "True"){ + $file = $f.name + Write-Host "Collecting Rules for" $f.name "profile" + Get-NetFirewallProfile -Name $f.name | Get-NetFirewallRule | Out-file $info_system\firewall\firewall-$file + } + } + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to Collect Windows Firewall information" + break + } +} + +Function get_softwarelist{ + try { + Write-Host "Collecting installed applications list" + gp HKLM:\Software\Microsoft\Windows\CurrentVersion\Uninstall\* |Select DisplayName, DisplayVersion, Publisher, InstallDate, HelpLink, UninstallString | out-file $info_system\installed-64bit-apps.txt + gp HKLM:\Software\Wow6432Node\Microsoft\Windows\CurrentVersion\Uninstall\* |Select DisplayName, DisplayVersion, Publisher, InstallDate, HelpLink, UninstallString | out-file $info_system\installed-32bit-apps.txt + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect installed applications list" + break + } +} + +Function get_system_services{ + try { + Write-Host "Collecting Services list" + get-service | fl | out-file $info_system\services + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect Services list" + break + } +} + +Function get_containerd_info{ + Write-Host "Collecting Containerd information" + if (check_service_installed_and_running "containerd") { + try { + ctr version > $info_system\containerd\containerd-version.txt 2>&1 + ctr namespaces list > $info_system\containerd\containerd-namespaces.txt 2>&1 + ctr --namespace k8s.io images list > $info_system\containerd\containerd-images.txt 2>&1 + ctr --namespace k8s.io containers list > $info_system\containerd\containerd-containers.txt 2>&1 + ctr --namespace k8s.io tasks list > $info_system\containerd\containerd-tasks.txt 2>&1 + ctr --namespace k8s.io plugins list > 
$info_system\containerd\containerd-plugins.txt 2>&1 + Write-Host "OK" -foregroundcolor "green" + } + catch{ + Write-Error "Unable to collect Containerd information" + Break + } + } +} + +Function get_docker_info{ + Write-Host "Collecting Docker daemon information" + if (check_service_installed_and_running "docker") { + try { + docker info > $info_system\docker\docker-info.txt 2>&1 + docker ps --all --no-trunc > $info_system\docker\docker-ps.txt 2>&1 + docker images > $info_system\docker\docker-images.txt 2>&1 + docker version > $info_system\docker\docker-version.txt 2>&1 + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect Docker daemon information" + Break + } + } +} + +Function get_eks_logs{ + try { + Write-Host "Collecting EKS logs" + copy C:\ProgramData\Amazon\EKS\logs\* $info_system\eks\ + Write-Host "OK" -foregroundcolor "green" + } + catch{ + Write-Error "Unable to collect ECS Agent logs" + Break + } +} + +Function get_k8s_info{ + try { + Write-Host "Collecting Kubelet logs" + Get-EventLog -LogName EKS -Source kubelet | Sort-Object Time | Export-CSV $info_system/kubelet/kubelet-service.csv + Write-Host "OK" -foregroundcolor "green" + } + catch{ + Write-Error "Unable to collect Kubelet logs" + Break + } + + try { + Write-Host "Collecting Kube-proxy logs" + Get-EventLog -LogName EKS -Source kube-proxy | Sort-Object Time | Export-CSV $info_system/kube-proxy/kube-proxy-service.csv + Write-Host "OK" -foregroundcolor "green" + } + catch{ + Write-Error "Unable to collect Kube-proxy logs" + Break + } + + try { + Write-Host "Collecting kubelet information" + copy C:\ProgramData\kubernetes\kubeconfig $info_system\kubelet\ + copy C:\ProgramData\kubernetes\kubelet-config.json $info_system\kubelet\ + copy C:\ProgramData\Amazon\EKS\cni\config\* $info_system\cni\ + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect kubelet information" + Break + } +} + +Function get_docker_logs{ + Write-Host 
"Collecting Docker daemon logs" + if (check_service_installed_and_running "docker") { + try { + Get-EventLog -LogName Application -Source Docker | Sort-Object Time | Export-CSV $info_system/docker_log/docker-daemon.csv + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect Docker daemon logs" + Break + } + } +} + +Function get_containerd_logs{ + Write-Host "Collecting containerd logs" + if (check_service_installed_and_running "containerd") { + try { + copy C:\ProgramData\containerd\root\panic.log $info_system\containerd_log\ + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect containerd logs" + Break + } + } +} + +Function get_network_info{ + try { + Write-Host "Collecting network Information" + Get-HnsNetwork | Select Name, Type, Id, AddressPrefix > $info_system\network\hns\network.txt + Get-hnsnetwork | Convertto-json -Depth 20 >> $info_system\network\hns\network.txt + Get-hnsnetwork | % { Get-HnsNetwork -Id $_.ID -Detailed } | Convertto-json -Depth 20 >> $info_system\network\hns\networkdetailed.txt + + Get-HnsEndpoint | Select IpAddress, MacAddress, IsRemoteEndpoint, State > $info_system\network\hns\endpoint.txt + Get-hnsendpoint | Convertto-json -Depth 20 >> $info_system\network\hns\endpoint.txt + + Get-hnspolicylist | Convertto-json -Depth 20 > $info_system\network\hns\policy.txt + + vfpctrl.exe /list-vmswitch-port > $info_system\network\ports.txt + ipconfig /allcompartments /all > $info_system\network\ip.txt + route print > $info_system\network\routes.txt + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect network information" + Break + } +} + +Function cleanup{ + Write-Host "Cleaning up directory" + Remove-Item -Recurse -Force $basedir -ErrorAction Ignore + Write-Host "OK" -foregroundcolor green +} + +Function pack{ + try { + Write-Host "Archiving gathered data" + Compress-Archive -Path $infodir\* -CompressionLevel Optimal -DestinationPath 
$basedir\$outfilename + Remove-Item -Recurse -Force $infodir -ErrorAction Ignore + Write-Host "Done... your bundled logs are located in " $basedir\$outfilename + } + catch { + Write-Error "Unable to archive data" + Break + } +} + +Function init{ + is_elevated + create_working_dir + get_sysinfo +} + +Function collect{ + init + is_diskfull + get_system_logs + get_application_logs + get_volumes_info + get_firewall_info + get_softwarelist + get_system_services + get_docker_info + get_containerd_info + get_k8s_info + get_docker_logs + get_containerd_logs + get_eks_logs + get_network_info + +} + +#-------------------------- +#Main-function +Function main { + Write-Host "Running Default(Collect) Mode" -foregroundcolor "blue" + cleanup + collect + pack +} + +#Entry point +main diff --git a/log-collector-script/windows/eks-ssm-content.json b/log-collector-script/windows/eks-ssm-content.json new file mode 100644 index 000000000..a3d6360a0 --- /dev/null +++ b/log-collector-script/windows/eks-ssm-content.json @@ -0,0 +1,47 @@ +{ + "schemaVersion": "2.2", + "description": "EKS Log Collector", + "parameters": { + "bucketName": { + "type": "String", + "default": "Enabled" + } + }, + "mainSteps": [ + { + "precondition": { + "StringEquals": ["platformType", "Windows"] + }, + "action": "aws:runPowerShellScript", + "name": "PatchWindows", + "inputs": { + "runCommand": [ + "if (!(Get-Module 'AWSPowerShell')) { ", + " Write-Host 'AWSPowerShell does not exist' ", + " Install-Module -Name AWSPowerShell -Force ", + "} ", + "try { ", + " Write-Host 'Downloading EKS Log collector script' ", + " Invoke-WebRequest -UseBasicParsing 'https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-log-collector.ps1' -OutFile eks-log-collector.ps1 ", + "} ", + "catch { ", + " Write-Host 'Uploaded to S3 failed' ", + " break ", + "} ", + "powershell .\\eks-log-collector.ps1", + "try { ", + " Write-Host 'Pushing to S3' ", + " Write-S3Object -BucketName {{bucketName}} 
-Folder C:\\log-collector -KeyPrefix eks-log-collector\\ -SearchPattern *.zip ", + " Write-Host 'Logs uploaded to S3' ", + "} ", + "catch { ", + " Write-Host 'Uploaded to S3 failed' ", + " break ", + "} ", + "", + "Remove-Item -Force .\\eks-log-collector.ps1 -ErrorAction Ignore " + ] + } + } + ] +} diff --git a/mkdocs.yaml b/mkdocs.yaml new file mode 100644 index 000000000..56ec4c37e --- /dev/null +++ b/mkdocs.yaml @@ -0,0 +1,19 @@ +site_name: Amazon EKS AMI +docs_dir: doc/ +site_description: Build template and runtime resources for the Amazon EKS AMI +repo_name: awslabs/amazon-eks-ami +repo_url: https://github.com/awslabs/amazon-eks-ami +nav: + - 'Overview': README.md + - 'User Guide': USER_GUIDE.md + - 'Changelog': CHANGELOG.md + - 'Community': + - 'Contribution guidelines': CONTRIBUTING.md + - 'Code of Conduct': CODE_OF_CONDUCT.md + +theme: + name: material + palette: + primary: black + features: + - navigation.sections \ No newline at end of file diff --git a/scripts/cleanup.sh b/scripts/cleanup.sh new file mode 100644 index 000000000..61c399fee --- /dev/null +++ b/scripts/cleanup.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +# Clean up yum caches to reduce the image size +sudo yum clean all +sudo rm -rf /var/cache/yum + +# Clean up files to reduce confusion during debug +sudo rm -rf \ + /etc/hostname \ + /etc/machine-id \ + /etc/resolv.conf \ + /etc/ssh/ssh_host* \ + /home/ec2-user/.ssh/authorized_keys \ + /root/.ssh/authorized_keys \ + /var/lib/cloud/data \ + /var/lib/cloud/instance \ + /var/lib/cloud/instances \ + /var/lib/cloud/sem \ + /var/lib/dhclient/* \ + /var/lib/dhcp/dhclient.* \ + /var/lib/yum/history \ + /var/log/cloud-init-output.log \ + /var/log/cloud-init.log \ + /var/log/secure \ + /var/log/wtmp \ + /var/log/messages \ + /var/log/audit/* + +sudo touch /etc/machine-id diff --git a/scripts/cleanup_additional_repos.sh b/scripts/cleanup_additional_repos.sh new file mode 100644 index 000000000..c9cb20f07 --- /dev/null +++ 
b/scripts/cleanup_additional_repos.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# +# Clean up additional YUM repositories, typically used for security patches. +# The format of ADDITIONAL_YUM_REPOS is: "repo=patches-repo,name=Install patches,baseurl=http://amazonlinux.$awsregion.$awsdomain/xxxx,priority=10" +# Multiple yum repos can be specified, separated by ';' + +if [ -z "${ADDITIONAL_YUM_REPOS}" ]; then + echo "no additional yum repo, skipping" + exit 0 +fi + +AWK_CMD=' +BEGIN {RS=";";FS=","} +{ + delete vars; + for(i = 1; i <= NF; ++i) { + n = index($i, "="); + if(n) { + vars[substr($i, 1, n-1)] = substr($i, n + 1) + } + } + Repo = "/etc/yum.repos.d/"vars["repo"]".repo" +} +{cmd="rm -f " Repo; system(cmd)} +' +sudo awk "$AWK_CMD" <<< "${ADDITIONAL_YUM_REPOS}" diff --git a/scripts/enable-fips.sh b/scripts/enable-fips.sh new file mode 100755 index 000000000..399ab6b26 --- /dev/null +++ b/scripts/enable-fips.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# https://aws.amazon.com/blogs/publicsector/enabling-fips-mode-amazon-linux-2/ +if [[ "$ENABLE_FIPS" == "true" ]]; then + # install and enable fips modules + sudo yum install -y dracut-fips openssl + sudo dracut -f + + # enable fips in the boot command + sudo /sbin/grubby --update-kernel=ALL --args="fips=1" +fi diff --git a/scripts/generate-version-info.sh b/scripts/generate-version-info.sh new file mode 100644 index 000000000..94ded309c --- /dev/null +++ b/scripts/generate-version-info.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +# generates a JSON file containing version information for the software in this AMI + +set -o errexit +set -o pipefail + +if [ "$#" -ne 1 ]; then + echo "usage: $0 OUTPUT_FILE" + exit 1 +fi + +OUTPUT_FILE="$1" + +# packages +sudo rpm --query --all --queryformat '\{"%{NAME}": "%{VERSION}-%{RELEASE}"\}\n' | jq --slurp --sort-keys 'add | {packages:(.)}' > "$OUTPUT_FILE" + +# binaries +KUBELET_VERSION=$(kubelet --version | awk '{print $2}') +if [ "$?" 
!= 0 ]; then + echo "unable to get kubelet version" + exit 1 +fi +echo $(jq ".binaries.kubelet = \"$KUBELET_VERSION\"" $OUTPUT_FILE) > $OUTPUT_FILE + +CLI_VERSION=$(aws --version | awk '{print $1}' | cut -d '/' -f 2) +if [ "$?" != 0 ]; then + echo "unable to get aws cli version" + exit 1 +fi +echo $(jq ".binaries.awscli = \"$CLI_VERSION\"" $OUTPUT_FILE) > $OUTPUT_FILE + +# cached images +if systemctl is-active --quiet containerd; then + echo $(jq ".images = [ $(sudo ctr -n k8s.io image ls -q | cut -d'/' -f2- | sort | uniq | grep -v 'sha256' | xargs -r printf "\"%s\"," | sed 's/,$//') ]" $OUTPUT_FILE) > $OUTPUT_FILE +elif [ "${CACHE_CONTAINER_IMAGES}" = "true" ]; then + echo "containerd must be active to generate version info for cached images" + exit 1 +fi diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh new file mode 100644 index 000000000..fd58fa292 --- /dev/null +++ b/scripts/install-worker.sh @@ -0,0 +1,573 @@ +#!/usr/bin/env bash + +set -o pipefail +set -o nounset +set -o errexit +IFS=$'\n\t' +export AWS_DEFAULT_OUTPUT="json" + +################################################################################ +### Validate Required Arguments ################################################ +################################################################################ +validate_env_set() { + ( + set +o nounset + + if [ -z "${!1}" ]; then + echo "Packer variable '$1' was not set. 
Aborting" + exit 1 + fi + ) +} + +validate_env_set BINARY_BUCKET_NAME +validate_env_set BINARY_BUCKET_REGION +validate_env_set DOCKER_VERSION +validate_env_set CONTAINERD_VERSION +validate_env_set RUNC_VERSION +validate_env_set CNI_PLUGIN_VERSION +validate_env_set KUBERNETES_VERSION +validate_env_set KUBERNETES_BUILD_DATE +validate_env_set PULL_CNI_FROM_GITHUB +validate_env_set PAUSE_CONTAINER_VERSION +validate_env_set CACHE_CONTAINER_IMAGES +validate_env_set WORKING_DIR + +################################################################################ +### Machine Architecture ####################################################### +################################################################################ + +MACHINE=$(uname -m) +if [ "$MACHINE" == "x86_64" ]; then + ARCH="amd64" +elif [ "$MACHINE" == "aarch64" ]; then + ARCH="arm64" +else + echo "Unknown machine architecture '$MACHINE'" >&2 + exit 1 +fi + +################################################################################ +### Packages ################################################################### +################################################################################ + +sudo yum install -y \ + yum-utils \ + yum-plugin-versionlock + +# lock the version of the kernel and associated packages before we yum update +sudo yum versionlock kernel-$(uname -r) kernel-headers-$(uname -r) kernel-devel-$(uname -r) + +# Update the OS to begin with to catch up to the latest packages. +sudo yum update -y + +# Install necessary packages +sudo yum install -y \ + aws-cfn-bootstrap \ + chrony \ + conntrack \ + ec2-instance-connect \ + ethtool \ + ipvsadm \ + jq \ + nfs-utils \ + socat \ + unzip \ + wget \ + mdadm \ + pigz + +# skip kernel version cleanup on al2023 +if ! cat /etc/*release | grep "al2023" > /dev/null 2>&1; then + # Remove any old kernel versions. 
`--count=1` here means "only leave 1 kernel version installed" + sudo package-cleanup --oldkernels --count=1 -y +fi + +# packages that need special handling +if cat /etc/*release | grep "al2023" > /dev/null 2>&1; then + # exists in al2023 only (needed by kubelet) + sudo yum install -y iptables-nft + + # Mask udev triggers installed by amazon-ec2-net-utils package + sudo touch /etc/udev/rules.d/99-vpc-policy-routes.rules + + # Make networkd ignore foreign settings, else it may unexpectedly delete IP rules and routes added by CNI + sudo mkdir -p /usr/lib/systemd/networkd.conf.d/ + cat << EOF | sudo tee /usr/lib/systemd/networkd.conf.d/80-release.conf +# Do not clobber any routes or rules added by CNI. +[Network] +ManageForeignRoutes=no +ManageForeignRoutingPolicyRules=no +EOF + + # Temporary fix for https://github.com/aws/amazon-vpc-cni-k8s/pull/2118 + sudo sed -i "s/^MACAddressPolicy=.*/MACAddressPolicy=none/" /usr/lib/systemd/network/99-default.link || true +else + # curl-minimal already exists in al2023 so install curl only on al2 + sudo yum install -y curl + + # Remove the ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. 
+ if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi +fi + +sudo mkdir -p /etc/eks/ + +################################################################################ +### Time ####################################################################### +################################################################################ + +sudo mv $WORKING_DIR/configure-clocksource.service /etc/eks/configure-clocksource.service + +################################################################################ +### SSH ######################################################################## +################################################################################ + +# Disable weak ciphers +echo -e "\nCiphers aes128-ctr,aes256-ctr,aes128-gcm@openssh.com,aes256-gcm@openssh.com" | sudo tee -a /etc/ssh/sshd_config +sudo systemctl restart sshd.service + +################################################################################ +### iptables ################################################################### +################################################################################ + +sudo mv $WORKING_DIR/iptables-restore.service /etc/eks/iptables-restore.service + +################################################################################ +### awscli ##################################################### +################################################################################ + +### isolated regions can't communicate to awscli.amazonaws.com so installing awscli through yum +ISOLATED_REGIONS="${ISOLATED_REGIONS:-us-iso-east-1 us-iso-west-1 us-isob-east-1}" +if ! 
[[ ${ISOLATED_REGIONS} =~ $BINARY_BUCKET_REGION ]]; then + # https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html + echo "Installing awscli v2 bundle" + AWSCLI_DIR="${WORKING_DIR}/awscli-install" + mkdir "${AWSCLI_DIR}" + curl \ + --silent \ + --show-error \ + --retry 10 \ + --retry-delay 1 \ + -L "https://awscli.amazonaws.com/awscli-exe-linux-${MACHINE}.zip" -o "${AWSCLI_DIR}/awscliv2.zip" + unzip -q "${AWSCLI_DIR}/awscliv2.zip" -d ${AWSCLI_DIR} + sudo "${AWSCLI_DIR}/aws/install" --bin-dir /bin/ --update +else + echo "Installing awscli package" + sudo yum install -y awscli +fi + +################################################################################ +### systemd #################################################################### +################################################################################ + +sudo mv "${WORKING_DIR}/runtime.slice" /etc/systemd/system/runtime.slice + +############################################################################### +### Containerd setup ########################################################## +############################################################################### + +# install runc and lock version +sudo yum install -y runc-${RUNC_VERSION} +sudo yum versionlock runc-* + +# install containerd and lock version +sudo yum install -y containerd-${CONTAINERD_VERSION} +sudo yum versionlock containerd-* + +# install cri-tools for crictl, needed to interact with containerd's CRI server +sudo yum install -y cri-tools + +sudo mkdir -p /etc/eks/containerd +if [ -f "/etc/eks/containerd/containerd-config.toml" ]; then + ## this means we are building a gpu ami and have already placed a containerd configuration file in /etc/eks + echo "containerd config is already present" +else + sudo mv $WORKING_DIR/containerd-config.toml /etc/eks/containerd/containerd-config.toml +fi + +sudo mv $WORKING_DIR/kubelet-containerd.service /etc/eks/containerd/kubelet-containerd.service +sudo mv 
$WORKING_DIR/sandbox-image.service /etc/eks/containerd/sandbox-image.service +sudo mv $WORKING_DIR/pull-sandbox-image.sh /etc/eks/containerd/pull-sandbox-image.sh +sudo mv $WORKING_DIR/pull-image.sh /etc/eks/containerd/pull-image.sh +sudo chmod +x /etc/eks/containerd/pull-sandbox-image.sh +sudo chmod +x /etc/eks/containerd/pull-image.sh +sudo mkdir -p /etc/systemd/system/containerd.service.d +CONFIGURE_CONTAINERD_SLICE=$(vercmp "$KUBERNETES_VERSION" gteq "1.24.0" || true) +if [ "$CONFIGURE_CONTAINERD_SLICE" == "true" ]; then + cat << EOF | sudo tee /etc/systemd/system/containerd.service.d/00-runtime-slice.conf +[Service] +Slice=runtime.slice +EOF +fi + +cat << EOF | sudo tee /etc/systemd/system/containerd.service.d/10-compat-symlink.conf +[Service] +ExecStartPre=/bin/ln -sf /run/containerd/containerd.sock /run/dockershim.sock +EOF + +cat << EOF | sudo tee -a /etc/modules-load.d/containerd.conf +overlay +br_netfilter +EOF + +cat << EOF | sudo tee -a /etc/sysctl.d/99-kubernetes-cri.conf +net.bridge.bridge-nf-call-ip6tables = 1 +net.bridge.bridge-nf-call-iptables = 1 +net.ipv4.ip_forward = 1 +EOF + +############################################################################### +### Nerdctl setup ############################################################# +############################################################################### + +sudo yum install -y nerdctl +sudo mkdir /etc/nerdctl +cat << EOF | sudo tee -a /etc/nerdctl/nerdctl.toml +namespace = "k8s.io" +EOF + +################################################################################ +### Docker ##################################################################### +################################################################################ + +sudo yum install -y device-mapper-persistent-data lvm2 + +if [[ ! -v "INSTALL_DOCKER" ]]; then + INSTALL_DOCKER=$(vercmp "$KUBERNETES_VERSION" lt "1.25.0" || true) +else + echo "WARNING: using override INSTALL_DOCKER=${INSTALL_DOCKER}. 
This option is deprecated and will be removed in a future release." +fi + +if [[ "$INSTALL_DOCKER" == "true" ]]; then + sudo amazon-linux-extras enable docker + sudo groupadd -og 1950 docker + sudo useradd --gid $(getent group docker | cut -d: -f3) docker + + # install docker and lock version + sudo yum install -y docker-${DOCKER_VERSION}* + sudo yum versionlock docker-* + sudo usermod -aG docker $USER + + # Remove all options from sysconfig docker. + sudo sed -i '/OPTIONS/d' /etc/sysconfig/docker + + sudo mkdir -p /etc/docker + sudo mv $WORKING_DIR/docker-daemon.json /etc/docker/daemon.json + sudo chown root:root /etc/docker/daemon.json + + # Enable docker daemon to start on boot. + sudo systemctl daemon-reload +fi + +################################################################################ +### Logrotate ################################################################## +################################################################################ + +# kubelet uses journald which has built-in rotation and capped size. 
+# See man 5 journald.conf +sudo mv $WORKING_DIR/logrotate-kube-proxy /etc/logrotate.d/kube-proxy +sudo mv $WORKING_DIR/logrotate.conf /etc/logrotate.conf +sudo chown root:root /etc/logrotate.d/kube-proxy +sudo chown root:root /etc/logrotate.conf +sudo mkdir -p /var/log/journal + +################################################################################ +### Kubernetes ################################################################# +################################################################################ + +sudo mkdir -p /etc/kubernetes/manifests +sudo mkdir -p /var/lib/kubernetes +sudo mkdir -p /var/lib/kubelet +sudo mkdir -p /opt/cni/bin + +echo "Downloading binaries from: s3://$BINARY_BUCKET_NAME" +S3_DOMAIN="amazonaws.com" +if [ "$BINARY_BUCKET_REGION" = "cn-north-1" ] || [ "$BINARY_BUCKET_REGION" = "cn-northwest-1" ]; then + S3_DOMAIN="amazonaws.com.cn" +elif [ "$BINARY_BUCKET_REGION" = "us-iso-east-1" ] || [ "$BINARY_BUCKET_REGION" = "us-iso-west-1" ]; then + S3_DOMAIN="c2s.ic.gov" +elif [ "$BINARY_BUCKET_REGION" = "us-isob-east-1" ]; then + S3_DOMAIN="sc2s.sgov.gov" +fi +S3_URL_BASE="https://$BINARY_BUCKET_NAME.s3.$BINARY_BUCKET_REGION.$S3_DOMAIN/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/$ARCH" +S3_PATH="s3://$BINARY_BUCKET_NAME/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/$ARCH" + +BINARIES=( + kubelet + aws-iam-authenticator +) +for binary in ${BINARIES[*]}; do + if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then + echo "AWS cli present - using it to copy binaries from s3." + aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$binary . + aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$binary.sha256 . + else + echo "AWS cli missing - using wget to fetch binaries from s3. Note: This won't work for private bucket." 
+ sudo wget $S3_URL_BASE/$binary + sudo wget $S3_URL_BASE/$binary.sha256 + fi + sudo sha256sum -c $binary.sha256 + sudo chmod +x $binary + sudo mv $binary /usr/bin/ +done + +# Verify that the aws-iam-authenticator is at last v0.5.9 or greater. Otherwise, nodes will be +# unable to join clusters due to upgrading to client.authentication.k8s.io/v1beta1 +iam_auth_version=$(sudo /usr/bin/aws-iam-authenticator version | jq -r .Version) +if vercmp "$iam_auth_version" lt "v0.5.9"; then + # To resolve this issue, you need to update the aws-iam-authenticator binary. Using binaries distributed by EKS + # with kubernetes_build_date 2022-10-31 or later include v0.5.10 or greater. + echo "❌ The aws-iam-authenticator should be on version v0.5.9 or later. Found $iam_auth_version" + exit 1 +fi + +# Since CNI 0.7.0, all releases are done in the plugins repo. +CNI_PLUGIN_FILENAME="cni-plugins-linux-${ARCH}-${CNI_PLUGIN_VERSION}" + +if [ "$PULL_CNI_FROM_GITHUB" = "true" ]; then + echo "Downloading CNI plugins from Github" + wget "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGIN_VERSION}/${CNI_PLUGIN_FILENAME}.tgz" + wget "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGIN_VERSION}/${CNI_PLUGIN_FILENAME}.tgz.sha512" + sudo sha512sum -c "${CNI_PLUGIN_FILENAME}.tgz.sha512" + rm "${CNI_PLUGIN_FILENAME}.tgz.sha512" +else + if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then + echo "AWS cli present - using it to copy binaries from s3." + aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/${CNI_PLUGIN_FILENAME}.tgz . + aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/${CNI_PLUGIN_FILENAME}.tgz.sha256 . + else + echo "AWS cli missing - using wget to fetch cni binaries from s3. Note: This won't work for private bucket." 
+ sudo wget "$S3_URL_BASE/${CNI_PLUGIN_FILENAME}.tgz" + sudo wget "$S3_URL_BASE/${CNI_PLUGIN_FILENAME}.tgz.sha256" + fi + sudo sha256sum -c "${CNI_PLUGIN_FILENAME}.tgz.sha256" +fi +sudo tar -xvf "${CNI_PLUGIN_FILENAME}.tgz" -C /opt/cni/bin +rm "${CNI_PLUGIN_FILENAME}.tgz" + +sudo rm ./*.sha256 + +sudo mkdir -p /etc/kubernetes/kubelet +sudo mkdir -p /etc/systemd/system/kubelet.service.d +sudo mv $WORKING_DIR/kubelet-kubeconfig /var/lib/kubelet/kubeconfig +sudo chown root:root /var/lib/kubelet/kubeconfig + +# Inject CSIServiceAccountToken feature gate to kubelet config if kubernetes version starts with 1.20. +# This is only injected for 1.20 since CSIServiceAccountToken will be moved to beta starting 1.21. +if [[ $KUBERNETES_VERSION == "1.20"* ]]; then + KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED=$(cat $WORKING_DIR/kubelet-config.json | jq '.featureGates += {CSIServiceAccountToken: true}') + echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $WORKING_DIR/kubelet-config.json +fi + +# Enable Feature Gate for KubeletCredentialProviders in versions less than 1.28 since this feature flag was removed in 1.28. 
+# TODO: Remove this during 1.27 EOL +if vercmp $KUBERNETES_VERSION lt "1.28"; then + KUBELET_CONFIG_WITH_KUBELET_CREDENTIAL_PROVIDER_FEATURE_GATE_ENABLED=$(cat $WORKING_DIR/kubelet-config.json | jq '.featureGates += {KubeletCredentialProviders: true}') + echo $KUBELET_CONFIG_WITH_KUBELET_CREDENTIAL_PROVIDER_FEATURE_GATE_ENABLED > $WORKING_DIR/kubelet-config.json +fi + +sudo mv $WORKING_DIR/kubelet.service /etc/systemd/system/kubelet.service +sudo chown root:root /etc/systemd/system/kubelet.service +sudo mv $WORKING_DIR/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json +sudo chown root:root /etc/kubernetes/kubelet/kubelet-config.json + +sudo systemctl daemon-reload +# Disable the kubelet until the proper dropins have been configured +sudo systemctl disable kubelet + +################################################################################ +### EKS ######################################################################## +################################################################################ + +sudo mkdir -p /etc/eks +sudo mv $WORKING_DIR/get-ecr-uri.sh /etc/eks/get-ecr-uri.sh +sudo chmod +x /etc/eks/get-ecr-uri.sh +sudo mv $WORKING_DIR/eni-max-pods.txt /etc/eks/eni-max-pods.txt +sudo mv $WORKING_DIR/bootstrap.sh /etc/eks/bootstrap.sh +sudo chmod +x /etc/eks/bootstrap.sh +sudo mv $WORKING_DIR/max-pods-calculator.sh /etc/eks/max-pods-calculator.sh +sudo chmod +x /etc/eks/max-pods-calculator.sh + +################################################################################ +### ECR CREDENTIAL PROVIDER #################################################### +################################################################################ +ECR_CREDENTIAL_PROVIDER_BINARY="ecr-credential-provider" +if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then + echo "AWS cli present - using it to copy ${ECR_CREDENTIAL_PROVIDER_BINARY} from s3." + aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$ECR_CREDENTIAL_PROVIDER_BINARY . 
+else + echo "AWS cli missing - using wget to fetch ${ECR_CREDENTIAL_PROVIDER_BINARY} from s3. Note: This won't work for private bucket." + sudo wget "$S3_URL_BASE/$ECR_CREDENTIAL_PROVIDER_BINARY" +fi +sudo chmod +x $ECR_CREDENTIAL_PROVIDER_BINARY +sudo mkdir -p /etc/eks/image-credential-provider +sudo mv $ECR_CREDENTIAL_PROVIDER_BINARY /etc/eks/image-credential-provider/ +sudo mv $WORKING_DIR/ecr-credential-provider-config.json /etc/eks/image-credential-provider/config.json + +################################################################################ +### Cache Images ############################################################### +################################################################################ + +if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! [[ ${ISOLATED_REGIONS} =~ $BINARY_BUCKET_REGION ]]; then + AWS_DOMAIN=$(imds 'latest/meta-data/services/domain') + ECR_URI=$(/etc/eks/get-ecr-uri.sh "${BINARY_BUCKET_REGION}" "${AWS_DOMAIN}") + + sudo systemctl daemon-reload + sudo systemctl start containerd + sudo systemctl enable containerd + + K8S_MINOR_VERSION=$(echo "${KUBERNETES_VERSION}" | cut -d'.' 
-f1-2) + + #### Cache kube-proxy images starting with the addon default version and the latest version + KUBE_PROXY_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name kube-proxy --kubernetes-version=${K8S_MINOR_VERSION}) + KUBE_PROXY_IMGS=() + if [[ $(jq '.addons | length' <<< $KUBE_PROXY_ADDON_VERSIONS) -gt 0 ]]; then + DEFAULT_KUBE_PROXY_FULL_VERSION=$(echo "${KUBE_PROXY_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') + DEFAULT_KUBE_PROXY_VERSION=$(echo "${DEFAULT_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) + DEFAULT_KUBE_PROXY_PLATFORM_VERSION=$(echo "${DEFAULT_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) + + LATEST_KUBE_PROXY_FULL_VERSION=$(echo "${KUBE_PROXY_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) + LATEST_KUBE_PROXY_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) + LATEST_KUBE_PROXY_PLATFORM_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) + + KUBE_PROXY_IMGS=( + ## Default kube-proxy images + "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" + "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-minimal-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" + + ## Latest kube-proxy images + "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" + "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-minimal-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" + ) + fi + + #### Cache VPC CNI images starting with the addon default version and the latest version + VPC_CNI_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name vpc-cni --kubernetes-version=${K8S_MINOR_VERSION}) + VPC_CNI_IMGS=() + if [[ $(jq '.addons | length' <<< $VPC_CNI_ADDON_VERSIONS) -gt 0 ]]; then + DEFAULT_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] 
.defaultVersion==true).addonVersion') + LATEST_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) + CNI_IMG="${ECR_URI}/amazon-k8s-cni" + CNI_INIT_IMG="${CNI_IMG}-init" + + VPC_CNI_IMGS=( + ## Default VPC CNI Images + "${CNI_IMG}:${DEFAULT_VPC_CNI_VERSION}" + "${CNI_INIT_IMG}:${DEFAULT_VPC_CNI_VERSION}" + + ## Latest VPC CNI Images + "${CNI_IMG}:${LATEST_VPC_CNI_VERSION}" + "${CNI_INIT_IMG}:${LATEST_VPC_CNI_VERSION}" + ) + fi + + CACHE_IMGS=( + ${KUBE_PROXY_IMGS[@]+"${KUBE_PROXY_IMGS[@]}"} + ${VPC_CNI_IMGS[@]+"${VPC_CNI_IMGS[@]}"} + ) + PULLED_IMGS=() + REGIONS=$(aws ec2 describe-regions --all-regions --output text --query 'Regions[].[RegionName]') + + for img in "${CACHE_IMGS[@]}"; do + ## only kube-proxy-minimal is vended for K8s 1.24+ + if [[ "${img}" == *"kube-proxy:"* ]] && [[ "${img}" != *"-minimal-"* ]] && vercmp "${K8S_MINOR_VERSION}" gteq "1.24"; then + continue + fi + ## Since eksbuild.x version may not match the image tag, we need to decrement the eksbuild version until we find the latest image tag within the app semver + eksbuild_version="1" + if [[ ${img} == *'eksbuild.'* ]]; then + eksbuild_version=$(echo "${img}" | grep -o 'eksbuild\.[0-9]\+' | cut -d'.' 
-f2) + fi + ## iterate through decrementing the build version each time + for build_version in $(seq "${eksbuild_version}" -1 1); do + img=$(echo "${img}" | sed -E "s/eksbuild.[0-9]+/eksbuild.${build_version}/") + if /etc/eks/containerd/pull-image.sh "${img}"; then + PULLED_IMGS+=("${img}") + break + elif [[ "${build_version}" -eq 1 ]]; then + exit 1 + fi + done + done + + #### Tag the pulled down image for all other regions in the partition + for region in ${REGIONS[*]}; do + for img in "${PULLED_IMGS[@]}"; do + region_uri=$(/etc/eks/get-ecr-uri.sh "${region}" "${AWS_DOMAIN}") + regional_img="${img/$ECR_URI/$region_uri}" + sudo ctr -n k8s.io image tag "${img}" "${regional_img}" || : + ## Tag ECR fips endpoint for supported regions + if [[ "${region}" =~ (us-east-1|us-east-2|us-west-1|us-west-2|us-gov-east-1|us-gov-west-1) ]]; then + regional_fips_img="${regional_img/.ecr./.ecr-fips.}" + sudo ctr -n k8s.io image tag "${img}" "${regional_fips_img}" || : + sudo ctr -n k8s.io image tag "${img}" "${regional_fips_img/-eksbuild.1/}" || : + fi + ## Cache the non-addon VPC CNI images since "v*.*.*-eksbuild.1" is equivalent to leaving off the eksbuild suffix + if [[ "${img}" == *"-cni"*"-eksbuild.1" ]]; then + sudo ctr -n k8s.io image tag "${img}" "${regional_img/-eksbuild.1/}" || : + fi + done + done +fi + +################################################################################ +### SSM Agent ################################################################## +################################################################################ + +if yum list installed | grep amazon-ssm-agent; then + echo "amazon-ssm-agent already present - skipping install" +else + if ! 
[[ -z "${SSM_AGENT_VERSION}" ]]; then + echo "Installing amazon-ssm-agent@${SSM_AGENT_VERSION} from S3" + sudo yum install -y https://s3.${BINARY_BUCKET_REGION}.${S3_DOMAIN}/amazon-ssm-${BINARY_BUCKET_REGION}/${SSM_AGENT_VERSION}/linux_${ARCH}/amazon-ssm-agent.rpm + else + echo "Installing amazon-ssm-agent from AL core repository" + sudo yum install -y amazon-ssm-agent + fi +fi + +################################################################################ +### AMI Metadata ############################################################### +################################################################################ + +BASE_AMI_ID=$(imds /latest/meta-data/ami-id) +cat << EOF > "${WORKING_DIR}/release" +BASE_AMI_ID="$BASE_AMI_ID" +BUILD_TIME="$(date)" +BUILD_KERNEL="$(uname -r)" +ARCH="$(uname -m)" +EOF +sudo mv "${WORKING_DIR}/release" /etc/eks/release +sudo chown -R root:root /etc/eks + +################################################################################ +### Stuff required by "protectKernelDefaults=true" ############################# +################################################################################ + +cat << EOF | sudo tee -a /etc/sysctl.d/99-amazon.conf +vm.overcommit_memory=1 +kernel.panic=10 +kernel.panic_on_oops=1 +EOF + +################################################################################ +### Setting up sysctl properties ############################################### +################################################################################ + +echo fs.inotify.max_user_watches=524288 | sudo tee -a /etc/sysctl.conf +echo fs.inotify.max_user_instances=8192 | sudo tee -a /etc/sysctl.conf +echo vm.max_map_count=524288 | sudo tee -a /etc/sysctl.conf +echo 'kernel.pid_max=4194304' | sudo tee -a /etc/sysctl.conf + +################################################################################ +### adding log-collector-script ################################################ 
+################################################################################ +sudo mkdir -p /etc/eks/log-collector-script/ +sudo cp $WORKING_DIR/log-collector-script/eks-log-collector.sh /etc/eks/log-collector-script/ + +################################################################################ +### Remove Yum Update from cloud-init config ################################### +################################################################################ +sudo sed -i \ + 's/ - package-update-upgrade-install/# Removed so that nodes do not have version skew based on when the node was started.\n# - package-update-upgrade-install/' \ + /etc/cloud/cloud.cfg diff --git a/scripts/install_additional_repos.sh b/scripts/install_additional_repos.sh new file mode 100644 index 000000000..dd1862743 --- /dev/null +++ b/scripts/install_additional_repos.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# +# Install additional YUM repositories, typically used for security patches. +# The format of ADDITIONAL_YUM_REPOS is: "repo=patches-repo,name=Install patches,baseurl=http://amazonlinux.$awsregion.$awsdomain/xxxx,priority=10" +# which will create the file '/etc/yum.repos.d/patches-repo.repo' having the following content: +# ``` +# [patches-repo] +# name=Install patches +# baseurl=http://amazonlinux.$awsregion.$awsdomain/xxxx +# priority=10 +# ``` +# Note that priority is optional, but the other parameters are required. 
Multiple yum repos can be specified, each one separated by ';' + +if [ -z "${ADDITIONAL_YUM_REPOS}" ]; then + echo "no additional yum repo, skipping" + exit 0 +fi + +AWK_CMD=' +BEGIN {RS=";";FS=","} +{ + delete vars; + for(i = 1; i <= NF; ++i) { + n = index($i, "="); + if(n) { + vars[substr($i, 1, n-1)] = substr($i, n + 1) + } + } + Repo = "/etc/yum.repos.d/"vars["repo"]".repo" +} +{print "["vars["repo"]"]" > Repo} +{print "name="vars["name"] > Repo} +{print "baseurl="vars["baseurl"] > Repo} +{if (length(vars["priority"]) != 0) print "priority="vars["priority"] > Repo} +' +sudo awk "$AWK_CMD" <<< "${ADDITIONAL_YUM_REPOS}" diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh new file mode 100755 index 000000000..9b13a18bb --- /dev/null +++ b/scripts/upgrade_kernel.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +set -o pipefail +set -o nounset +set -o errexit + +if [[ -z "$KERNEL_VERSION" ]]; then + if vercmp "$KUBERNETES_VERSION" gteq "1.24.0"; then + KERNEL_VERSION=5.10 + else + KERNEL_VERSION=5.4 + fi + echo "kernel_version is unset. Setting to $KERNEL_VERSION based on Kubernetes version $KUBERNETES_VERSION." +fi + +if [[ $KERNEL_VERSION == 4.14* ]]; then + sudo yum install -y "kernel-${KERNEL_VERSION}*" +else + KERNEL_MINOR_VERSION=$(echo ${KERNEL_VERSION} | cut -d. 
-f-2) + sudo amazon-linux-extras enable "kernel-${KERNEL_MINOR_VERSION}" + sudo yum install -y "kernel-${KERNEL_VERSION}*" +fi + +sudo yum install -y "kernel-headers-${KERNEL_VERSION}*" "kernel-devel-${KERNEL_VERSION}*" + +# enable pressure stall information +sudo grubby \ + --update-kernel=ALL \ + --args="psi=1" + +# use the tsc clocksource by default +# https://repost.aws/knowledge-center/manage-ec2-linux-clock-source +sudo grubby \ + --update-kernel=ALL \ + --args="clocksource=tsc tsc=reliable" diff --git a/scripts/validate.sh b/scripts/validate.sh new file mode 100644 index 000000000..42da83266 --- /dev/null +++ b/scripts/validate.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +validate_file_nonexists() { + local file_blob=$1 + for f in $file_blob; do + if [ -e "$f" ]; then + echo "$f shouldn't exists" + exit 1 + fi + done +} + +validate_file_nonexists '/etc/hostname' +validate_file_nonexists '/etc/resolv.conf' +validate_file_nonexists '/etc/ssh/ssh_host*' +validate_file_nonexists '/home/ec2-user/.ssh/authorized_keys' +validate_file_nonexists '/root/.ssh/authorized_keys' +validate_file_nonexists '/var/lib/cloud/data' +validate_file_nonexists '/var/lib/cloud/instance' +validate_file_nonexists '/var/lib/cloud/instances' +validate_file_nonexists '/var/lib/cloud/sem' +validate_file_nonexists '/var/lib/dhclient/*' +validate_file_nonexists '/var/lib/dhcp/dhclient.*' +validate_file_nonexists '/var/lib/yum/history' +validate_file_nonexists '/var/log/cloud-init-output.log' +validate_file_nonexists '/var/log/cloud-init.log' +validate_file_nonexists '/var/log/secure' +validate_file_nonexists '/var/log/wtmp' + +actual_kernel=$(uname -r) +echo "Verifying that kernel version $actual_kernel matches $KERNEL_VERSION..." + +if [[ $actual_kernel == $KERNEL_VERSION* ]]; then + echo "Kernel matches expected version!" +else + echo "Kernel does not match expected version!" 
+ exit 1 +fi + +function versionlock-entries() { + # the format of this output is EPOCH:NAME-VERSION-RELEASE.ARCH + # more info in yum-versionlock(1) + # rpm doesn't accept EPOCH when querying the db, so remove it + yum versionlock list --quiet | cut -d ':' -f2 +} + +function versionlock-packages() { + versionlock-entries | xargs -I '{}' rpm --query '{}' --queryformat '%{NAME}\n' +} + +function verify-versionlocks() { + for ENTRY in $(versionlock-entries); do + if ! rpm --query "$ENTRY" &> /dev/null; then + echo "There is no package matching the versionlock entry: '$ENTRY'" + exit 1 + fi + done + + LOCKED_PACKAGES=$(versionlock-packages | wc -l) + UNIQUE_LOCKED_PACKAGES=$(versionlock-packages | sort -u | wc -l) + if [ $LOCKED_PACKAGES -ne $UNIQUE_LOCKED_PACKAGES ]; then + echo "Package(s) have multiple version locks!" + versionlock-entries + fi + + echo "Package versionlocks are correct!" +} + +# run verify-versionlocks on al2 only, as it is not needed on al2023 +if ! cat /etc/*release | grep "al2023" > /dev/null 2>&1; then + echo "Verifying that the package versionlocks are correct..." + verify-versionlocks +fi + +REQUIRED_COMMANDS=(unpigz) + +for ENTRY in "${REQUIRED_COMMANDS[@]}"; do + if ! command -v "$ENTRY" > /dev/null; then + echo "Required command does not exist: '$ENTRY'" + exit 1 + fi +done + +echo "Required commands were found: ${REQUIRED_COMMANDS[*]}" + +REQUIRED_FREE_MEBIBYTES=1024 +TOTAL_MEBIBYTES=$(df -m / | tail -n1 | awk '{print $2}') +FREE_MEBIBYTES=$(df -m / | tail -n1 | awk '{print $4}') +echo "Disk space in mebibytes (required/free/total): ${REQUIRED_FREE_MEBIBYTES}/${FREE_MEBIBYTES}/${TOTAL_MEBIBYTES}" +if [ ${FREE_MEBIBYTES} -lt ${REQUIRED_FREE_MEBIBYTES} ]; then + echo "Disk space requirements not met!" + exit 1 +else + echo "Disk space requirements were met." 
+fi diff --git a/test/Dockerfile b/test/Dockerfile new file mode 100644 index 000000000..d00837c3e --- /dev/null +++ b/test/Dockerfile @@ -0,0 +1,20 @@ +FROM public.ecr.aws/aws-ec2/amazon-ec2-metadata-mock:v1.11.2 as aemm +FROM public.ecr.aws/amazonlinux/amazonlinux:2 +RUN amazon-linux-extras enable docker && \ + yum install -y jq containerd wget which && \ + wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 && \ + chmod a+x /usr/local/bin/yq + +ENV IMDS_ENDPOINT=127.0.0.1:1338 +COPY --from=aemm /ec2-metadata-mock /sbin/ec2-metadata-mock +RUN mkdir -p /etc/systemd/system +RUN mkdir -p /etc/eks/containerd +COPY files/ /etc/eks/ +COPY files/containerd-config.toml files/kubelet-containerd.service files/pull-sandbox-image.sh files/sandbox-image.service /etc/eks/containerd/ +COPY files/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json +COPY files/kubelet-kubeconfig /var/lib/kubelet/kubeconfig +COPY files/ecr-credential-provider-config.json /etc/eks/image-credential-provider/config.json +COPY test/entrypoint.sh /entrypoint.sh +COPY files/bin/* /usr/bin/ +COPY test/mocks/ /sbin/ +ENTRYPOINT ["/entrypoint.sh"] diff --git a/test/README.md b/test/README.md new file mode 100644 index 000000000..6d9f58a2f --- /dev/null +++ b/test/README.md @@ -0,0 +1,40 @@ +## Tests + +This directory contains a Dockerfile that can be used locally to test the `/etc/eks/bootstrap.sh` script without having to use a real AL2 EC2 instance for a quick dev-loop. It is still necessary to test the bootstrap script on a real instance since the Docker image is not a fully accurate representation. + +## AL2 EKS Optimized AMI Docker Image + +The image is built using the official AL2 image `public.ecr.aws/amazonlinux/amazonlinux:2`. It has several mocks installed including the [ec2-metadata-mock](https://github.com/aws/amazon-ec2-metadata-mock). 
Mocks are installed into `/sbin`, so adding additional ones as necessary should be as simple as dropping a bash script in the `mocks` dir named as the command you would like to mock out. + +## Usage + +```bash + +## The docker context needs to be at the root of the repo +docker build -t eks-optimized-ami -f Dockerfile ../ + +docker run -it eks-optimized-ami /etc/eks/bootstrap.sh --b64-cluster-ca dGVzdA== --apiserver-endpoint http://my-api-endpoint test +``` + +The `test-harness.sh` script wraps a build and runs the test scripts in the `cases` dir. Test scripts within the `cases` dir are invoked by the `test-harness.sh` script and have access to the `run` function. The `run` function accepts a temporary directory as an argument in order to mount it as a volume in the container so that test scripts can check files within the `/etc/kubernetes/` directory after a bootstrap run. The remaining arguments to the `run` function are a path to a script within the AL2 EKS Optimized AMI Docker Container. + +Here's an example `run` call: + +``` +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv4 \ + --dns-cluster-ip 192.168.0.1 \ + test-cluster-name +``` + +## ECR Public + +You may need to log out of ECR Public or reauthenticate if your credentials are expired: + +```bash +docker logout public.ecr.aws +``` + +ECR Public allows anonymous access, but you cannot have expired credentials loaded. 
\ No newline at end of file diff --git a/test/cases/api-qps-k8s-1.21-below.sh b/test/cases/api-qps-k8s-1.21-below.sh new file mode 100755 index 000000000..890e580ec --- /dev/null +++ b/test/cases/api-qps-k8s-1.21-below.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "--> Should use default API server QPS for K8s 1.21-" +exit_code=0 +export KUBELET_VERSION=v1.21.0-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +# values should not be set +expected_api_qps="null" +expected_api_burst="null" + +actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) +actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) +if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then + echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" + exit 1 +fi + +if [[ ${actual_api_burst} != ${expected_api_burst} ]]; then + echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" + exit 1 +fi diff --git a/test/cases/api-qps-k8s-1.22-to-1.26.sh b/test/cases/api-qps-k8s-1.22-to-1.26.sh new file mode 100755 index 000000000..f61f1d0ac --- /dev/null +++ b/test/cases/api-qps-k8s-1.22-to-1.26.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "--> Should increase API server QPS for K8s 1.22 - 1.26" +exit_code=0 +export KUBELET_VERSION=v1.22.0-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_api_qps="10" +expected_api_burst="20" + +actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) +actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) +if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then + echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" + exit 1 +fi + +if [[ ${actual_api_burst} != ${expected_api_burst} ]]; then + echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" + exit 1 +fi + +export KUBELET_VERSION=v1.26.0-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_api_qps="10" +expected_api_burst="20" + +actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) +actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) +if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then + echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" + exit 1 +fi + +if [[ ${actual_api_burst} != ${expected_api_burst} ]]; then + echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" + exit 1 +fi diff --git a/test/cases/api-qps-k8s-1.27-above.sh b/test/cases/api-qps-k8s-1.27-above.sh new file mode 100755 index 000000000..d25f1ac31 --- /dev/null +++ b/test/cases/api-qps-k8s-1.27-above.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "--> Should use default API server QPS for K8s 1.27+" +exit_code=0 +export KUBELET_VERSION=v1.27.0-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + 
+  echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'"
+if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected --cloud-provider=aws to be present in kubelet's systemd units" +fi +ACTUAL_PROVIDER_ID=$(jq -r '.providerID' $KUBELET_CONFIG_FILE) +if [ ! "$ACTUAL_PROVIDER_ID" = "null" ]; then + fail "expected .providerID to be absent in kubelet's config file but was '$ACTUAL_PROVIDER_ID'" +fi + +echo "--> Should use external cloud provider at k8s version 1.26" +# at 1.26 +export KUBELET_VERSION=v1.26.5-eks-ba74326 +EXIT_CODE=0 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected a zero exit code but got '${EXIT_CODE}'" +fi +EXIT_CODE=0 +grep -RFq -e "--cloud-provider=external" $KUBELET_UNIT_DIR || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected --cloud-provider=external to be present in kubelet's systemd units" +fi +ACTUAL_PROVIDER_ID=$(jq -r '.providerID' $KUBELET_CONFIG_FILE) +if [ ! "$ACTUAL_PROVIDER_ID" = "$EXPECTED_PROVIDER_ID" ]; then + fail "expected .providerID=$EXPECTED_PROVIDER_ID to be present in kubelet's config file but was '$ACTUAL_PROVIDER_ID'" +fi + +echo "--> Should use external cloud provider above k8s version 1.26" +# above 1.26 +export KUBELET_VERSION=v1.27.0-eks-ba74326 +EXIT_CODE=0 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected a zero exit code but got '${EXIT_CODE}'" +fi +EXIT_CODE=0 +grep -RFq -e "--cloud-provider=external" $KUBELET_UNIT_DIR || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected --cloud-provider=external to be present in kubelet's systemd units" +fi +ACTUAL_PROVIDER_ID=$(jq -r '.providerID' $KUBELET_CONFIG_FILE) +if [ ! 
+  fail "expected .providerID=$EXPECTED_PROVIDER_ID to be present in kubelet's config file but was '$ACTUAL_PROVIDER_ID'"
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +echo "--> Should not allow dockerd as container runtime when at or above k8s version 1.24" +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime dockerd \ + test || exit_code=$? + +echo "EXIT CODE $exit_code" +if [[ ${exit_code} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi +exit_code=0 + +echo "--> Should allow containerd as container runtime when at or above k8s version 1.24" +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime containerd \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +echo "--> Should have default container runtime when at or above k8s version 1.24" +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +echo "--> Should ignore docker-specific flags when at or above k8s version 1.24" +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --enable-docker-bridge true \ + --docker-config-json "{\"some\":\"json\"}" \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi diff --git a/test/cases/containerd-config.sh b/test/cases/containerd-config.sh new file mode 100755 index 000000000..048ea9dfe --- /dev/null +++ b/test/cases/containerd-config.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +set -euo pipefail + +exit_code=0 + +echo "--> Default containerd config file should be valid" +STDERR_FILE=$(mktemp) +containerd -c /etc/eks/containerd/containerd-config.toml config dump > /dev/null 2> "$STDERR_FILE" || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: default containerd config file is invalid! $(cat "$STDERR_FILE")" + exit 1 +fi + +echo "--> Should fail when given an invalid containerd config" +CONTAINERD_TOML=$(mktemp containerd-XXXXX.toml) +cat > "$CONTAINERD_TOML" << EOF +[cgroup] +path = "foo" +[cgroup] +path = "bar" +EOF + +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime containerd \ + --containerd-config-file "$CONTAINERD_TOML" \ + test || exit_code=$? 
+ +if [[ ${exit_code} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi diff --git a/test/cases/ecr-credential-provider-config.sh b/test/cases/ecr-credential-provider-config.sh new file mode 100755 index 000000000..4eb74a761 --- /dev/null +++ b/test/cases/ecr-credential-provider-config.sh @@ -0,0 +1,102 @@ +#!/usr/bin/env bash +set -euo pipefail + +exit_code=0 +TEMP_DIR=$(mktemp -d) + +export CRED_PROVIDER_FILE="/etc/eks/image-credential-provider/config.json" +export CRED_PROVIDER_RESET_FILE="./cred-provider-config" + +# Store the original version of the config +cp $CRED_PROVIDER_FILE $CRED_PROVIDER_RESET_FILE +# Reset the file that may have changed +function reset_scenario { + echo "Resetting test scenario" + cp $CRED_PROVIDER_RESET_FILE $CRED_PROVIDER_FILE +} + +echo "--> Should default to credentialprovider.kubelet.k8s.io/v1alpha1 and kubelet.config.k8s.io/v1alpha1 when below k8s version 1.27" +reset_scenario + +# This variable is used to override the default value in the kubelet mock +export KUBELET_VERSION=v1.22.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_cred_provider_api="credentialprovider.kubelet.k8s.io/v1alpha1" +actual=$(jq -r '.providers[0].apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_cred_provider_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.22 credential provider file to contain $expected_cred_provider_api" + exit 1 +fi + +expected_kubelet_config_api="kubelet.config.k8s.io/v1alpha1" +actual=$(jq -r '.apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_kubelet_config_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.22 credential provider file to contain $expected_kubelet_config_api" + exit 1 +fi + +echo "--> Should default to credentialprovider.kubelet.k8s.io/v1alpha1 and kubelet.config.k8s.io/v1alpha1 when below k8s version 1.27" +reset_scenario + +# This variable is used to override the default value in the kubelet mock +export KUBELET_VERSION=v1.26.0-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_cred_provider_api="credentialprovider.kubelet.k8s.io/v1alpha1" +actual=$(jq -r '.providers[0].apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_cred_provider_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.26 credential provider file to contain $expected_cred_provider_api" + exit 1 +fi + +expected_kubelet_config_api="kubelet.config.k8s.io/v1alpha1" +actual=$(jq -r '.apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_kubelet_config_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.26 credential provider file to contain $expected_kubelet_config_api" + exit 1 +fi + +echo "--> Should default to credentialprovider.kubelet.k8s.io/v1 and kubelet.config.k8s.io/v1 when at or above k8s version 1.27" +reset_scenario + +export KUBELET_VERSION=v1.27.1-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_cred_provider_api="credentialprovider.kubelet.k8s.io/v1" +actual=$(jq -r '.providers[0].apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_cred_provider_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.27 credential provider file to contain $expected_cred_provider_api" + exit 1 +fi + +expected_kubelet_config_api="kubelet.config.k8s.io/v1" +actual=$(jq -r '.apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_kubelet_config_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.27 credential provider file to contain $expected_kubelet_config_api" + exit 1 +fi diff --git a/test/cases/get-ecr-uri.sh b/test/cases/get-ecr-uri.sh new file mode 100755 index 000000000..5b4dd3209 --- /dev/null +++ b/test/cases/get-ecr-uri.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +echo "--> Should use specified account when passed in" +EXPECTED_ECR_URI="999999999999.dkr.ecr.mars-west-1.amazonaws.com.mars" +REGION="mars-west-1" +DOMAIN="amazonaws.com.mars" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}" "999999999999") +if [ ! "$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi + +echo "--> Should use account mapped to the region when set" +EXPECTED_ECR_URI="590381155156.dkr.ecr.eu-south-1.amazonaws.com" +REGION="eu-south-1" +DOMAIN="amazonaws.com" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}") +if [ ! "$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi + +echo "--> Should use non-opt-in account when not opt-in-region" +EXPECTED_ECR_URI="602401143452.dkr.ecr.us-east-2.amazonaws.com" +REGION="us-east-2" +DOMAIN="amazonaws.com" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}") +if [ ! 
"$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi + +echo "--> Should use us-west-2 account and region when opt-in-region" +EXPECTED_ECR_URI="602401143452.dkr.ecr.us-west-2.amazonaws.com" +REGION="eu-south-100" +DOMAIN="amazonaws.com" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}") +if [ ! "$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi + +echo "--> Should default us-gov-west-1 when unknown amazonaws.com.us-gov region" +EXPECTED_ECR_URI="013241004608.dkr.ecr.us-gov-west-1.amazonaws.com.us-gov" +REGION="us-gov-east-100" +DOMAIN="amazonaws.com.us-gov" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}") +if [ ! "$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi + +echo "--> Should default cn-northwest-1 when unknown amazonaws.com.cn region" +EXPECTED_ECR_URI="961992271922.dkr.ecr.cn-northwest-1.amazonaws.com.cn" +REGION="cn-north-100" +DOMAIN="amazonaws.com.cn" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}") +if [ ! "$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi + +echo "--> Should default us-iso-east-1 when unknown amazonaws.com.iso region" +EXPECTED_ECR_URI="725322719131.dkr.ecr.us-iso-east-1.amazonaws.com.iso" +REGION="us-iso-west-100" +DOMAIN="amazonaws.com.iso" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}") +if [ ! 
"$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi + +echo "--> Should default us-isob-east-1 when unknown amazonaws.com.isob region" +EXPECTED_ECR_URI="187977181151.dkr.ecr.us-isob-east-1.amazonaws.com.isob" +REGION="us-isob-west-100" +DOMAIN="amazonaws.com.isob" +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${REGION}" "${DOMAIN}") +if [ ! "$ECR_URI" = "$EXPECTED_ECR_URI" ]; then + echo "❌ Test Failed: expected ecr-uri=$EXPECTED_ECR_URI but got '${ECR_URI}'" + exit 1 +fi diff --git a/test/cases/imds-errors.sh b/test/cases/imds-errors.sh new file mode 100755 index 000000000..865ca8dc7 --- /dev/null +++ b/test/cases/imds-errors.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +export IMDS_DEBUG=true + +echo "--> Should succeed for known API" +EXIT_CODE=0 +imds /latest/meta-data/instance-id || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got: $EXIT_CODE" + exit 1 +fi + +echo "--> Should fail for unknown API" +EXIT_CODE=0 +imds /foo || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code" + exit 1 +fi + +echo "--> Should fail for invalid endpoint" +EXIT_CODE=0 +export IMDS_ENDPOINT="127.0.0.0:1234" +imds /latest/meta-data/instance-id || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code" + exit 1 +fi diff --git a/test/cases/ip-family-service-ipv6-cidr-mismatch.sh b/test/cases/ip-family-service-ipv6-cidr-mismatch.sh new file mode 100755 index 000000000..fe835ee03 --- /dev/null +++ b/test/cases/ip-family-service-ipv6-cidr-mismatch.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should fail validation - ip-family mismatch" +exit_code=0 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv4 \ + --service-ipv6-cidr 192.168.0.1/24 \ + test || exit_code=$? + +if [[ ${exit_code} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi diff --git a/test/cases/ipv4-cluster-dns-ip.sh b/test/cases/ipv4-cluster-dns-ip.sh new file mode 100755 index 000000000..b0e05355b --- /dev/null +++ b/test/cases/ipv4-cluster-dns-ip.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "--> Should return IPv4 DNS Cluster IP when given dns-cluster-ip" +exit_code=0 +expected_cluster_dns="192.168.0.1" +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv4 \ + --dns-cluster-ip "${expected_cluster_dns}" \ + test || exit_code=$? 
+  echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'"
+  echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'"
+  echo "❌ Test Failed: expected max-pods for m5.8xlarge w/ CNI 1.11.2 to be '${expected_max_pods}', but got '${actual_max_pods}'"
+  echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'"
+if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got: $EXIT_CODE" + exit 1 +fi +export -nf mount +rm $SYSTEMD_UNIT + +echo "--> Should default to true" +export KUBELET_VERSION=v1.27.0-eks-ba74326 +MOUNT_BPF_FS_MOCK=$(mktemp) +function mount-bpf-fs() { + echo "called" >> $MOUNT_BPF_FS_MOCK +} +export MOUNT_BPF_FS_MOCK +export -f mount-bpf-fs +EXIT_CODE=0 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +if [ ! "$(cat $MOUNT_BPF_FS_MOCK)" = "called" ]; then + echo "❌ Test Failed: expected mount-bpf-fs to be called once but it was not!" + exit 1 +fi +export -nf mount-bpf-fs + +echo "--> Should be disabled by flag" +export KUBELET_VERSION=v1.27.0-eks-ba74326 +MOUNT_BPF_FS_MOCK=$(mktemp) +function mount-bpf-fs() { + echo "called" >> $MOUNT_BPF_FS_MOCK +} +export MOUNT_BPF_FS_MOCK +export -f mount-bpf-fs +EXIT_CODE=0 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --mount-bpf-fs false \ + test || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +if [ "$(cat $MOUNT_BPF_FS_MOCK)" = "called" ]; then + echo "❌ Test Failed: expected mount-bpf-fs to not be called but it was!" + exit 1 +fi +export -nf mount-bpf-fs diff --git a/test/cases/private-dns-name.sh b/test/cases/private-dns-name.sh new file mode 100755 index 000000000..c49246b49 --- /dev/null +++ b/test/cases/private-dns-name.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +echo "--> Should fetch PrivateDnsName correctly" +EXPECTED_PRIVATE_DNS_NAME="ip-10-0-0-157.us-east-2.compute.internal" +PRIVATE_DNS_NAME=$(private-dns-name) +if [ ! 
"$PRIVATE_DNS_NAME" = "$EXPECTED_PRIVATE_DNS_NAME" ]; then + echo "❌ Test Failed: expected private-dns-name=$EXPECTED_PRIVATE_DNS_NAME but got '${PRIVATE_DNS_NAME}'" + exit 1 +fi + +echo "--> Should try to fetch PrivateDnsName until timeout is reached" +export PRIVATE_DNS_NAME_ATTEMPT_INTERVAL=3 +export PRIVATE_DNS_NAME_MAX_ATTEMPTS=2 +export AWS_MOCK_FAIL=true +START_TIME=$(date '+%s') +EXIT_CODE=0 +private-dns-name || EXIT_CODE=$? +STOP_TIME=$(date '+%s') +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code" + exit 1 +fi +ELAPSED_TIME=$((STOP_TIME - START_TIME)) +if [[ "$ELAPSED_TIME" -lt 6 ]]; then + echo "❌ Test Failed: expected 6 seconds to elapse, but got: $ELAPSED_TIME" + exit 1 +fi diff --git a/test/cases/provider-id.sh b/test/cases/provider-id.sh new file mode 100755 index 000000000..8707a41ea --- /dev/null +++ b/test/cases/provider-id.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +echo "--> Should fetch imds details correctly" +EXPECTED_INSTANCE_ID="i-1234567890abcdef0" +EXPECTED_AVAILABILITY_ZONE="us-east-1a" +EXPECTED_PROVIDER_ID="aws:///$EXPECTED_AVAILABILITY_ZONE/$EXPECTED_INSTANCE_ID" +PROVIDER_ID=$(provider-id) +if [ ! "$PROVIDER_ID" = "$EXPECTED_PROVIDER_ID" ]; then + echo "❌ Test Failed: expected provider-id=$EXPECTED_PROVIDER_ID but got '${PROVIDER_ID}'" + exit 1 +fi + +echo "--> Should fail when imds is unreachable" +echo '#!/usr/bin/sh +exit 1' > $(which imds) +EXIT_CODE=0 +provider-id || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code" + exit 1 +fi diff --git a/test/cases/reserved-cpus-kubelet-arg.sh b/test/cases/reserved-cpus-kubelet-arg.sh new file mode 100755 index 000000000..2002b7060 --- /dev/null +++ b/test/cases/reserved-cpus-kubelet-arg.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should not set systemReservedCgroup and kubeReservedCgroup when --reserved-cpus is set with containerd" +exit_code=0 +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --kubelet-extra-args '--node-labels=cnf=cnf1 --reserved-cpus=0-3 --cpu-manager-policy=static' \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json +if grep -q systemReservedCgroup ${KUBELET_CONFIG}; then + echo "❌ Test Failed: expected systemReservedCgroup to be absent in ${KUBELET_CONFIG}.Found: $(grep systemReservedCgroup ${KUBELET_CONFIG})" + exit 1 +fi + +if grep -q kubeReservedCgroup ${KUBELET_CONFIG}; then + echo "❌ Test Failed: expected kubeReservedCgroup to be absent ${KUBELET_CONFIG}.Found: $(grep kubeReservedCgroup ${KUBELET_CONFIG})" + exit 1 +fi + +echo "-> Should set systemReservedCgroup and kubeReservedCgroup when --reserved-cpus is not set with containerd" +exit_code=0 +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +if ! $(grep -q systemReservedCgroup ${KUBELET_CONFIG}); then + echo "❌ Test Failed: expected systemReservedCgroup to be present in ${KUBELET_CONFIG}. 
+  echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'"
+if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "v1.0.0" lt "v1.1.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +# should fail +EXIT_CODE=0 +vercmp "1.0.0" lt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.1" lt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.1.0" lt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "2.0.0" lt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "v2.0.0" lt "v1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi + +echo "--> Should compare less-than-or-equal-to" +# should succeed +EXIT_CODE=0 +vercmp "1.0.0" lteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" lteq "1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" lteq "2.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "v1.0.0" lteq "v2.0.0" || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +# should fail +EXIT_CODE=0 +vercmp "1.0.1" lteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.1.0" lteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "2.0.0" lteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "v2.0.0" lteq "v1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi + +echo "--> Should compare strictly equal-to" +# should succeed +EXIT_CODE=0 +vercmp "1.0.0" eq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "v1.0.0" eq "v1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +# should fail +EXIT_CODE=0 +vercmp "1.0.1" eq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" eq "1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "v1.0.0" eq "v1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi + +echo "--> Should compare greater-than-or-equal-to" +# should succeed +EXIT_CODE=0 +vercmp "1.0.0" gteq "1.0.0" || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.1" gteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "2.0.0" gteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "v2.0.0" gteq "v1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +# should fail +EXIT_CODE=0 +vercmp "1.0.0" gteq "1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" gteq "1.1.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" gteq "2.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "v1.0.0" gteq "v2.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi + +echo "--> Should compare strictly greater-than" +# should succeed +EXIT_CODE=0 +vercmp "2.0.0" gt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.1" gt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.1.0" gt "1.0.0" || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "v1.1.0" gt "v1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +# should fail +EXIT_CODE=0 +vercmp "1.0.0" gt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" gt "1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" gt "1.1.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" gt "2.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "v1.0.0" gt "v2.0.0" || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi diff --git a/test/entrypoint.sh b/test/entrypoint.sh new file mode 100755 index 000000000..9cf68701b --- /dev/null +++ b/test/entrypoint.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +set -euo pipefail +SCRIPTPATH="$( + cd "$(dirname "$0")" + pwd -P +)" + +## Start IMDS mock +/sbin/ec2-metadata-mock --imdsv2 &> /var/log/ec2-metadata-mock.log & +sleep 1 + +## execute any other params +/test.sh diff --git a/test/mocks/aws b/test/mocks/aws new file mode 100755 index 000000000..78126330d --- /dev/null +++ b/test/mocks/aws @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +set -euo pipefail +SCRIPTPATH="$( + cd "$(dirname "$0")" + pwd -P +)" + +echo >&2 "mocking 'aws $@'" + +AWS_MOCK_FAIL=${AWS_MOCK_FAIL:-false} +if [ "$AWS_MOCK_FAIL" = "true" ]; then + echo >&2 "failing mocked 'aws $@'" + exit 1 +fi + +if [[ $1 == "ec2" ]]; then + if [[ $2 == "describe-instance-types" ]]; then + instance_type=$(echo "${@}" | grep -o '[a-z]\+[0-9]\+[a-z]*\.[0-9a-z]\+' | tr '.' 
'-') + if [[ -f "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" ]]; then + cat "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" + exit 0 + fi + echo >&2 "instance type not found" + exit 1 + fi + if [[ $2 == "describe-instances" ]]; then + instance_id=$(echo "${@}" | grep -o 'i\-[a-z0-9]\+') + echo >&2 "instance-id: $instance_id" + if [[ -f "${SCRIPTPATH}/describe-instances/${instance_id}.json" ]]; then + cat "${SCRIPTPATH}/describe-instances/${instance_id}.json" + exit 0 + fi + echo >&2 "instance not found" + exit 1 + fi +fi diff --git a/test/mocks/describe-instance-types/m4-xlarge.json b/test/mocks/describe-instance-types/m4-xlarge.json new file mode 100644 index 000000000..0239966bc --- /dev/null +++ b/test/mocks/describe-instance-types/m4-xlarge.json @@ -0,0 +1,6 @@ +{ + "Hypervisor": "xen", + "EniCount": 4, + "PodsPerEniCount": 15, + "CpuCount": 4 +} diff --git a/test/mocks/describe-instance-types/m5-8xlarge.json b/test/mocks/describe-instance-types/m5-8xlarge.json new file mode 100644 index 000000000..840091225 --- /dev/null +++ b/test/mocks/describe-instance-types/m5-8xlarge.json @@ -0,0 +1,6 @@ +{ + "Hypervisor": "nitro", + "EniCount": 8, + "PodsPerEniCount": 30, + "CpuCount": 32 +} diff --git a/test/mocks/describe-instances/i-1234567890abcdef0.json b/test/mocks/describe-instances/i-1234567890abcdef0.json new file mode 100644 index 000000000..da64601da --- /dev/null +++ b/test/mocks/describe-instances/i-1234567890abcdef0.json @@ -0,0 +1,154 @@ +{ + "Reservations": [ + { + "Groups": [], + "Instances": [ + { + "AmiLaunchIndex": 0, + "ImageId": "ami-0abcdef1234567890", + "InstanceId": "i-1234567890abcdef0", + "InstanceType": "t3.nano", + "KeyName": "my-key-pair", + "LaunchTime": "2022-11-15T10:48:59+00:00", + "Monitoring": { + "State": "disabled" + }, + "Placement": { + "AvailabilityZone": "us-east-2a", + "GroupName": "", + "Tenancy": "default" + }, + "PrivateDnsName": "ip-10-0-0-157.us-east-2.compute.internal", + "PrivateIpAddress": 
"10-0-0-157", + "ProductCodes": [], + "PublicDnsName": "ec2-34-253-223-13.us-east-2.compute.amazonaws.com", + "PublicIpAddress": "34.253.223.13", + "State": { + "Code": 16, + "Name": "running" + }, + "StateTransitionReason": "", + "SubnetId": "subnet-04a636d18e83cfacb", + "VpcId": "vpc-1234567890abcdef0", + "Architecture": "x86_64", + "BlockDeviceMappings": [ + { + "DeviceName": "/dev/xvda", + "Ebs": { + "AttachTime": "2022-11-15T10:49:00+00:00", + "DeleteOnTermination": true, + "Status": "attached", + "VolumeId": "vol-02e6ccdca7de29cf2" + } + } + ], + "ClientToken": "1234abcd-1234-abcd-1234-d46a8903e9bc", + "EbsOptimized": true, + "EnaSupport": true, + "Hypervisor": "xen", + "IamInstanceProfile": { + "Arn": "arn:aws:iam::111111111111:instance-profile/AmazonSSMRoleForInstancesQuickSetup", + "Id": "111111111111111111111" + }, + "NetworkInterfaces": [ + { + "Association": { + "IpOwnerId": "amazon", + "PublicDnsName": "ec2-34-253-223-13.us-east-2.compute.amazonaws.com", + "PublicIp": "34.253.223.13" + }, + "Attachment": { + "AttachTime": "2022-11-15T10:48:59+00:00", + "AttachmentId": "eni-attach-1234567890abcdefg", + "DeleteOnTermination": true, + "DeviceIndex": 0, + "Status": "attached", + "NetworkCardIndex": 0 + }, + "Description": "", + "Groups": [ + { + "GroupName": "launch-wizard-146", + "GroupId": "sg-1234567890abcdefg" + } + ], + "Ipv6Addresses": [], + "MacAddress": "00:11:22:33:44:55", + "NetworkInterfaceId": "eni-1234567890abcdefg", + "OwnerId": "104024344472", + "PrivateDnsName": "ip-10-0-0-157.us-east-2.compute.internal", + "PrivateIpAddress": "10-0-0-157", + "PrivateIpAddresses": [ + { + "Association": { + "IpOwnerId": "amazon", + "PublicDnsName": "ec2-34-253-223-13.us-east-2.compute.amazonaws.com", + "PublicIp": "34.253.223.13" + }, + "Primary": true, + "PrivateDnsName": "ip-10-0-0-157.us-east-2.compute.internal", + "PrivateIpAddress": "10-0-0-157" + } + ], + "SourceDestCheck": true, + "Status": "in-use", + "SubnetId": "subnet-1234567890abcdefg", + 
"VpcId": "vpc-1234567890abcdefg", + "InterfaceType": "interface" + } + ], + "RootDeviceName": "/dev/xvda", + "RootDeviceType": "ebs", + "SecurityGroups": [ + { + "GroupName": "launch-wizard-146", + "GroupId": "sg-1234567890abcdefg" + } + ], + "SourceDestCheck": true, + "Tags": [ + { + "Key": "Name", + "Value": "my-instance" + } + ], + "VirtualizationType": "hvm", + "CpuOptions": { + "CoreCount": 1, + "ThreadsPerCore": 2 + }, + "CapacityReservationSpecification": { + "CapacityReservationPreference": "open" + }, + "HibernationOptions": { + "Configured": false + }, + "MetadataOptions": { + "State": "applied", + "HttpTokens": "optional", + "HttpPutResponseHopLimit": 1, + "HttpEndpoint": "enabled", + "HttpProtocolIpv6": "disabled", + "InstanceMetadataTags": "enabled" + }, + "EnclaveOptions": { + "Enabled": false + }, + "PlatformDetails": "Linux/UNIX", + "UsageOperation": "RunInstances", + "UsageOperationUpdateTime": "2022-11-15T10:48:59+00:00", + "PrivateDnsNameOptions": { + "HostnameType": "ip-name", + "EnableResourceNameDnsARecord": true, + "EnableResourceNameDnsAAAARecord": false + }, + "MaintenanceOptions": { + "AutoRecovery": "default" + } + } + ], + "OwnerId": "111111111111", + "ReservationId": "r-1234567890abcdefg" + } + ] +} diff --git a/test/mocks/iptables-save b/test/mocks/iptables-save new file mode 100755 index 000000000..6c02baf7b --- /dev/null +++ b/test/mocks/iptables-save @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +echo >&2 "mocking 'iptables-save $@'" diff --git a/test/mocks/kubelet b/test/mocks/kubelet new file mode 100755 index 000000000..3c7c5dc91 --- /dev/null +++ b/test/mocks/kubelet @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +echo >&2 "mocking 'kubelet $@'" + +# The only use of kubelet directly is to get the Kubernetes version, +# so we'll set a default here to avoid test failures, and you can +# override by setting the KUBELET_VERSION environment variable. 
+if [ $# == 1 ] && [ $1 == "--version" ]; then + echo "Kubernetes ${KUBELET_VERSION:-v1.23.9-eks-ba74326}" +fi diff --git a/test/mocks/mount b/test/mocks/mount new file mode 100755 index 000000000..7a9170b84 --- /dev/null +++ b/test/mocks/mount @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +echo >&2 "mocking 'mount $@'" + +echo 'sysfs on /sys type sysfs (rw,nosuid,nodev,noexec,relatime) +proc on /proc type proc (rw,nosuid,nodev,noexec,relatime) +devtmpfs on /dev type devtmpfs (rw,nosuid,size=4059512k,nr_inodes=1014878,mode=755) +securityfs on /sys/kernel/security type securityfs (rw,nosuid,nodev,noexec,relatime) +tmpfs on /run type tmpfs (rw,nosuid,nodev,mode=755) +tmpfs on /sys/fs/cgroup type tmpfs (ro,nosuid,nodev,noexec,mode=755) +cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd) +pstore on /sys/fs/pstore type pstore (rw,nosuid,nodev,noexec,relatime) +cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blkio) +cgroup on /sys/fs/cgroup/freezer type cgroup (rw,nosuid,nodev,noexec,relatime,freezer) +cgroup on /sys/fs/cgroup/perf_event type cgroup (rw,nosuid,nodev,noexec,relatime,perf_event) +cgroup on /sys/fs/cgroup/hugetlb type cgroup (rw,nosuid,nodev,noexec,relatime,hugetlb) +cgroup on /sys/fs/cgroup/cpuset type cgroup (rw,nosuid,nodev,noexec,relatime,cpuset) +cgroup on /sys/fs/cgroup/net_cls,net_prio type cgroup (rw,nosuid,nodev,noexec,relatime,net_cls,net_prio) +cgroup on /sys/fs/cgroup/cpu,cpuacct type cgroup (rw,nosuid,nodev,noexec,relatime,cpu,cpuacct) +cgroup on /sys/fs/cgroup/memory type cgroup (rw,nosuid,nodev,noexec,relatime,memory) +cgroup on /sys/fs/cgroup/pids type cgroup (rw,nosuid,nodev,noexec,relatime,pids) +cgroup on /sys/fs/cgroup/devices type cgroup (rw,nosuid,nodev,noexec,relatime,devices) +/dev/xvda1 on / type xfs (rw,noatime,attr2,inode64,logbufs=8,logbsize=32k,noquota)' diff --git a/test/mocks/sudo b/test/mocks/sudo new file 
mode 100755 index 000000000..7b76a82a8 --- /dev/null +++ b/test/mocks/sudo @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +echo >&2 "mocking 'sudo $@'" +exec "$@" diff --git a/test/mocks/systemctl b/test/mocks/systemctl new file mode 100755 index 000000000..3e74c7e2e --- /dev/null +++ b/test/mocks/systemctl @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +echo >&2 "mocking 'systemctl $@'" diff --git a/test/test-harness.sh b/test/test-harness.sh new file mode 100755 index 000000000..c253f562e --- /dev/null +++ b/test/test-harness.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +export SCRIPTPATH="$( + cd "$(dirname "$0")" + pwd -P +)" +set -euo pipefail + +TEST_CASE_SCRIPT="" + +USAGE=$( + cat << 'EOM' + Usage: test-harness.sh [-c ] + Executes the test harness for the EKS Optimized AL2 AMI. + By default the test harness executes all scripts in the cases directory. + Example: test-harness.sh + Optional: + -c A path to a specific test case script +EOM +) + +while getopts "c:h" opt; do + case ${opt} in + c) # Case Script Path + TEST_CASE_SCRIPT="$OPTARG" + ;; + h) # help + echo "$USAGE" 1>&2 + exit + ;; + \?) + echo "$USAGE" 1>&2 + exit + ;; + esac +done + +docker build -t eks-optimized-ami -f "${SCRIPTPATH}/Dockerfile" "${SCRIPTPATH}/../" +overall_status=0 + +test_run_log_file=$(mktemp) + +function run() { + docker run -v "$(realpath $1):/test.sh" \ + --attach STDOUT \ + --attach STDERR \ + --rm \ + eks-optimized-ami > $test_run_log_file 2>&1 +} + +if [[ ! -z ${TEST_CASE_SCRIPT} ]]; then + test_cases=${TEST_CASE_SCRIPT} +else + test_cases=($(find ${SCRIPTPATH}/cases -name "*.sh" -type f)) +fi + +for case in "${test_cases[@]}"; do + status=0 + echo "=================================================================================================================" + echo "-> Executing Test Case: $(basename ${case})" + run ${case} || status=1 + if [[ ${status} -eq 0 ]]; then + echo "✅ ✅ $(basename ${case}) Tests Passed! 
✅ ✅" + else + cat $test_run_log_file + echo "❌ ❌ $(basename ${case}) Tests Failed! ❌ ❌" + overall_status=1 + fi + echo "=================================================================================================================" +done + +if [[ ${overall_status} -eq 0 ]]; then + echo "✅ ✅ All Tests Passed! ✅ ✅" +else + echo "❌ ❌ Some Tests Failed! ❌ ❌" +fi +exit $overall_status