Thheinen/efs accesspoints #1

Open · wants to merge 9 commits into base: thheinen/efs-accesspoints
4 changes: 3 additions & 1 deletion CHANGELOG.md
@@ -7,7 +7,9 @@ This file is used to list changes made in each version of the AWS ParallelCluster
------

**ENHANCEMENTS**
- Allow custom actions on Login Nodes.
- Allow custom actions on login nodes.
- Allow DCV connection on login nodes.
- Add new attribute `efs_access_point_ids` to specify optional EFS access points for EFS mounts.

**BUG FIXES**
- Fix EFA kmod installation with RHEL 8.10 or newer.
@@ -16,7 +16,7 @@
# IMDS
default['cluster']['head_node_imds_secured'] = 'true'
default['cluster']['head_node_imds_allowed_users'] = ['root', node['cluster']['cluster_admin_user'], node['cluster']['cluster_user'] ]
default['cluster']['head_node_imds_allowed_users'].append('dcv') if node['cluster']['dcv_enabled'] == 'head_node' && dcv_installed?
default['cluster']['head_node_imds_allowed_users'].append('dcv') if (node['cluster']['dcv_enabled'] == 'head_node' || node['cluster']['dcv_enabled'] == 'login_node') && dcv_installed?

# ParallelCluster internal variables to configure active directory service
default['cluster']["directory_service"]["domain_name"] = nil
@@ -37,6 +37,7 @@
default['cluster']['efs_fs_ids'] = ''
default['cluster']['efs_encryption_in_transits'] = ''
default['cluster']['efs_iam_authorizations'] = ''
default['cluster']['efs_access_point_ids'] = ''
default['cluster']['fsx_shared_dirs'] = ''
default['cluster']['fsx_fs_ids'] = ''
default['cluster']['fsx_dns_names'] = ''
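For context, ParallelCluster hands these EFS settings to the cookbook as parallel comma-separated strings, so the new `efs_access_point_ids` attribute must stay index-aligned with the existing ones. A minimal sketch of how the values line up once split (the attribute names come from this diff; the paths and IDs are hypothetical):

```ruby
# Hypothetical attribute values, index-aligned per filesystem
# (the second filesystem has no access point, hence the trailing empty slot).
efs_shared_dirs      = '/shared,/apps'
efs_fs_ids           = 'fs-11111111,fs-22222222'
efs_access_point_ids = 'fsap-0123456789abcdef0,'

dirs   = efs_shared_dirs.split(',')
fs_ids = efs_fs_ids.split(',')
ap_ids = efs_access_point_ids.split(',') # trailing empty fields are dropped by split

dirs.each_with_index do |dir, index|
  access_point = ap_ids[index]
  label = access_point.nil? || access_point.empty? ? 'no access point' : "access point #{access_point}"
  puts "#{dir} -> #{fs_ids[index]} (#{label})"
end
```

Note that Ruby's `split(',')` drops trailing empty fields, so a filesystem without an access point at the end of the list simply yields `nil` at that index.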
@@ -296,12 +296,13 @@
],
"platforms": {{ default_platforms | tojson}},
"node_roles": [
"HeadNode"
"HeadNode",
"LoginNode"
],
"feature_conditions": [
{
"dna_key": "dcv_enabled",
"satisfying_values": ["head_node"]
"satisfying_values": ["head_node", "login_node"]
}
]
},
@@ -379,12 +380,13 @@
],
"platforms": {{ default_platforms | tojson}},
"node_roles": [
"HeadNode"
"HeadNode",
"LoginNode"
],
"feature_conditions": [
{
"dna_key": "dcv_enabled",
"satisfying_values": ["head_node"]
"satisfying_values": ["head_node", "login_node"]
}
]
},
@@ -398,12 +400,13 @@
],
"platforms": {{ default_platforms | tojson}},
"node_roles": [
"HeadNode"
"HeadNode",
"LoginNode"
],
"feature_conditions": [
{
"dna_key": "dcv_enabled",
"satisfying_values": ["head_node"]
"satisfying_values": ["head_node", "login_node"]
}
]
},
@@ -417,12 +420,13 @@
],
"platforms": {{ default_platforms | tojson}},
"node_roles": [
"HeadNode"
"HeadNode",
"LoginNode"
],
"feature_conditions": [
{
"dna_key": "dcv_enabled",
"satisfying_values": ["head_node"]
"satisfying_values": ["head_node", "login_node"]
}
]
},
@@ -436,12 +440,13 @@
],
"platforms": {{ default_platforms | tojson}},
"node_roles": [
"HeadNode"
"HeadNode",
"LoginNode"
],
"feature_conditions": [
{
"dna_key": "dcv_enabled",
"satisfying_values": ["head_node"]
"satisfying_values": ["head_node", "login_node"]
}
]
},
@@ -455,12 +460,13 @@
],
"platforms": {{ default_platforms | tojson}},
"node_roles": [
"HeadNode"
"HeadNode",
"LoginNode"
],
"feature_conditions": [
{
"dna_key": "dcv_enabled",
"satisfying_values": ["head_node"]
"satisfying_values": ["head_node", "login_node"]
}
]
},
@@ -474,12 +480,13 @@
],
"platforms": {{ default_platforms | tojson}},
"node_roles": [
"HeadNode"
"HeadNode",
"LoginNode"
],
"feature_conditions": [
{
"dna_key": "dcv_enabled",
"satisfying_values": ["head_node"]
"satisfying_values": ["head_node", "login_node"]
}
]
},
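The hunks above all apply the same change: each DCV-related component is now installed for the `LoginNode` role and enabled when `dcv_enabled` is either `head_node` or `login_node`. Purely as an illustration of what a `feature_conditions` entry of this shape expresses (the real evaluation lives in the build tooling, not in this diff; the function below is hypothetical):

```ruby
# Illustrative only: not the actual build-system code.
def feature_conditions_satisfied?(feature_conditions, dna)
  feature_conditions.all? do |condition|
    condition['satisfying_values'].include?(dna[condition['dna_key']])
  end
end

conditions = [
  { 'dna_key' => 'dcv_enabled', 'satisfying_values' => %w(head_node login_node) },
]

feature_conditions_satisfied?(conditions, 'dcv_enabled' => 'login_node') # => true
feature_conditions_satisfied?(conditions, 'dcv_enabled' => 'compute')    # => false
```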
@@ -15,6 +15,7 @@
id_array = node['cluster']['efs_fs_ids'].split(',')
encryption_array = node['cluster']['efs_encryption_in_transits'].split(',')
iam_array = node['cluster']['efs_iam_authorizations'].split(',')
access_point_id_array = node['cluster']['efs_access_point_ids'].split(',')

# Identify the previously mounted filesystems and remove them from the set of filesystems to mount
shared_dir_array.each_with_index do |dir, index|
@@ -23,6 +24,7 @@
id_array.delete_at(index)
encryption_array.delete_at(index)
iam_array.delete_at(index)
access_point_id_array.delete_at(index)
end

# Mount EFS directories with the efs resource
@@ -31,6 +33,7 @@
efs_fs_id_array id_array
efs_encryption_in_transit_array encryption_array
efs_iam_authorization_array iam_array
efs_access_point_id_array access_point_id_array
action :mount
not_if { shared_dir_array.empty? }
end
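To make the resulting call concrete, here is a sketch of what the `efs` resource receives with the new `efs_access_point_id_array` property, assuming two hypothetical filesystems where only the first uses an access point (the second slot is `nil` because the trailing empty field is dropped by `split`):

```ruby
# Sketch of the resulting resource call; directories and IDs are hypothetical.
efs 'mount shared efs' do
  efs_shared_dir_array            ['/shared', '/apps']
  efs_fs_id_array                 ['fs-11111111', 'fs-22222222']
  efs_encryption_in_transit_array ['true', 'false']
  efs_iam_authorization_array     ['true', 'false']
  efs_access_point_id_array       ['fsap-0123456789abcdef0', nil]
  action :mount
end
```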
@@ -61,6 +61,7 @@
efs_encryption_in_transit_array [node['cluster']['efs_encryption_in_transits'].split(',')[index]]
efs_iam_authorization_array [node['cluster']['efs_iam_authorizations'].split(',')[index]]
efs_mount_point_array ['/home']
efs_access_point_id_array [node['cluster']['efs_access_point_ids'].split(',')[index]]
action :mount
end
break
@@ -73,6 +74,7 @@
efs_fs_id_array [node['cluster']['efs_fs_ids'].split(',')[index]]
efs_encryption_in_transit_array [node['cluster']['efs_encryption_in_transits'].split(',')[index]]
efs_iam_authorization_array [node['cluster']['efs_iam_authorizations'].split(',')[index]]
efs_access_point_id_array [node['cluster']['efs_access_point_ids'].split(',')[index]]
action :mount
end
break
@@ -175,7 +175,6 @@ def package_path
execute "cloudwatch-agent-start" do
user 'root'
timeout 300
command "/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -s"
not_if "/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a status | grep status | grep running"
command "/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a append-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -s || /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -s"
end unless node['cluster']['cw_logging_enabled'] != 'true' || on_docker?
end
@@ -18,6 +18,7 @@
property :efs_fs_id_array, Array, required: %i(mount unmount)
property :efs_encryption_in_transit_array, Array, required: false
property :efs_iam_authorization_array, Array, required: false
property :efs_access_point_id_array, Array, required: false
# This is the mount point on the EFS itself, as opposed to the local system directory, defaults to "/"
property :efs_mount_point_array, Array, required: false
property :efs_unmount_forced_array, Array, required: false
@@ -28,19 +29,23 @@
efs_fs_id_array = new_resource.efs_fs_id_array.dup
efs_encryption_in_transit_array = new_resource.efs_encryption_in_transit_array.dup
efs_iam_authorization_array = new_resource.efs_iam_authorization_array.dup
efs_access_point_id_array = new_resource.efs_access_point_id_array.dup
efs_mount_point_array = new_resource.efs_mount_point_array.dup

efs_fs_id_array.each_with_index do |efs_fs_id, index|
efs_shared_dir = efs_shared_dir_array[index]
efs_encryption_in_transit = efs_encryption_in_transit_array[index] unless efs_encryption_in_transit_array.nil?
efs_iam_authorization = efs_iam_authorization_array[index] unless efs_iam_authorization_array.nil?
efs_access_point_id = efs_access_point_id_array[index] unless efs_access_point_id_array.nil?

# Path needs to be fully qualified, for example "shared/temp" becomes "/shared/temp"
efs_shared_dir = "/#{efs_shared_dir}" unless efs_shared_dir.start_with?('/')

# See reference of mount options: https://docs.aws.amazon.com/efs/latest/ug/automount-with-efs-mount-helper.html
mount_options = "_netdev,noresvport"
if efs_encryption_in_transit == "true"
if efs_access_point_id
mount_options = "iam,tls,access_point=#{efs_access_point_id}"
elsif efs_encryption_in_transit == "true"
mount_options += ",tls"
if efs_iam_authorization == "true"
mount_options += ",iam"
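Pulled out as a plain function, the branch logic added above looks roughly like the sketch below (not the resource code itself). Two details worth noting from the diff: the access-point branch replaces the base `_netdev,noresvport` options rather than appending to them, and it implies both `iam` and `tls` regardless of the `efs_encryption_in_transit`/`efs_iam_authorization` flags; the check is on plain truthiness, so only a `nil`/missing entry falls through to the encryption branch.

```ruby
# Sketch of the branch logic in the efs resource (not the resource itself).
def efs_mount_options(access_point_id: nil, encryption_in_transit: 'false', iam_authorization: 'false')
  mount_options = '_netdev,noresvport'
  if access_point_id
    # As written in the diff, this branch replaces the base options entirely.
    mount_options = "iam,tls,access_point=#{access_point_id}"
  elsif encryption_in_transit == 'true'
    mount_options += ',tls'
    mount_options += ',iam' if iam_authorization == 'true'
  end
  mount_options
end

efs_mount_options(access_point_id: 'fsap-0123456789abcdef0')
# => "iam,tls,access_point=fsap-0123456789abcdef0"
efs_mount_options(encryption_in_transit: 'true', iam_authorization: 'true')
# => "_netdev,noresvport,tls,iam"
efs_mount_options
# => "_netdev,noresvport"
```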
@@ -255,7 +255,7 @@ def self.configure(chef_run)
is_expected.to run_execute("cloudwatch-agent-start").with(
user: 'root',
timeout: 300,
command: "/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -s"
command: "/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a append-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -s || /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -s"
)
end
end
@@ -309,8 +309,12 @@ def self.configure(chef_run)
ConvergeCloudWatch.configure(runner)
end

it 'does not start cloudwatch' do
is_expected.not_to run_execute("cloudwatch-agent-start")
it 'starts cloudwatch agent' do
is_expected.to run_execute("cloudwatch-agent-start").with(
user: 'root',
timeout: 300,
command: "/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a append-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -s || /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -s"
)
end
end
end
@@ -19,12 +19,14 @@ raid:
<% efs_shared_dir_array = node['cluster']['efs_shared_dirs'].split(',') -%>
<% efs_encryption_in_transit_array = node['cluster']['efs_encryption_in_transits'].split(',') -%>
<% efs_iam_authorization_array = node['cluster']['efs_iam_authorizations'].split(',') -%>
<% efs_access_point_id_array = node['cluster']['efs_access_point_ids'].split(',') -%>
efs:
<% efs_fs_ids_array.each_with_index do |efs_fs_id, index| -%>
- efs_fs_id: <%= efs_fs_id %>
mount_dir: <%= efs_shared_dir_array[index] %>
efs_encryption_in_transit: <%= efs_encryption_in_transit_array[index] %>
efs_iam_authorization: <%= efs_iam_authorization_array[index] %>
efs_access_point_id: <%= efs_access_point_id_array[index] %>
<% end -%>
<%# FSX %>
<% fsx_fs_id_array = node['cluster']['fsx_fs_ids'].split(',') -%>
@@ -122,7 +122,7 @@ main() {

# Create a session with session storage enabled.
mkdir -p "${DCV_SESSION_FOLDER}"
dcv_session_file="${DCV_SESSION_FOLDER}/dcv_session"
dcv_session_file="${DCV_SESSION_FOLDER}/dcv_session_$(hostname)"
if [[ ! -e ${dcv_session_file} ]]; then
sessionid=$(_create_dcv_session "${dcv_session_file}" "${shared_folder_path}")
else
@@ -181,6 +181,7 @@ suites:
cluster:
log_rotation_enabled: 'true'
node_type: 'LoginNode'
dcv_enabled: "login_node"
directory_service:
generate_ssh_keys_for_users: 'true'
scheduler: 'slurm'
7 changes: 7 additions & 0 deletions cookbooks/aws-parallelcluster-platform/recipes/config/dcv.rb
@@ -20,4 +20,11 @@
action :configure
end
end unless on_docker?
when 'LoginNode'
if node['cluster']['dcv_enabled'] == "login_node"
# Activate DCV on login node
dcv "Configure DCV" do
action :configure
end
end unless on_docker?
end
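Since GitHub elides the surrounding lines, here is how the recipe reads after this change, reconstructed from the visible context (the elided head-node branch is assumed to mirror the code shown above):

```ruby
# Reconstruction of config/dcv.rb after this change; elided context is assumed.
case node['cluster']['node_type']
when 'HeadNode'
  if node['cluster']['dcv_enabled'] == 'head_node'
    dcv 'Configure DCV' do
      action :configure
    end
  end unless on_docker?
when 'LoginNode'
  if node['cluster']['dcv_enabled'] == 'login_node'
    # Activate DCV on login node
    dcv 'Configure DCV' do
      action :configure
    end
  end unless on_docker?
end
```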
@@ -22,6 +22,12 @@
parallelcluster_supervisord_log_rotation
)

if node['cluster']['dcv_enabled'] == "login_node" && dcv_installed?
config_files += %w(
parallelcluster_dcv_log_rotation
)
end

if node['cluster']["directory_service"]["generate_ssh_keys_for_users"] == 'true'
config_files += %w(
parallelcluster_pam_ssh_key_generator_log_rotation
@@ -24,7 +24,9 @@
variables(
region: region,
aws_ca_bundle: region.start_with?('us-iso') ? "/etc/pki/#{region}/certs/ca-bundle.pem" : '',
dcv_configured: node['cluster']['dcv_enabled'] == "head_node" && dcv_installed?,
dcv_configured: (node['cluster']['dcv_enabled'] == "head_node" ||
node['cluster']['dcv_enabled'] == "login_node") &&
dcv_installed?,
dcv_auth_virtualenv_path: node['cluster']['dcv']['authenticator']['virtualenv_path'],
dcv_auth_user_home: node['cluster']['dcv']['authenticator']['user_home'],
dcv_port: node['cluster']['dcv_port'],
@@ -183,7 +183,7 @@ def optionally_disable_rnd
end

action :configure do
if dcv_supported? && node['cluster']['node_type'] == "HeadNode"
if dcv_supported? && (node['cluster']['node_type'] == "HeadNode" || node['cluster']['node_type'] == "LoginNode")
if dcv_gpu_accel_supported?
# Enable graphic acceleration in dcv conf file for graphic instances.
allow_gpu_acceleration
@@ -218,13 +218,15 @@
end
end

context "in the login node when log_rotation enabled and pam ssh key generation is enabled" do
context "in the login node when log_rotation, pam ssh key generation, and dcv are enabled" do
cached(:chef_run) do
runner = runner(platform: platform, version: version) do |node|
node.override['cluster']['node_type'] = "LoginNode"
node.override['cluster']['log_rotation_enabled'] = 'true'
node.override['cluster']['dcv_enabled'] = "login_node"
node.override['cluster']["directory_service"]["generate_ssh_keys_for_users"] = 'true'
node.override['cluster']["scheduler"] = 'slurm'
allow_any_instance_of(Object).to receive(:dcv_installed?).and_return(true)
end
runner.converge(described_recipe)
end
@@ -235,12 +237,12 @@
parallelcluster_supervisord_log_rotation
parallelcluster_cloud_init_output_log_rotation
parallelcluster_pam_ssh_key_generator_log_rotation
parallelcluster_dcv_log_rotation
)
unexpected_config_files = %w(
parallelcluster_bootstrap_error_msg_log_rotation
parallelcluster_cfn_init_log_rotation
parallelcluster_chef_client_log_rotation
parallelcluster_dcv_log_rotation
parallelcluster_clustermgtd_log_rotation
parallelcluster_clusterstatusmgtd_log_rotation
parallelcluster_slurm_fleet_status_manager_log_rotation