diff --git a/README.md b/README.md index 51c6110..50690ad 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,8 @@ This JSON file specifies the plugin and Slurm configuration parameters. "LogLevel": "STRING", "LogFileName": "STRING", "SlurmBinPath": "STRING", + "EnableIPv6": "0", + "UseNodeName": "0", "SlurmConf": { "PrivateData": "STRING", "ResumeProgram": "STRING", @@ -60,6 +62,8 @@ This JSON file specifies the plugin and Slurm configuration parameters. * `LogLevel`: Logging level. Possible values are `CRITICAL`, `ERROR`, `WARNING`, `INFO`, `DEBUG`. Default is `DEBUG`. * `LogFileName`: Full path to the log file location. Default is `PLUGIN_PATH\aws_plugin.log`. * `SlurmBinPath`: Full path to the folder that contains Slurm binaries like `scontrol` or `sinfo`. Example: `/slurm/bin`. +* `EnableIPv6`: Whether to use the node's IPv6 address instead of its private IPv4 address. Possible values are `"0"` (use the private IPv4 address) and `"1"` (use the IPv6 address). Default is `"0"`. +* `UseNodeName`: Whether to use the Slurm node name as the `NodeHostName`. Possible values are `"0"` (use the EC2 internal hostname, `ip-xxx-xxx-xxx-xxx`) and `"1"` (use the Slurm node name). Default is `"0"`. * `SlurmConf`: These attributes are used by `generate_conf.py` to generate the content that must be appended to the Slurm configuration file. You must specify at least the following attributes: * `PrivateData`: Must be equal to `CLOUD` such that EC2 compute nodes that are idle are returned by Slurm command outputs such as `sinfo`. * `ResumeProgram`: Full path to the location of `resume.py`. Example: `/slurm/etc/aws/resume.py`. 
@@ -77,6 +81,8 @@ Example: "LogLevel": "INFO", "LogFileName": "/var/log/slurm/aws.log", "SlurmBinPath": "/slurm/bin", + "EnableIPv6": "0", + "UseNodeName": "0", "SlurmConf": { "PrivateData": "CLOUD", "ResumeProgram": "/slurm/etc/aws/resume.py", diff --git a/common.py b/common.py index 58aa9a7..09dfb52 100644 --- a/common.py +++ b/common.py @@ -135,7 +135,11 @@ def get_common(scriptname): config['LogFileName'] = '%s/aws_plugin.log' %dir_path if not 'LogLevel' in config: config['LogLevel'] = 'DEBUG' - + if not 'EnableIPv6' in config: + config['EnableIPv6'] = '0' + if not 'UseNodeName' in config: + config['UseNodeName'] = '0' + # Make sure that SlurmBinPath ends with a / if 'SlurmBinPath' in config and not config['SlurmBinPath'].endswith('/'): config['SlurmBinPath'] += '/' diff --git a/contribs/update_nodename.sh b/contribs/update_nodename.sh new file mode 100644 index 0000000..54f92b2 --- /dev/null +++ b/contribs/update_nodename.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Update the host's name with the information from the Name Tag +# This tag is usually added by the aws-plugin-for-slurm +# Tag: Name=awscpu-0 +# set the hostname to awscpu-0 +# Can be added to the userdata to be used when the instance is instantiated +instanceid=$(/usr/bin/curl --fail -m 2 -s 169.254.169.254/latest/meta-data/instance-id) +if [[ -n "$instanceid" ]]; then + region=$(/usr/bin/curl --fail -m 2 -s 169.254.169.254/latest/meta-data/placement/availability-zone) + region=${region%?} + hostname=$(/usr/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$instanceid" "Name=key,Values=Name" --region "$region" --query "Tags[0].Value" --output=text) +fi
+if [[ -n "$hostname" && "$hostname" != "None" ]]; then + hostnamectl set-hostname "${hostname}" + echo "$hostname" +else + hostname +fi diff --git a/resume.py b/resume.py index 94ea5f4..1f0cd31 100644 --- a/resume.py +++ b/resume.py @@ -120,9 +120,18 @@ def retry(func, *args, **kwargs): for reservation in response_describe['Reservations']: for instance_details in reservation['Instances']: if instance_details['InstanceId'] == instance_id: - ip_address = instance_details['PrivateIpAddress'] - hostname = 'ip-%s' %'-'.join(ip_address.split('.')) - + if config['EnableIPv6'] == "1": + ip_address = instance_details.get('Ipv6Address') # may be absent when the instance has no IPv6 address + hostname = 'ip-%s' %'-'.join(instance_details['PrivateIpAddress'].split('.')) + if not ip_address: + logger.critical('IPv6 is enabled but I was unable to find an IPv6 IP') + sys.exit(1) + else: + ip_address = instance_details['PrivateIpAddress'] + hostname = 'ip-%s' %'-'.join(ip_address.split('.')) + if config['UseNodeName'] == '1': + hostname = node_name + logger.info('Launched node %s %s %s' %(node_name, instance_id, ip_address)) # Tag the instance