From 2bab1d5722533dd5a5d52ea4d5f0473d92b37d9f Mon Sep 17 00:00:00 2001 From: smirnp Date: Thu, 1 Nov 2018 16:43:51 +0100 Subject: [PATCH 1/7] cloud branch added --- platform-controller/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform-controller/pom.xml b/platform-controller/pom.xml index 6dc968f9..4371e763 100644 --- a/platform-controller/pom.xml +++ b/platform-controller/pom.xml @@ -22,7 +22,7 @@ org.hobbit parent - 2.0.6 + 2.0.6-CLOUD ../parent-pom platform-controller From d46e6e82cd9e5abeb06d953983acd37a51547ece Mon Sep 17 00:00:00 2001 From: smirnp Date: Mon, 3 Dec 2018 13:59:25 +0100 Subject: [PATCH 2/7] First code commit --- platform-controller/AWS/bastion.yaml | 705 +++++++++++++++++ platform-controller/AWS/swarm-mode/kms.yaml | 77 ++ .../AWS/swarm-mode/manager.yaml | 707 ++++++++++++++++++ .../AWS/swarm-mode/securitygroups.yaml | 116 +++ .../AWS/swarm-mode/worker.yaml | 487 ++++++++++++ platform-controller/AWS/vpc-1azs.yaml | 326 ++++++++ platform-controller/pom.xml | 12 + .../hobbit/controller/ExperimentManager.java | 229 ++++-- .../hobbit/controller/PlatformController.java | 272 +++++-- .../cloud/CloudSshTunnelsProvider.java | 169 +++++ .../cloud/ClusterManagerProvider.java | 31 + .../cloud/DockerClientProvider.java | 107 +++ .../handlers/BasicClusterStackHandler.java | 41 + .../aws/handlers/BastionStackHandler.java | 20 + .../cloud/aws/handlers/VPCStackHandler.java | 18 + .../cloud/aws/swarm/SwarmClusterManager.java | 235 ++++++ .../handlers/DockerSwarmStackHandler.java | 20 + .../handlers/KeysManagementStackHandler.java | 20 + .../handlers/SecurityGroupsStackHandler.java | 22 + .../handlers/SwarmClusterStackHandler.java | 93 +++ .../handlers/SwarmManagerStackHandler.java | 20 + .../handlers/SwarmWorkerStackHandler.java | 25 + .../controller/data/ExperimentStatus.java | 9 +- .../docker/CloudClusterManager.java | 142 ++++ .../docker/CloudContainerManager.java | 75 ++ .../controller/docker/ClusterManager.java | 17 +- 
.../controller/docker/ClusterManagerImpl.java | 56 +- .../controller/docker/ContainerManager.java | 34 +- .../docker/ContainerManagerImpl.java | 306 +++++--- .../controller/docker/DockerUtility.java | 20 +- .../docker/FileBasedImageManager.java | 9 +- .../hobbit/controller/docker/MountImpl.java | 117 +++ .../docker/ResourceInformationCollector.java | 35 + .../controller/front/FrontEndApiHandler.java | 8 +- .../gitlab/GitlabControllerImpl.java | 16 +- .../health/ClusterHealthChecker.java | 2 +- .../health/ClusterHealthCheckerImpl.java | 2 +- .../queue/CloudBasedExperimentQueue.java | 35 + .../controller/queue/ExperimentQueue.java | 4 +- .../controller/queue/ExperimentQueueImpl.java | 2 +- .../controller/utils/ServiceLogsReader.java | 186 +++++ .../src/main/resources/AWS/bastion.yaml | 684 +++++++++++++++++ .../main/resources/AWS/swarm-mode/kms.yaml | 76 ++ .../resources/AWS/swarm-mode/manager.yaml | 654 ++++++++++++++++ .../AWS/swarm-mode/securitygroups.yaml | 115 +++ .../main/resources/AWS/swarm-mode/worker.yaml | 457 +++++++++++ .../src/main/resources/AWS/vpc-1azs.yaml | 285 +++++++ .../src/main/resources/log4j.properties | 2 + .../org/hobbit/PlatformControllerTest.java | 175 +++++ .../src/test/java/org/hobbit/QueueClient.java | 69 ++ .../org/hobbit/cloud/DockerClientTest.java | 19 + .../hobbit/cloud/SwarmClusterManagerTest.java | 75 ++ .../hobbit/controller/DockerBasedTest.java | 26 +- .../controller/ExperimentTimeoutTest.java | 41 +- .../docker/ContainerManagerBasedTest.java | 10 +- .../queue/ExperimentQueueImplTest.java | 4 +- .../controller/queue/RedisBasedTest.java | 2 +- .../src/test/resources/log4j.properties | 2 + platform-controller/ssh/.keep | 0 platform-controller/stop-services.sh | 2 + 60 files changed, 7241 insertions(+), 284 deletions(-) create mode 100644 platform-controller/AWS/bastion.yaml create mode 100644 platform-controller/AWS/swarm-mode/kms.yaml create mode 100644 platform-controller/AWS/swarm-mode/manager.yaml create mode 100644 
platform-controller/AWS/swarm-mode/securitygroups.yaml create mode 100644 platform-controller/AWS/swarm-mode/worker.yaml create mode 100644 platform-controller/AWS/vpc-1azs.yaml create mode 100644 platform-controller/src/main/java/org/hobbit/controller/cloud/CloudSshTunnelsProvider.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/cloud/ClusterManagerProvider.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/cloud/DockerClientProvider.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/cloud/aws/handlers/BasicClusterStackHandler.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/cloud/aws/handlers/BastionStackHandler.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/cloud/aws/handlers/VPCStackHandler.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/SwarmClusterManager.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/DockerSwarmStackHandler.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/KeysManagementStackHandler.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SecurityGroupsStackHandler.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SwarmClusterStackHandler.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SwarmManagerStackHandler.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SwarmWorkerStackHandler.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/docker/CloudClusterManager.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/docker/CloudContainerManager.java create mode 100644 
platform-controller/src/main/java/org/hobbit/controller/docker/MountImpl.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/queue/CloudBasedExperimentQueue.java create mode 100644 platform-controller/src/main/java/org/hobbit/controller/utils/ServiceLogsReader.java create mode 100644 platform-controller/src/main/resources/AWS/bastion.yaml create mode 100644 platform-controller/src/main/resources/AWS/swarm-mode/kms.yaml create mode 100644 platform-controller/src/main/resources/AWS/swarm-mode/manager.yaml create mode 100644 platform-controller/src/main/resources/AWS/swarm-mode/securitygroups.yaml create mode 100644 platform-controller/src/main/resources/AWS/swarm-mode/worker.yaml create mode 100644 platform-controller/src/main/resources/AWS/vpc-1azs.yaml create mode 100644 platform-controller/src/test/java/org/hobbit/PlatformControllerTest.java create mode 100644 platform-controller/src/test/java/org/hobbit/QueueClient.java create mode 100644 platform-controller/src/test/java/org/hobbit/cloud/DockerClientTest.java create mode 100644 platform-controller/src/test/java/org/hobbit/cloud/SwarmClusterManagerTest.java create mode 100644 platform-controller/ssh/.keep create mode 100644 platform-controller/stop-services.sh diff --git a/platform-controller/AWS/bastion.yaml b/platform-controller/AWS/bastion.yaml new file mode 100644 index 00000000..f74e763f --- /dev/null +++ b/platform-controller/AWS/bastion.yaml @@ -0,0 +1,705 @@ +--- +# Copyright 2018 widdix GmbH +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +AWSTemplateFormatVersion: '2010-09-09' +Description: 'VPC: highly available SSH bastion host/instance, a cloudonaut.io template' +Metadata: + 'AWS::CloudFormation::Interface': + ParameterGroups: + - Label: + default: 'Parent Stacks' + Parameters: + - ParentVPCStack + - ParentAlertStack + - ParentZoneStack + - Label: + default: 'EC2 Parameters' + Parameters: + - InstanceType + - KeyName + - IAMUserSSHAccess + - SystemsManagerAccess + - LogsRetentionInDays + - SubDomainNameWithDot +Parameters: + ParentVPCStack: + Description: 'Stack name of parent VPC stack based on vpc/vpc-*azs.yaml template.' + Type: String + ParentAlertStack: + Description: 'Optional but recommended stack name of parent alert stack based on operations/alert.yaml template.' + Type: String + Default: '' + ParentZoneStack: + Description: 'Optional stack name of parent zone stack based on vpc/zone-*.yaml template.' + Type: String + Default: '' + SubnetZone: + Description: 'Subnet zone.' + Type: String + Default: A + AllowedValues: + - A + - B + - C + - D + KeyName: + Description: 'Optional key pair of the ec2-user to establish a SSH connection to the SSH bastion host/instance.' + Type: String + Default: '' + IAMUserSSHAccess: + Description: 'Synchronize public keys of IAM users to enable personalized SSH access (Doc: https://cloudonaut.io/manage-aws-ec2-ssh-access-with-iam/).' + Type: String + Default: false + AllowedValues: + - true + - false + SystemsManagerAccess: + Description: 'Enable AWS Systems Manager agent and authorization.' + Type: String + Default: true + AllowedValues: + - true + - false + InstanceType: + Description: 'Instance type of the SSH bastion host/instance.' + Type: String + Default: 't2.nano' + LogsRetentionInDays: + Description: 'Specifies the number of days you want to retain log events.' 
+ Type: Number + Default: 14 + AllowedValues: [1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, 3653] + SubDomainNameWithDot: + Description: 'Name that is used to create the DNS entry with trailing dot, e.g. ${SubDomainNameWithDot}${HostedZoneName}. Leave blank for naked (or apex and bare) domain. Requires ParentZoneStack parameter!' + Type: String + Default: 'ssh.' + Tag: + Type: String + Default: 'Hobbit' +Mappings: + RegionMap: + 'ap-south-1': + AMI: 'ami-d783a9b8' + 'eu-west-3': + AMI: 'ami-2cf54551' + 'eu-west-2': + AMI: 'ami-b8b45ddf' + 'eu-west-1': + AMI: 'ami-466768ac' + 'ap-northeast-2': + AMI: 'ami-afd86dc1' + 'ap-northeast-1': + AMI: 'ami-e99f4896' + 'sa-east-1': + AMI: 'ami-6dca9001' + 'ca-central-1': + AMI: 'ami-0ee86a6a' + 'ap-southeast-1': + AMI: 'ami-05868579' + 'ap-southeast-2': + AMI: 'ami-39f8215b' + 'eu-central-1': + AMI: 'ami-0097b5eb' + # AMI: 'ami-7c4f7097' + 'us-east-1': + AMI: 'ami-b70554c8' + 'us-east-2': + AMI: 'ami-8c122be9' + 'us-west-1': + AMI: 'ami-e0ba5c83' + 'us-west-2': + AMI: 'ami-a9d09ed1' +Conditions: + HasKeyName: !Not [!Equals [!Ref KeyName, '']] + HasIAMUserSSHAccess: !Equals [!Ref IAMUserSSHAccess, 'true'] + HasSystemsManagerAccess: !Equals [!Ref SystemsManagerAccess, 'true'] + HasAlertTopic: !Not [!Equals [!Ref ParentAlertStack, '']] + HasZone: !Not [!Equals [!Ref ParentZoneStack, '']] +Resources: + RecordSet: + Condition: HasZone + Type: 'AWS::Route53::RecordSet' + Properties: + HostedZoneId: {'Fn::ImportValue': !Sub '${ParentZoneStack}-HostedZoneId'} + Name: !Sub + - '${SubDomainNameWithDot}${HostedZoneName}' + - SubDomainNameWithDot: !Ref SubDomainNameWithDot + HostedZoneName: {'Fn::ImportValue': !Sub '${ParentZoneStack}-HostedZoneName'} + ResourceRecords: + - !Ref EIP + TTL: '60' + Type: A + Tags: + - Key: Cluster + Value: !Ref Tag + EIP: + Type: 'AWS::EC2::EIP' + Properties: + Domain: vpc + Logs: + Type: 'AWS::Logs::LogGroup' + Properties: + RetentionInDays: !Ref LogsRetentionInDays + # 
SecurityGroup: + # Type: 'AWS::EC2::SecurityGroup' + # Properties: + # GroupDescription: !Ref 'AWS::StackName' + # SecurityGroupIngress: + # - IpProtocol: tcp + # FromPort: 22 + # ToPort: 22 + # CidrIp: '0.0.0.0/0' + # VpcId: {'Fn::ImportValue': !Sub '${ParentVPCStack}-VPC'} + InstanceProfile: + Type: 'AWS::IAM::InstanceProfile' + Properties: + Path: '/' + Roles: + - !Ref IAMRole + IAMRole: + Type: 'AWS::IAM::Role' + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - 'ec2.amazonaws.com' + Action: + - 'sts:AssumeRole' + Path: '/' + ManagedPolicyArns: !If [HasSystemsManagerAccess, ['arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM'], []] + Policies: + - PolicyName: 'ec2' + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'ec2:AssociateAddress' + - 'ec2:ModifyInstanceAttribute' + - 'ec2:CreateRoute' + - 'ec2:ReplaceRoute' + Resource: + - '*' + - PolicyName: logs + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'logs:CreateLogGroup' + - 'logs:CreateLogStream' + - 'logs:PutLogEvents' + - 'logs:DescribeLogStreams' + Resource: + - 'arn:aws:logs:*:*:*' + IAMPolicySSHAccess: + Type: 'AWS::IAM::Policy' + Condition: HasIAMUserSSHAccess + Properties: + Roles: + - !Ref IAMRole + PolicyName: iam + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'iam:ListUsers' + Resource: + - '*' + - Effect: Allow + Action: + - 'iam:ListSSHPublicKeys' + - 'iam:GetSSHPublicKey' + Resource: + - !Sub 'arn:aws:iam::${AWS::AccountId}:user/*' + BastionLaunchConfiguration: + Type: 'AWS::AutoScaling::LaunchConfiguration' + Metadata: + 'AWS::CloudFormation::Init': + configSets: + #default: !If [HasIAMUserSSHAccess, [awslogs, ssh-access, config, configure-ssh, configure-vpn], [awslogs, config, configure-ssh, configure-vpn]] + default: !If [HasIAMUserSSHAccess, [awslogs, ssh-access, config, configure-ssh, 
configure-vpn], [awslogs, config, configure-ssh, configure-vpn]] + awslogs: + packages: + yum: + awslogs: [] + files: + '/etc/awslogs/awscli.conf': + content: !Sub | + [default] + region = ${AWS::Region} + [plugins] + cwlogs = cwlogs + mode: '000644' + owner: root + group: root + '/etc/awslogs/awslogs.conf': + content: !Sub | + [general] + state_file = /var/lib/awslogs/agent-state + [/var/log/amazon/ssm/amazon-ssm-agent.log] + datetime_format = %Y-%m-%d %H:%M:%S + file = /var/log/amazon/ssm/amazon-ssm-agent.log + log_stream_name = {instance_id}/var/log/amazon/ssm/amazon-ssm-agent.log + log_group_name = ${Logs} + [/var/log/amazon/ssm/errors.log] + datetime_format = %Y-%m-%d %H:%M:%S + file = /var/log/amazon/ssm/errors.log + log_stream_name = {instance_id}/var/log/amazon/ssm/errors.log + log_group_name = ${Logs} + [/var/log/audit/audit.log] + file = /var/log/audit/audit.log + log_stream_name = {instance_id}/var/log/audit/audit.log + log_group_name = ${Logs} + [/var/log/awslogs.log] + datetime_format = %Y-%m-%d %H:%M:%S + file = /var/log/awslogs.log + log_stream_name = {instance_id}/var/log/awslogs.log + log_group_name = ${Logs} + [/var/log/boot.log] + file = /var/log/boot.log + log_stream_name = {instance_id}/var/log/boot.log + log_group_name = ${Logs} + [/var/log/cfn-hup.log] + datetime_format = %Y-%m-%d %H:%M:%S + file = /var/log/cfn-hup.log + log_stream_name = {instance_id}/var/log/cfn-hup.log + log_group_name = ${Logs} + [/var/log/cfn-init-cmd.log] + datetime_format = %Y-%m-%d %H:%M:%S + file = /var/log/cfn-init-cmd.log + log_stream_name = {instance_id}/var/log/cfn-init-cmd.log + log_group_name = ${Logs} + [/var/log/cfn-init.log] + datetime_format = %Y-%m-%d %H:%M:%S + file = /var/log/cfn-init.log + log_stream_name = {instance_id}/var/log/cfn-init.log + log_group_name = ${Logs} + [/var/log/cfn-wire.log] + datetime_format = %Y-%m-%d %H:%M:%S + file = /var/log/cfn-wire.log + log_stream_name = {instance_id}/var/log/cfn-wire.log + log_group_name = ${Logs} + 
[/var/log/cloud-init-output.log] + file = /var/log/cloud-init-output.log + log_stream_name = {instance_id}/var/log/cloud-init-output.log + log_group_name = ${Logs} + [/var/log/cloud-init.log] + datetime_format = %b %d %H:%M:%S + file = /var/log/cloud-init.log + log_stream_name = {instance_id}/var/log/cloud-init.log + log_group_name = ${Logs} + [/var/log/cron] + datetime_format = %b %d %H:%M:%S + file = /var/log/cron + log_stream_name = {instance_id}/var/log/cron + log_group_name = ${Logs} + [/var/log/dmesg] + file = /var/log/dmesg + log_stream_name = {instance_id}/var/log/dmesg + log_group_name = ${Logs} + [/var/log/grubby_prune_debug] + file = /var/log/grubby_prune_debug + log_stream_name = {instance_id}/var/log/grubby_prune_debug + log_group_name = ${Logs} + [/var/log/maillog] + datetime_format = %b %d %H:%M:%S + file = /var/log/maillog + log_stream_name = {instance_id}/var/log/maillog + log_group_name = ${Logs} + [/var/log/messages] + datetime_format = %b %d %H:%M:%S + file = /var/log/messages + log_stream_name = {instance_id}/var/log/messages + log_group_name = ${Logs} + [/var/log/secure] + datetime_format = %b %d %H:%M:%S + file = /var/log/secure + log_stream_name = {instance_id}/var/log/secure + log_group_name = ${Logs} + [/var/log/yum.log] + datetime_format = %b %d %H:%M:%S + file = /var/log/yum.log + log_stream_name = {instance_id}/var/log/yum.log + log_group_name = ${Logs} + mode: '000644' + owner: root + group: root + services: + sysvinit: + awslogsd: + enabled: true + ensureRunning: true + packages: + yum: + - awslogs + files: + - '/etc/awslogs/awslogs.conf' + - '/etc/awslogs/awscli.conf' + ssh-access: + files: + '/opt/authorized_keys_command.sh': + content: | + #!/bin/bash -e + if [ -z "$1" ]; then + exit 1 + fi + UnsaveUserName="$1" + UnsaveUserName=${UnsaveUserName//".plus."/"+"} + UnsaveUserName=${UnsaveUserName//".equal."/"="} + UnsaveUserName=${UnsaveUserName//".comma."/","} + UnsaveUserName=${UnsaveUserName//".at."/"@"} + aws iam 
list-ssh-public-keys --user-name "$UnsaveUserName" --query "SSHPublicKeys[?Status == 'Active'].[SSHPublicKeyId]" --output text | while read -r KeyId; do + aws iam get-ssh-public-key --user-name "$UnsaveUserName" --ssh-public-key-id "$KeyId" --encoding SSH --query "SSHPublicKey.SSHPublicKeyBody" --output text + done + mode: '000755' + owner: root + group: root + '/opt/import_users.sh': + content: | + #!/bin/bash -e + aws iam list-users --query "Users[].[UserName]" --output text | while read User; do + SaveUserName="$User" + SaveUserName=${SaveUserName//"+"/".plus."} + SaveUserName=${SaveUserName//"="/".equal."} + SaveUserName=${SaveUserName//","/".comma."} + SaveUserName=${SaveUserName//"@"/".at."} + if [ "${#SaveUserName}" -le "32" ]; then + if ! id -u "$SaveUserName" > /dev/null 2>&1; then + # don't grant sudo rights on bastion host! + /usr/sbin/useradd "$SaveUserName" + fi + else + echo "Can not import IAM user ${SaveUserName}. User name is longer than 32 characters." + fi + done + mode: '000755' + owner: root + group: root + '/etc/cron.d/import_users': + content: | + */10 * * * * root /opt/import_users.sh + mode: '000644' + owner: root + group: root + commands: + 'a_configure_sshd_command': + command: 'sed -i "s:#AuthorizedKeysCommand none:AuthorizedKeysCommand /opt/authorized_keys_command.sh:g" /etc/ssh/sshd_config' + 'b_configure_sshd_commanduser': + command: 'sed -i "s:#AuthorizedKeysCommandUser nobody:AuthorizedKeysCommandUser nobody:g" /etc/ssh/sshd_config' + 'c_import_users': + command: './import_users.sh' + cwd: '/opt' + services: + sysvinit: + sshd: + enabled: true + ensureRunning: true + commands: + - 'a_configure_sshd_command' + - 'b_configure_sshd_commanduser' + config: + packages: + yum: + mariadb: [] + files: + '/etc/cfn/cfn-hup.conf': + content: !Sub | + [main] + stack=${AWS::StackId} + region=${AWS::Region} + interval=1 + mode: '000400' + owner: root + group: root + '/etc/cfn/hooks.d/cfn-auto-reloader.conf': + content: !Sub | + 
[cfn-auto-reloader-hook] + triggers=post.update + path=Resources.BastionLaunchConfiguration.Metadata.AWS::CloudFormation::Init + action=/opt/aws/bin/cfn-init --verbose --stack=${AWS::StackName} --region=${AWS::Region} --resource=BastionLaunchConfiguration + runas=root + services: + sysvinit: + cfn-hup: + enabled: true + ensureRunning: true + files: + - '/etc/cfn/cfn-hup.conf' + - '/etc/cfn/hooks.d/cfn-auto-reloader.conf' + amazon-ssm-agent: + enabled: !If [HasSystemsManagerAccess, true, false] + ensureRunning: !If [HasSystemsManagerAccess, true, false] + configure-ssh: + packages: + yum: + mariadb: [] + files: + '/etc/cfn/cfn-hup.conf': + content: !Sub | + [main] + stack=${AWS::StackId} + region=${AWS::Region} + interval=1 + mode: '000400' + owner: root + group: root + '/etc/cfn/hooks.d/cfn-auto-reloader.conf': + content: !Sub | + [cfn-auto-reloader-hook] + triggers=post.update + path=Resources.BastionLaunchConfiguration.Metadata.AWS::CloudFormation::Init + action=/opt/aws/bin/cfn-init --verbose --stack=${AWS::StackName} --region=${AWS::Region} --resource=BastionLaunchConfiguration + runas=root + services: + sysvinit: + cfn-hup: + enabled: true + ensureRunning: true + files: + - '/etc/cfn/cfn-hup.conf' + - '/etc/cfn/hooks.d/cfn-auto-reloader.conf' + amazon-ssm-agent: + enabled: !If [HasSystemsManagerAccess, true, false] + ensureRunning: !If [HasSystemsManagerAccess, true, false] + configure-vpn: + packages: + yum: + openvpn: [openswan, xl2tpd] + files: + '/etc/ipsec.conf': + content: !Sub | + version 2.0 + + config setup + dumpdir=/var/run/pluto/ + nat_traversal=yes + virtual_private=%v4:10.0.0.0/8,%v4:192.168.0.0/16,%v4:172.16.0.0/12,%v4:25.0.0.0/8,%v6:fd00::/8,%v6:fe80::/10 + oe=off + protostack=netkey + nhelpers=0 + interfaces=%defaultroute + + conn vpnpsk + auto=add + left=$PRIVATE_IP + leftid=$PUBLIC_IP + leftsubnet=$PRIVATE_IP/32 + leftnexthop=%defaultroute + leftprotoport=17/1701 + rightprotoport=17/%any + right=%any + rightsubnetwithin=0.0.0.0/0 + 
forceencaps=yes + authby=secret + pfs=no + type=transport + auth=esp + ike=3des-sha1 + phase2alg=3des-sha1 + dpddelay=30 + dpdtimeout=120 + dpdaction=clear + mode: '000400' + owner: root + group: root + + '/etc/ipsec.secrets': + content: !Sub | + $PUBLIC_IP %any : PSK \"$IPSEC_PSK\" + mode: '000400' + owner: root + group: root + + '/etc/xl2tpd/xl2tpd.conf': + content: !Sub | + [global] + port = 1701 + + ;debug avp = yes + ;debug network = yes + ;debug state = yes + ;debug tunnel = yes + + [lns default] + ip range = 192.168.42.10-192.168.42.250 + local ip = 192.168.42.1 + require chap = yes + refuse pap = yes + require authentication = yes + name = l2tpd + ;ppp debug = yes + pppoptfile = /etc/ppp/options.xl2tpd + length bit = yes + mode: '000400' + owner: root + group: root + '/etc/ppp/options.xl2tpd': + content: !Sub | + ipcp-accept-local + ipcp-accept-remote + ms-dns 8.8.8.8 + ms-dns 8.8.4.4 + noccp + auth + crtscts + idle 1800 + mtu 1280 + mru 1280 + lock + connect-delay 5000 + mode: '000400' + owner: root + group: root + '/etc/ppp/chap-secrets': + content: !Sub | + # Secrets for authentication using CHAP + # client\tserver\tsecret\t\t\tIP addresses + + $VPN_USER\tl2tpd $VPN_PASSWORD * + mode: '000400' + owner: root + group: root + + services: + sysvinit: + cfn-hup: + enabled: true + ensureRunning: true + files: + - '/etc/cfn/cfn-hup.conf' + - '/etc/cfn/hooks.d/cfn-auto-reloader.conf' + amazon-ssm-agent: + enabled: !If [HasSystemsManagerAccess, true, false] + ensureRunning: !If [HasSystemsManagerAccess, true, false] + Properties: + AssociatePublicIpAddress: true + EbsOptimized: false + IamInstanceProfile: !Ref InstanceProfile + ImageId: !FindInMap [RegionMap, !Ref 'AWS::Region', AMI] + InstanceType: !Ref InstanceType + SecurityGroups: + - 'Fn::ImportValue': !Sub '${ParentVPCStack}-BastionSecurityGroup' + # - !Ref SecurityGroup + KeyName: !If [HasKeyName, !Ref KeyName, !Ref 'AWS::NoValue'] + UserData: + 'Fn::Base64': !Sub + - | + #!/bin/bash -ex + trap 
'/opt/aws/bin/cfn-signal -e 1 --region ${Region} --stack ${StackName} --resource BastionAutoScalingGroup' ERR + INSTANCEID=$(curl -s -m 60 http://169.254.169.254/latest/meta-data/instance-id) + echo "INSTANCEID=$(curl -s -m 60 http://169.254.169.254/latest/meta-data/instance-id)" >> /home/ec2-user/init.log + echo "aws --region ${Region} ec2 associate-address --instance-id $INSTANCEID --allocation-id ${EIPAllocationId}" >> /home/ec2-user/init.log + echo '/opt/aws/bin/cfn-init -v --stack ${StackName} --resource BastionLaunchConfiguration --region ${Region}' >> /home/ec2-user/init.log + echo '/opt/aws/bin/cfn-signal -e 0 --region ${Region} --stack ${StackName} --resource BastionAutoScalingGroup' >> /home/ec2-user/init.log + echo 'aws --region ${Region} ec2 modify-instance-attribute --instance-id $INSTANCEID --source-dest-check "{\"Value\": false}"' >> /home/ec2-user/init.log + echo 'aws --region ${Region} ec2 replace-route --route-table-id ${RouteTablePrivate} --destination-cidr-block "0.0.0.0/0" --instance-id $INSTANCEID || aws --region ${Region} ec2 create-route --route-table-id ${RouteTablePrivate} --destination-cidr-block "0.0.0.0/0" --instance-id $INSTANCEID' >> /home/ec2-user/init.log + + aws --region ${Region} ec2 associate-address --instance-id $INSTANCEID --allocation-id ${EIPAllocationId} + aws --region ${Region} ec2 modify-instance-attribute --instance-id $INSTANCEID --source-dest-check "{\"Value\": false}" + aws --region ${Region} ec2 replace-route --route-table-id ${RouteTablePrivate} --destination-cidr-block "0.0.0.0/0" --instance-id $INSTANCEID || aws --region ${Region} ec2 create-route --route-table-id ${RouteTablePrivate} --destination-cidr-block "0.0.0.0/0" --instance-id $INSTANCEID + + /opt/aws/bin/cfn-init -v --stack ${StackName} --resource BastionLaunchConfiguration --region ${Region} & + /opt/aws/bin/cfn-signal -e 0 --region ${Region} --stack ${StackName} --resource BastionAutoScalingGroup & + + echo "All commands were executed" >> 
/home/ec2-user/init.log + - RouteTablePrivate: {'Fn::ImportValue': !Sub '${ParentVPCStack}-RouteTable${SubnetZone}Private'} + Region: !Ref 'AWS::Region' + StackName: !Ref 'AWS::StackName' + EIPAllocationId: !GetAtt 'EIP.AllocationId' + #!/bin/bash -ex + # trap '/opt/aws/bin/cfn-signal -e 1 --stack ${AWS::StackName} --resource AutoScalingGroup --region ${AWS::Region}' ERR + # + # /opt/aws/bin/cfn-init -v --stack ${AWS::StackName} --resource LaunchConfiguration --region ${AWS::Region} + # /opt/aws/bin/cfn-signal -e 0 --stack ${AWS::StackName} --resource AutoScalingGroup --region ${AWS::Region} + + BastionAutoScalingGroup: + Type: 'AWS::AutoScaling::AutoScalingGroup' + Properties: + DesiredCapacity: '1' + LaunchConfigurationName: !Ref BastionLaunchConfiguration + MaxSize: '1' + MinSize: '1' + Tags: + - Key: Cluster + Value: !Ref Tag + PropagateAtLaunch: true + - Key: Name + Value: !Sub + - '${AWS::StackName} ${CidrBlock}' + - CidrBlock: {'Fn::ImportValue': !Sub '${ParentVPCStack}-CidrBlock'} + PropagateAtLaunch: true + NotificationConfigurations: !If + - HasAlertTopic + - - NotificationTypes: + - 'autoscaling:EC2_INSTANCE_LAUNCH_ERROR' + - 'autoscaling:EC2_INSTANCE_TERMINATE_ERROR' + TopicARN: {'Fn::ImportValue': !Sub '${ParentAlertStack}-TopicARN'} + - [] + #VPCZoneIdentifier: !Split [',', {'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetsPublic'}] + VPCZoneIdentifier: + - {'Fn::ImportValue': !Sub '${ParentVPCStack}-Subnet${SubnetZone}Public'} + CreationPolicy: + ResourceSignal: + Count: 1 + Timeout: PT180M + UpdatePolicy: + AutoScalingRollingUpdate: + PauseTime: PT10M + SuspendProcesses: + - HealthCheck + - ReplaceUnhealthy + - AZRebalance + - AlarmNotification + - ScheduledActions + WaitOnResourceSignals: true + CPUTooHighAlarm: + Condition: HasAlertTopic + Type: 'AWS::CloudWatch::Alarm' + Properties: + AlarmDescription: 'Average CPU utilization over last 10 minutes higher than 80%' + Namespace: 'AWS/EC2' + MetricName: CPUUtilization + Statistic: Average + Period: 
600 + EvaluationPeriods: 1 + ComparisonOperator: GreaterThanThreshold + Threshold: 80 + AlarmActions: + - {'Fn::ImportValue': !Sub '${ParentAlertStack}-TopicARN'} + Dimensions: + - Name: AutoScalingGroupName + Value: !Ref BastionAutoScalingGroup + Tags: + - Key: Cluster + Value: !Ref Tag +Outputs: + TemplateID: + Description: 'cloudonaut.io template id.' + Value: 'vpc/vpc-ssh-bastion' + TemplateVersion: + Description: 'cloudonaut.io template version.' + Value: 'latest' + StackName: + Description: 'Stack name.' + Value: !Sub '${AWS::StackName}' + IPAddress: + Description: 'The public IP address of the SSH bastion host/instance.' + Value: !Ref EIP + Export: + Name: !Sub '${AWS::StackName}-IPAddress' \ No newline at end of file diff --git a/platform-controller/AWS/swarm-mode/kms.yaml b/platform-controller/AWS/swarm-mode/kms.yaml new file mode 100644 index 00000000..584cfd40 --- /dev/null +++ b/platform-controller/AWS/swarm-mode/kms.yaml @@ -0,0 +1,77 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: 'Docker Swarm - KMS' + +# https://github.com/pgarbe/containers_on_aws + +Resources: + + SwarmTokenKey: + Type: "AWS::KMS::Key" + Properties: + Description: "KMS key to encrypt swarm join tokens" + KeyPolicy: + Id: key-docker-swarm + Version: '2012-10-17' + Statement: + - Sid: Enable IAM User Permissions + Effect: Allow + Principal: + AWS: + - !Sub arn:aws:iam::${AWS::AccountId}:root + Action: kms:* + Resource: "*" + # - Sid: Allow access for Key Administrators + # Effect: Allow + # Principal: + # AWS: + # - !Sub arn:aws:iam::${AWS::AccountId}:root + # Action: + # - kms:Create* + # - kms:Describe* + # - kms:Enable* + # - kms:List* + # - kms:Put* + # - kms:Update* + # - kms:Revoke* + # - kms:Disable* + # - kms:Get* + # - kms:Delete* + # - kms:TagResource + # - kms:UntagResource + # - kms:ScheduleKeyDeletion + # - kms:CancelKeyDeletion + # Resource: "*" + # - Sid: Allow use of the key + # Effect: Allow + # Principal: + # AWS: + # - !Sub 
arn:aws:iam::${AWS::AccountId}:root + # Action: + # - kms:Encrypt + # - kms:Decrypt + # - kms:ReEncrypt* + # - kms:GenerateDataKey* + # - kms:DescribeKey + # Resource: "*" + # - Sid: Allow attachment of persistent resources + # Effect: Allow + # Principal: + # AWS: + # - !Sub arn:aws:iam::${AWS::AccountId}:root + # Action: + # - kms:CreateGrant + # - kms:ListGrants + # - kms:RevokeGrant + # Resource: "*" + # Condition: + # Bool: + # kms:GrantIsForAWSResource: true +Outputs: + + SwarmTokenKey: + Value: !Ref SwarmTokenKey + + SwarmTokenKeyArn: + Value: !GetAtt SwarmTokenKey.Arn + Export: + Name: !Sub '${AWS::StackName}-SwarmTokenKeyArn' diff --git a/platform-controller/AWS/swarm-mode/manager.yaml b/platform-controller/AWS/swarm-mode/manager.yaml new file mode 100644 index 00000000..92e5460e --- /dev/null +++ b/platform-controller/AWS/swarm-mode/manager.yaml @@ -0,0 +1,707 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: 'Docker Swarm - Manager' + +# https://github.com/pgarbe/containers_on_aws + +Parameters: + + ParentVPCStack: + Description: 'Stack name of parent VPC stack based on vpc/vpc-*azs.yaml template.' + Type: String + + KeyName: + Description: 'Optional key pair of the ec2-user to establish a SSH connection to the EC2 instance.' + Type: String + + IAMUserSSHAccess: + Description: 'Synchronize public keys of IAM users to enable personalized SSH access (Doc: https://cloudonaut.io/manage-aws-ec2-ssh-access-with-iam/).' + Type: String + Default: false + AllowedValues: + - true + - false + + InstanceType: + Description: 'The instance type for the EC2 instance.' + Type: String + Default: 't2.medium' + + DesiredCapacity: + Description: 'The number of manager nodes' + Type: Number + Default: 1 + AllowedValues: [1,3,5,7] + + MaxSize: + Description: 'MaxSize of manager nodes' + Type: Number + Default: 1 + + ManagerSubnetsReach: + Description: 'Should the managers have direct access to the Internet or do you prefer private subnets with NAT?' 
+ Type: String + Default: Private + AllowedValues: + - Public + - Private + +# SwarmManagerAutoScalingGroup: +# Description: AutoScaling Group of Swarm managers +# Type: String + +# SecurityGroups: +# Description: Security group for which are allowed to talk to ASG +# Type: CommaDelimitedList + + ParentSecurityGroupsStack: + Description: 'ParentSecurityGroupsStack' + Type: String + + TargetGroups: + Description: Security group for which are allowed to talk to ASG + Type: CommaDelimitedList + Default: '' + + DockerVersion: + Description: 'Specifies the version of the Docker engine' + Type: String + Default: "17.12.1" + + DockerRepository: + Description: 'Specifies if stable or edge repository should be used' + Type: String + Default: stable + AllowedValues: + - stable + - edge + + JoinToken: + Description: 'The token to join the swarm cluster as a manager node' + Type: String + Default: '' + NoEcho: true + +# JoinTokenKmsKey: +# Description: 'KMS key to decrypt swarm join tokens' +# Type: String + + ParentKeysManagementStack: + Description: 'ParentKeysManagementStack' + Type: String + + BucketName: + Description: 'Bucket name for placing join tokens' + Type: String + Default: '' + NoEcho: true + Tag: + Type: String + Default: 'Hobbit' + +# ParentVPCClusterId: +# Description: 'ID of parent VPC cluster based on vpc/vpc-*azs.yaml template.' 
+# Type: String + +# NatInstanceIP: +# Description: 'Public IP address of VPC NAT to access the S3 bucket' +# Type: String + +Conditions: + + HasKeyName: !Not [!Equals [!Ref KeyName, '']] + HasIAMUserSSHAccess: !Equals [!Ref IAMUserSSHAccess, 'true'] + HasSwarmJoinToken: !Not [!Equals [!Ref JoinToken, '']] + + +Resources: + + InstanceProfile: + Type: 'AWS::IAM::InstanceProfile' + Properties: + Path: '/' + Roles: + - !Ref IAMRole + + IAMRole: + Type: 'AWS::IAM::Role' + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - 'ec2.amazonaws.com' + Action: + - 'sts:AssumeRole' + Path: '/' + Policies: + - PolicyName: logs + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'logs:CreateLogGroup' + - 'logs:CreateLogStream' + - 'logs:PutLogEvents' + - 'logs:DescribeLogStreams' + Resource: + - 'arn:aws:logs:*:*:*' + - PolicyName: asg + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'autoscaling:DescribeAutoScalingGroups' + - 'autoscaling:DescribeAutoScalingInstances' + - 'ec2:DescribeInstances' + Resource: + - '*' + - PolicyName: kms + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'kms:Decrypt' + - 'kms:DescribeKey' + Resource: + 'Fn::ImportValue': !Sub '${ParentKeysManagementStack}-SwarmTokenKeyArn' + #- !Ref JoinTokenKmsKey + + IAMPolicySSHAccess: + Type: 'AWS::IAM::Policy' + Condition: HasIAMUserSSHAccess + Properties: + Roles: + - !Ref IAMRole + PolicyName: iam + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'iam:ListUsers' + Resource: + - '*' + - Effect: Allow + Action: + - 'iam:ListSSHPublicKeys' + - 'iam:GetSSHPublicKey' + Resource: + - !Sub 'arn:aws:iam::${AWS::AccountId}:user/*' + + S3Endpoint: + Type: 'AWS::EC2::VPCEndpoint' + Properties: + VpcId: {'Fn::ImportValue': !Sub '${ParentVPCStack}-VPC'} + PolicyDocument: + Version: 2012-10-17 + Statement: + 
- Action: + - 's3:PutObject' + - 's3:GetObject' + Resource: + - !Sub 'arn:aws:s3:::${BucketName}' + - !Sub 'arn:aws:s3:::${BucketName}/*' + Effect: Allow + Principal: '*' + RouteTableIds: + - {'Fn::ImportValue': !Sub '${ParentVPCStack}-RouteTableAPrivate'} + - {'Fn::ImportValue': !Sub '${ParentVPCStack}-RouteTableAPublic' } + ServiceName: !Sub 'com.amazonaws.${AWS::Region}.s3' + + S3BucketPolicy: + Type: 'AWS::S3::BucketPolicy' + Properties: + Bucket: !Sub '${BucketName}' + PolicyDocument: + Statement: + - Sid: Access-to-specific-VPCE-only + Effect: Allow + Principal: "*" + Action: + - 's3:PutObject' + - 's3:GetObject' + Resource: + - !Sub 'arn:aws:s3:::${BucketName}' + - !Sub 'arn:aws:s3:::${BucketName}/*' + Condition: + StringEquals: + 'aws:sourceVpce': !Ref S3Endpoint + + AutoScalingGroup: + Type: AWS::AutoScaling::AutoScalingGroup + Properties: + #AutoScalingGroupName: !Ref SwarmManagerAutoScalingGroup + # AvailabilityZones: !Ref AvailabilityZones + VPCZoneIdentifier: + - 'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetA${ManagerSubnetsReach}' + #- 'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetB${ManagerSubnetsReach}' + #- 'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetC${ManagerSubnetsReach}' + LaunchConfigurationName: !Ref LaunchConfiguration + MinSize: 0 + MaxSize: !Ref MaxSize + DesiredCapacity: !Ref DesiredCapacity + #TargetGroupARNs: !Ref TargetGroups + MetricsCollection: + - Granularity: 1Minute + Metrics: + - GroupInServiceInstances + Tags: + - Key: Cluster + Value: !Ref Tag + PropagateAtLaunch: true + - Key: Name + Value: !Sub ${AWS::StackName} + PropagateAtLaunch: 'true' + CreationPolicy: + ResourceSignal: + Timeout: PT180M + UpdatePolicy: + AutoScalingRollingUpdate: + MinInstancesInService: !Ref DesiredCapacity + MaxBatchSize: '1' + PauseTime: PT180M + SuspendProcesses: + - AlarmNotification + WaitOnResourceSignals: 'true' + + LaunchConfiguration: + Type: AWS::AutoScaling::LaunchConfiguration + Metadata: + AWS::CloudFormation::Init: + 
configSets: + default: + !If + - HasSwarmJoinToken + - !If [HasIAMUserSSHAccess, [docker-ubuntu, swarm-join], [docker-ubuntu, swarm-join]] + - !If [HasIAMUserSSHAccess, [docker-ubuntu, swarm-init, hobbit], [docker-ubuntu, swarm-init, hobbit]] +# - !If [HasIAMUserSSHAccess, [hobbit], [hobbit]] + + ssh-access: + files: + '/opt/authorized_keys_command.sh': + content: | + #!/bin/bash -e + if [ -z "$1" ]; then + exit 1 + fi + SaveUserName="$1" + SaveUserName=${SaveUserName//"+"/".plus."} + SaveUserName=${SaveUserName//"="/".equal."} + SaveUserName=${SaveUserName//","/".comma."} + SaveUserName=${SaveUserName//"@"/".at."} + aws iam list-ssh-public-keys --user-name "$SaveUserName" --query "SSHPublicKeys[?Status == 'Active'].[SSHPublicKeyId]" --output text | while read KeyId; do + aws iam get-ssh-public-key --user-name "$SaveUserName" --ssh-public-key-id "$KeyId" --encoding SSH --query "SSHPublicKey.SSHPublicKeyBody" --output text + done + mode: '000755' + owner: root + group: root + '/opt/import_users.sh': + content: | + #!/bin/bash + aws iam list-users --query "Users[].[UserName]" --output text | while read User; do + SaveUserName="$User" + SaveUserName=${SaveUserName//"+"/".plus."} + SaveUserName=${SaveUserName//"="/".equal."} + SaveUserName=${SaveUserName//","/".comma."} + SaveUserName=${SaveUserName//"@"/".at."} + if id -u "$SaveUserName" >/dev/null 2>&1; then + echo "$SaveUserName exists" + else + #sudo will read each file in /etc/sudoers.d, skipping file names that end in ‘~’ or contain a ‘.’ character to avoid causing problems with package manager or editor temporary/backup files. + SaveUserFileName=$(echo "$SaveUserName" | tr "." 
" ") + /usr/sbin/adduser "$SaveUserName" + echo "$SaveUserName ALL=(ALL) NOPASSWD:ALL" > "/etc/sudoers.d/$SaveUserFileName" + fi + done + mode: '000755' + owner: root + group: root + '/etc/cron.d/import_users': + content: | + */10 * * * * root /opt/import_users.sh + mode: '000644' + owner: root + group: root + commands: + 'a_configure_sshd_command': + command: 'sed -i "s:#AuthorizedKeysCommand none:AuthorizedKeysCommand /opt/authorized_keys_command.sh:g" /etc/ssh/sshd_config' + 'b_configure_sshd_commanduser': + command: 'sed -i "s:#AuthorizedKeysCommandUser nobody:AuthorizedKeysCommandUser nobody:g" /etc/ssh/sshd_config' + 'c_import_users': + command: './import_users.sh' + cwd: '/opt' + services: + sysvinit: + sshd: + enabled: true + ensureRunning: true + commands: + - 'a_configure_sshd_command' + - 'b_configure_sshd_commanduser' + + docker-ubuntu: + commands: + 'a_start_installation': + command: 'echo "docker-ubuntu started" >> /home/ubuntu/docker.log' + 'b_get_certificates': + command: 'sudo apt-get install apt-transport-https ca-certificates curl software-properties-common htop socat -y' + 'c_set_gpg_key': + command: 'curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -' + 'd_add_fingerprint': + command: 'sudo apt-key fingerprint 0EBFCD88' + 'e_add_docker_repo': + command: !Sub 'sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) ${DockerRepository}"' + 'f_update_aptget': + command: 'sudo apt-get update' + 'g_install_docker': + command: !Sub 'sudo apt-get install -y docker-ce=${DockerVersion}~ce-0~ubuntu' +# 'g_start_service': +# command: 'sudo service docker start' + 'h_add_ubuntu_user_to_docker_group': + command: 'sudo usermod -aG docker ubuntu' + 'i_verify_installation': + command: 'sudo docker run hello-world' + 'k_verify_installation': + command: 'docker run hello-world >> /home/ubuntu/docker.log' + 'm_report_installation': + command: 'echo "docker-ubuntu finished correctly" >> 
/home/ubuntu/docker.log' + swarm-init: + commands: + 'a_init_swarm': + command: !Sub | + echo "init_swarm" >> /home/ubuntu/init.log + docker swarm init >> /home/ubuntu/swarm.log + docker swarm join-token worker | grep token | awk '{ print $5 }' > /home/ubuntu/token + + docker swarm join-token worker | awk '{ if(NR==3) print }' >> /home/ubuntu/worker_join.sh + echo "Uploading token to S3" >> /home/ubuntu/swarm.log + echo "sudo aws s3 cp /home/ubuntu/token s3://${BucketName}/token --region ${AWS::Region}" >> /home/ubuntu/swarm.log + aws s3 cp /home/ubuntu/worker_join.sh s3://${BucketName}/worker_join.sh --region ${AWS::Region} > /home/ubuntu/swarm.log + echo "Upload to S3 should be finished" >> /home/ubuntu/swarm.log + + NODE_ID=$(docker info | grep NodeID | awk '{print $2}') + echo "Adding labels (master/data) to $NODE_ID" >> /home/ubuntu/swarm.log + + docker node update $NODE_ID --label-add org.hobbit.type=master + docker node update $NODE_ID --label-add org.hobbit.workergroup=master + docker node update $NODE_ID --label-add org.hobbit.name=master + #aws s3 mb s3://${BucketName} --region ${AWS::Region} +# #aws s3 cp /home/ubuntu/token s3://${BucketName}/token --region ${AWS::Region} > /home/ubuntu/swarm.log +# docker node update $NODE_ID --label-add org.hobbit.type=data +# docker node update $NODE_ID --label-add org.hobbit.workergroup=data +# docker node update $NODE_ID --label-add org.hobbit.name=data + + 'b_swarm_healthcheck': + command: 'docker node ls >> /home/ubuntu/swarm.log' + + swarm-join: + commands: + 'a_join_swarm': + command: !Sub | + echo "swarm-join -> a_join_swarm" >> /home/ubuntu/init.log + # Decrypt join token via KMS + echo -n "${JoinToken}" | base64 --decode > ciphertextblob + JOIN_TOKEN=$(aws kms decrypt --region ${AWS::Region} --ciphertext-blob fileb://ciphertextblob --query Plaintext --output text | base64 --decode) + + INSTANCE_ID="`wget -q -O - http://instance-data/latest/meta-data/instance-id`" + ASG_NAME=$(aws autoscaling 
describe-auto-scaling-instances --instance-ids $INSTANCE_ID --region ${AWS::Region} --query AutoScalingInstances[].AutoScalingGroupName --output text) + + for ID in $(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names $ASG_NAME --region ${AWS::Region} --query AutoScalingGroups[].Instances[].InstanceId --output text); + do + # Ignore "myself" + if [ "$ID" == "$INSTANCE_ID" ] ; then + continue; + fi + + IP=$(aws ec2 describe-instances --instance-ids $ID --region ${AWS::Region} --query Reservations[].Instances[].PrivateIpAddress --output text) + if [ ! -z "$IP" ] ; then + echo "Try to join swarm with IP $IP" + + # Join the swarm; if it fails try the next one + docker swarm join --token $JOIN_TOKEN $IP:2377 && break || continue + fi + done + + 'b_swarm_healthcheck': + command: 'docker node ls >> /home/ubuntu/swarm.log' + + hobbit: + commands: + 'a_install_prereqs': + command: !Sub | + echo "installing prereqs" >> /home/ubuntu/init.log + sudo apt-get install make maven supervisor socat -y + echo "installing docker compose" >> /home/ubuntu/init.log + sudo curl -L https://github.com/docker/compose/releases/download/1.22.0/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose + sudo chmod +x /usr/local/bin/docker-compose >> /home/ubuntu/docker.log + sudo ln /usr/local/bin/docker-compose /usr/bin/docker-compose >> /home/ubuntu/docker.log + echo "prereqs installed" >> /home/ubuntu/init.log + + 'b_start_socat_daemon': + command: !Sub | + cat > /opt/getNodeIps.sh << 'EOL' + NODES=$(docker node ls --format "{{.Hostname}} {{.Status}}" | grep Ready | awk '{print $1}') + LINE="" + for NODE in $NODES; + do + CID=$(echo $NODE | cut -c4-15) + CID2=$(echo $CID | tr - .) 
+ LINE="$LINE '$CID2:$2'," + done + echo $LINE + EOL + + cat > /opt/updatePrometheus.sh << 'EOL' + cp /opt/hobbit-platform-2.0.5/config/prometheus/prometheus.conf.template /opt/hobbit-platform-2.0.5/config/prometheus/prometheus.conf + LINE=$(sh /opt/getNodeIps.sh container-exporter 9104) + sed -i "s~'container-exporter:9104'~$LINE~g" /opt/hobbit-platform-2.0.5/config/prometheus/prometheus.conf + LINE=$(sh /opt/getNodeIps.sh cAdvisor 8081) + sed -i "s~'cAdvisor:8081'~$LINE~g" /opt/hobbit-platform-2.0.5/config/prometheus/prometheus.conf + LINE=$(sh /opt/getNodeIps.sh node-exporter 9100) + sed -i "s~'node-exporter:9100'~$LINE~g" /opt/hobbit-platform-2.0.5/config/prometheus/prometheus.conf + sed -i "s~',]~']~g" /opt/hobbit-platform-2.0.5/config/prometheus/prometheus.conf + echo "stopping working prometheus" >> /var/log/prometheusLoop.log + sudo docker stop $(sudo docker ps --filter "name=prometheus" --format "{{.ID}}") + cAdvisorID=$(sudo docker ps --filter "name=cAdvisor" --format "{{.ID}}") + exporterID=$(sudo docker ps --filter "name=node-exporter" --format "{{.ID}}") + echo "starting prometheus" >> /var/log/prometheusLoop.log + echo "docker run --name prometheus -d --net hobbit-core -p 9090:9090 --link $cAdvisorID:cAdvisor --link $exporterID:node-exporter --rm -v /opt/hobbit-platform-2.0.5/config/prometheus:/config prom/prometheus --config.file=/config/prometheus.conf" >> /home/ubuntu/hobbit.log + docker run --name prometheus -d --net hobbit-core -p 9090:9090 --link $cAdvisorID:cAdvisor --link $exporterID:node-exporter --rm -v /opt/hobbit-platform-2.0.5/config/prometheus:/config prom/prometheus --config.file=/config/prometheus.conf + EOL + + sudo install -m 777 /dev/null /var/log/socat.log + echo "creating /opt/getmsg.sh" >> /home/ubuntu/init.log + cat > /opt/getmsg.sh << 'EOL' + read MESSAGE + COMMAND="sudo docker node update $MESSAGE" + echo $COMMAND >> /var/log/socat.log + exec $COMMAND + EOL + sudo chmod +x /opt/getmsg.sh + + sudo install -m 777 /dev/null 
/var/log/prometheusLoop.log + cat > /home/ubuntu/prometheusLoop.sh << 'EOL' + while : + do + LINE=$(sh /opt/getNodeIps.sh container-exporter 9104) + if [ "$LINE" != "$PREVLINE" ] + then + date >> /var/log/prometheusLoop.log + sudo sh /opt/updatePrometheus.sh >> /var/log/prometheusLoop.log + fi + PREVLINE=$LINE + sleep 15 + done + EOL + + echo "Configuring /etc/supervisor/supervisord.conf" >> /home/ubuntu/init.log + cat > /etc/supervisor/supervisord.conf << 'EOL' + [supervisord] + [program:hobbit-socat] + command=/usr/bin/socat -u tcp-l:4444,fork system:/opt/getmsg.sh + autostart=true + autorestart=true + [program:update-prometheus] + command=/bin/bash /home/ubuntu/prometheusLoop.sh + autostart=true + autorestart=true + EOL + + echo "restarting supervisor service" >> /home/ubuntu/init.log + sudo service supervisor restart + echo "socat_daemon should be started" >> /home/ubuntu/init.log + 'c_install_hobbit': + command: !Sub | + echo "modifying docker service" >> /home/ubuntu/hobbit.log + sudo sed -i "s~-H fd://~-H tcp://0.0.0.0:2376 -H unix:///var/run/docker.sock~g" /lib/systemd/system/docker.service + echo "reloading service daemon" >> /home/ubuntu/hobbit.log + sudo systemctl daemon-reload + echo "restaring service" >> /home/ubuntu/hobbit.log + sudo service docker restart + echo "checking 2376 port with netstat" >> /home/ubuntu/hobbit.log + netstat -atn | grep 2376 >> /home/ubuntu/hobbit.log + + echo "cloning hobbit_platform" >> /home/ubuntu/init.log + sudo git clone https://github.com/hobbit-project/platform.git /opt/hobbit-platform-2.0.5 >> /home/ubuntu/hobbit.log + cd /opt/hobbit-platform-2.0.5/ + echo "switching branch to 2.0.5" >> /home/ubuntu/hobbit.log + sudo git checkout tags/v2.0.5 >> /home/ubuntu/hobbit.log + + echo "creating networks" >> /home/ubuntu/hobbit.log + sudo make create-networks + + echo "creating prometheus.conf.template" >> /home/ubuntu/hobbit.log + sudo mkdir /opt/hobbit-platform-2.0.5/config/prometheus + cat > 
/opt/hobbit-platform-2.0.5/config/prometheus/prometheus.conf.template << 'EOL' + global: + scrape_interval: 15s + evaluation_interval: 15s + scrape_configs: + - job_name: container-metrics + static_configs: + - targets: ['container-exporter:9104'] + - job_name: cadvisor-metrics + static_configs: + - targets: ['cAdvisor:8081'] + - job_name: node-metrics + static_configs: + - targets: ['node-exporter:9100'] + EOL + + echo "Rabbit should container started. Checking port 5672" >> /home/ubuntu/hobbit.log + netstat -atn | grep 5672 >> /home/ubuntu/hobbit.log + + echo "starting node-exporter" >> /home/ubuntu/hobbit.log + docker run -d --name node-exporter -p 9100:9100 --rm prom/node-exporter + + echo "starting container-exporter" >> /home/ubuntu/hobbit.log + docker run -d --name container-exporter -p 9104:9104 --rm -v /var/run/docker.sock:/var/run/docker.sock:ro -v /sys/fs/cgroup:/cgroup:rw prom/container-exporter + + echo "starting cAdvisor" >> /home/ubuntu/hobbit.log + docker run -d --name cAdvisor -p 8081:8080 --rm -v /:/rootfs:ro -v /var/run:/var/run:rw -v /sys:/sys:ro -v /var/lib/docker/:/var/lib/docker:ro -v /dev/disk:/dev/disk:ro google/cadvisor + + echo "install_hobbit finished. 
exiting" >> /home/ubuntu/init.log + cat > /opt/hobbit-platform-2.0.5/docker-compose-rabbit.yml << 'EOL' + version: '3.3' + services: + rabbit: + image: rabbitmq:management + deploy: + replicas: 1 + restart_policy: + condition: any + delay: 15s + placement: + constraints: + - node.labels.org.hobbit.type == master + networks: + - hobbit + - hobbit-core + ports: + - "8081:15672" + - "5672:5672" + networks: + hobbit: + external: + name: hobbit + hobbit-core: + external: + name: hobbit-core + EOL + sudo docker stack deploy --compose-file /opt/hobbit-platform-2.0.5/docker-compose-rabbit.yml hobbit + +# echo "starting rabbit service" >> /home/ubuntu/hobbit.log +# sudo sed -i "s~8081:15672~8082:15672~g" /opt/hobbit-platform-2.0.5/docker-compose.yml +# sudo docker-compose up -d rabbit + +#docker service create -d --name cAdvisor -p 8081:8080 --rm -v /:/rootfs:ro -v /var/run:/var/run:rw -v /sys:/sys:ro -v /var/lib/docker/:/var/lib/docker:ro -v /dev/disk:/dev/disk:ro google/cadvisor +# echo "starting keycloak & gui" >> /home/ubuntu/hobbit.log +# sudo docker-compose up -d keycloak gui +# sudo sed -i "s~-H fd://~-H tcp://0.0.0.0:2376 -H unix:///var/run/docker.sock~g" /etc/systemd/system/docker.service +# echo "export GITLAB_USER=${GitlabUser} >> /etc/environment" +# echo "export GITLAB_EMAIL=${GitlabEmail} >> /etc/environment" +# echo "export GITLAB_TOKEN=${GitlabToken} >> /etc/environment" +# +# source /etc/environment +# +# echo "creating networks" >> /home/ubuntu/hobbit.log +# make create-networks >> /home/ubuntu/hobbit.log +# +# echo "pulling images" >> /home/ubuntu/hobbit.log +# sudo docker-compose pull >> /home/ubuntu/hobbit.log +# +# echo "pulling elk images" >> /home/ubuntu/hobbit.log +# sudo docker-compose -f /opt/hobbit-platform-2.0.5/docker-compose-elk.yml pull >> /home/ubuntu/hobbit.log +# +# echo "configuring virtuoso" >> /home/ubuntu/hobbit.log +# make setup-virtuoso >> /home/ubuntu/hobbit.log +# +# sudo chmod 777 /etc/sysctl.conf +# sudo echo 
"vm.max_map_count=262144" >> /etc/sysctl.conf +# sudo sysctl -p +# +# sudo sed -i "s/-Xms8g/-Xms2g/g" /opt/hobbit-platform-2.0.5/config/elk/jvm.options +# sudo sed -i "s/-Xmx8g/-Xmx2g/g" /opt/hobbit-platform-2.0.5/config/elk/jvm.options +# echo "starting elk" >> /home/ubuntu/hobbit.log +# sudo docker stack deploy --compose-file /opt/hobbit-platform-2.0.5/docker-compose-elk.yml elk >> /home/ubuntu/hobbit.log +# +# echo "starting platform" >> /home/ubuntu/hobbit.log +# sudo docker stack deploy --compose-file /opt/hobbit-platform-2.0.5/docker-compose.yml platform >> /home/ubuntu/hobbit.log +# +# echo "everything should be started" >> /home/ubuntu/hobbit.log +# echo "killing all docker daemons" >> /home/ubuntu/hobbit.log +# sudo kill -9 $(sudo ps -aux | grep docker | awk '{print $2}') + Properties: + ImageId: ami-de8fb135 # Ubuntu 16.04 + InstanceType: !Ref InstanceType + #SecurityGroups: !Ref SecurityGroups + SecurityGroups: + - 'Fn::ImportValue': !Sub '${ParentSecurityGroupsStack}-SecurityGroup' +# - 'Fn::ImportValue': !Sub '${ParentNATStack}-SecurityGroup' + + IamInstanceProfile: !Ref InstanceProfile + KeyName: !If [HasKeyName, !Ref KeyName, !Ref 'AWS::NoValue'] + BlockDeviceMappings: + - DeviceName: "/dev/sda1" + Ebs: + VolumeSize: '30' +# - DeviceName: "/dev/xvdcz" +# Ebs: +# VolumeSize: '22' + UserData: + "Fn::Base64": !Sub | + #!/bin/bash -xe + echo "Executing user data" >> /home/ubuntu/init.log + + echo "Checking internet connection" >> /home/ubuntu/init.log + cat > /opt/ping.sh << 'EOL' + ping -c4 8.8.8.8 + if [ $? 
-eq 0 ]; then + echo "8.8.8.8 is reachable" >> /home/ubuntu/init.log + else + echo "8.8.8.8 is not reachable" >> /home/ubuntu/init.log + sudo sh /opt/ping.sh + fi + EOL + sudo sh /opt/ping.sh + + echo "sudo apt-get update" >> /home/ubuntu/init.log + sudo apt-get update + + #echo "sudo apt-get -y upgrade" >> /home/ubuntu/init.log + #sudo apt-get -y upgrade + + echo "sudo apt install -y awscli" >> /home/ubuntu/init.log + # Install AWSCli + sudo apt install -y awscli + + # Install cfn-init for Ubuntu + apt-get -y install python-setuptools + echo "apt-get -y install python-setuptools" >> /home/ubuntu/init.log + easy_install https://s3.amazonaws.com/cloudformation-examples/aws-cfn-bootstrap-latest.tar.gz + ln -s /root/aws-cfn-bootstrap-latest/init/ubuntu/cfn-hup /etc/init.d/cfn-hup + + echo "Creating cfn script" >> /home/ubuntu/init.log + echo "echo 'Starting launch configuration' >> /home/ubuntu/init.log" >> /home/ubuntu/cfn.sh + echo "cfn-init -v --region ${AWS::Region} --stack ${AWS::StackName} --resource LaunchConfiguration" >> /home/ubuntu/cfn.sh + echo "cfn-signal -e $? --region ${AWS::Region} --stack ${AWS::StackName} --resource AutoScalingGroup" >> /home/ubuntu/cfn.sh + echo "echo 'Signals should be sent' >> /home/ubuntu/init.log" >> /home/ubuntu/cfn.sh + echo "Executing cfn script" >> /home/ubuntu/init.log + sudo sh /home/ubuntu/cfn.sh >> /home/ubuntu/init.log + + +# cfn-init -v --region eu-central-1 --stack swarm-manager --resource LaunchConfiguration +# cfn-signal -s true --region eu-central-1 --stack swarm-manager --resource AutoScalingGroup + +#Outputs: +# AutoScalingGroup: +# Description: 'Use this AutoScaling Group to identify Swarm Managers.' 
+# Value: !Ref AutoScalingGroup +# Export: +# Name: !Sub '${AWS::StackName}-AutoScalingGroup' + diff --git a/platform-controller/AWS/swarm-mode/securitygroups.yaml b/platform-controller/AWS/swarm-mode/securitygroups.yaml new file mode 100644 index 00000000..9d3a7085 --- /dev/null +++ b/platform-controller/AWS/swarm-mode/securitygroups.yaml @@ -0,0 +1,116 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: 'Docker Swarm - SecurityGroups' + +# https://github.com/pgarbe/containers_on_aws + +Parameters: + + ParentVPCStack: + Description: 'Stack name of parent VPC stack based on vpc/vpc-*azs.yaml template.' + Type: String + + ParentSSHBastionStack: + Description: 'Optional Stack name of parent SSH bastion host/instance stack based on vpc/vpc-ssh-bastion.yaml template.' + Type: String + Default: '' + + +Conditions: + HasSSHBastionSecurityGroup: !Not [!Equals [!Ref ParentSSHBastionStack, '']] + HasNotSSHBastionSecurityGroup: !Equals [!Ref ParentSSHBastionStack, ''] + +Resources: + + SwarmSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: !Sub ${AWS::StackName} + VpcId: + 'Fn::ImportValue': !Sub '${ParentVPCStack}-VPC' + + SecurityGroupInSSHBastion: + Type: 'AWS::EC2::SecurityGroupIngress' + Condition: HasSSHBastionSecurityGroup + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: tcp + FromPort: 22 + ToPort: 22 + SourceSecurityGroupId: + 'Fn::ImportValue': !Sub '${ParentSSHBastionStack}-SecurityGroup' + SecurityGroupInSSHBastion2: + Type: 'AWS::EC2::SecurityGroupIngress' + Condition: HasNotSSHBastionSecurityGroup + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: tcp + FromPort: 22 + ToPort: 22 + SourceSecurityGroupId: + 'Fn::ImportValue': !Sub '${ParentVPCStack}-BastionSecurityGroup' + + SecurityGroupPing: + Type: 'AWS::EC2::SecurityGroupIngress' + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: icmp + FromPort: 8 + ToPort: 0 + CidrIp: {'Fn::ImportValue': !Sub '${ParentVPCStack}-CidrBlock'} + 
Description: "Echo requests from other machines of the VPC cluster" + + SecurityGroupInClusterDocker1: + Type: 'AWS::EC2::SecurityGroupIngress' + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: tcp + FromPort: 2376 + ToPort: 2376 + SourceSecurityGroupId: + 'Fn::ImportValue': !Sub '${ParentVPCStack}-BastionSecurityGroup' + + SecurityGroupInClusterRabbit: + Type: 'AWS::EC2::SecurityGroupIngress' + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: tcp + FromPort: 5672 + ToPort: 5672 + SourceSecurityGroupId: + 'Fn::ImportValue': !Sub '${ParentVPCStack}-BastionSecurityGroup' + + SecurityGroupSelfIngress: + Type: 'AWS::EC2::SecurityGroupIngress' + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: tcp + FromPort: 0 + ToPort: 65535 + SourceSecurityGroupId: !Ref SwarmSecurityGroup + + SecurityGroupInClusterDocker7946Udp: + Type: 'AWS::EC2::SecurityGroupIngress' + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: udp + FromPort: 7946 + ToPort: 7946 + SourceSecurityGroupId: !Ref SwarmSecurityGroup + + SecurityGroupInSwarmOverlayNetwork: + Type: 'AWS::EC2::SecurityGroupIngress' + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: udp + FromPort: 4789 + ToPort: 4789 + SourceSecurityGroupId: !Ref SwarmSecurityGroup + + +Outputs: + + SecurityGroup: + Description: Security group for internal swarm communication + Value: !Ref SwarmSecurityGroup + Export: + Name: !Sub '${AWS::StackName}-SecurityGroup' \ No newline at end of file diff --git a/platform-controller/AWS/swarm-mode/worker.yaml b/platform-controller/AWS/swarm-mode/worker.yaml new file mode 100644 index 00000000..e30d6434 --- /dev/null +++ b/platform-controller/AWS/swarm-mode/worker.yaml @@ -0,0 +1,487 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: 'Docker Swarm - Worker' + +# https://github.com/pgarbe/containers_on_aws + +Parameters: + + ParentVPCStack: + Description: 'Stack name of parent VPC stack based on vpc/vpc-*azs.yaml template.' 
+ Type: String + + KeyName: + Description: 'Optional key pair of the ec2-user to establish a SSH connection to the EC2 instance.' + Type: String + + IAMUserSSHAccess: + Description: 'Synchronize public keys of IAM users to enable personalized SSH access (Doc: https://cloudonaut.io/manage-aws-ec2-ssh-access-with-iam/).' + Type: String + Default: false + AllowedValues: + - true + - false + + InstanceType: + Description: 'The instance type for the EC2 instance.' + Type: String + Default: 't2.micro' + + DesiredCapacity: + Description: 'The number of worker nodes' + Type: Number + Default: 1 + + MaxSize: + Description: 'MaxSize of worker nodes' + Type: Number + Default: 1 + + WorkerSubnetsReach: + Description: 'Should the workers have direct access to the Internet or do you prefer private subnets with NAT?' + Type: String + Default: Private + AllowedValues: + - Public + - Private + + WorkerType: + Description: 'WorkerType to be specified in HOBBIT labels' + Type: String + Default: '' + NoEcho: true + +# SecurityGroups: +# Description: Security group for which are allowed to talk to ASG +# Type: CommaDelimitedList + + ParentSecurityGroupsStack: + Description: 'ParentSecurityGroupsStack' + Type: String + + TargetGroups: + Description: Target groups to attach to the ASG + Type: CommaDelimitedList + Default: '' + + DockerVersion: + Description: 'Specifies the version of the Docker engine' + Type: String + Default: "17.03.0" + + DockerRepository: + Description: 'Specifies if stable or edge repository should be used' + Type: String + Default: stable + AllowedValues: + - stable + - edge + + ParentKeysManagementStack: + Description: 'ParentKeysManagementStack' + Type: String + + BucketName: + Description: 'Bucket name for placing join tokens' + Type: String + Default: '' + Tag: + Type: String + Default: 'Hobbit' +Conditions: + + HasKeyName: !Not [!Equals [!Ref KeyName, '']] + HasIAMUserSSHAccess: !Equals [!Ref IAMUserSSHAccess, 'true'] + +Resources: + 
InstanceProfile: + Type: 'AWS::IAM::InstanceProfile' + Properties: + Path: '/' + Roles: + - !Ref IAMRole + + IAMRole: + Type: 'AWS::IAM::Role' + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - 'ec2.amazonaws.com' + Action: + - 'sts:AssumeRole' + Path: '/' + Policies: + - PolicyName: logs + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'logs:CreateLogGroup' + - 'logs:CreateLogStream' + - 'logs:PutLogEvents' + - 'logs:DescribeLogStreams' + Resource: + - 'arn:aws:logs:*:*:*' + - PolicyName: asg + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'autoscaling:DescribeAutoScalingGroups' + - 'autoscaling:DescribeAutoScalingInstances' + - 'ec2:DescribeInstances' + Resource: + - '*' + - PolicyName: kms + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'kms:Decrypt' + - 'kms:DescribeKey' + Resource: + 'Fn::ImportValue': !Sub '${ParentKeysManagementStack}-SwarmTokenKeyArn' + #- !Ref JoinTokenKmsKey + + IAMPolicySSHAccess: + Type: 'AWS::IAM::Policy' + Condition: HasIAMUserSSHAccess + Properties: + Roles: + - !Ref IAMRole + PolicyName: iam + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'iam:ListUsers' + Resource: + - '*' + - Effect: Allow + Action: + - 'iam:ListSSHPublicKeys' + - 'iam:GetSSHPublicKey' + Resource: + - !Sub 'arn:aws:iam::${AWS::AccountId}:user/*' + + AutoScalingGroup: + Type: AWS::AutoScaling::AutoScalingGroup + Properties: + # AvailabilityZones: !Ref AvailabilityZones + VPCZoneIdentifier: + - 'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetA${WorkerSubnetsReach}' + #- 'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetB${WorkerSubnetsReach}' + #- 'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetC${WorkerSubnetsReach}' + LaunchConfigurationName: !Ref LaunchConfiguration + MinSize: 0 + MaxSize: !Ref MaxSize + DesiredCapacity: !Ref 
DesiredCapacity + #TargetGroupARNs: !Ref TargetGroups + MetricsCollection: + - Granularity: 1Minute + Metrics: + - GroupInServiceInstances + Tags: + - Key: Cluster + Value: !Ref Tag + PropagateAtLaunch: true + - Key: Name + Value: !Sub ${AWS::StackName} + PropagateAtLaunch: 'true' + CreationPolicy: + ResourceSignal: + Timeout: PT10M + UpdatePolicy: + AutoScalingRollingUpdate: + MinInstancesInService: 1 + MaxBatchSize: '1' + PauseTime: PT10M + SuspendProcesses: + - AlarmNotification + WaitOnResourceSignals: 'true' + + LaunchConfiguration: + Type: AWS::AutoScaling::LaunchConfiguration + Metadata: + AWS::CloudFormation::Init: + configSets: + default: + #!If [HasIAMUserSSHAccess, [docker-ubuntu, swarm], [docker-ubuntu, swarm]] + !If [HasIAMUserSSHAccess, [docker-ubuntu, swarm], [docker-ubuntu, swarm]] + #!If [HasIAMUserSSHAccess, [ssh-access, docker-ubuntu, swarm], [docker-ubuntu, swarm]] +# ssh-access: +# files: +# '/opt/authorized_keys_command.sh': +# content: | +# #!/bin/bash -e +# if [ -z "$1" ]; then +# exit 1 +# fi +# SaveUserName="$1" +# SaveUserName=${SaveUserName//"+"/".plus."} +# SaveUserName=${SaveUserName//"="/".equal."} +# SaveUserName=${SaveUserName//","/".comma."} +# SaveUserName=${SaveUserName//"@"/".at."} +# aws iam list-ssh-public-keys --user-name "$SaveUserName" --query "SSHPublicKeys[?Status == 'Active'].[SSHPublicKeyId]" --output text | while read KeyId; do +# aws iam get-ssh-public-key --user-name "$SaveUserName" --ssh-public-key-id "$KeyId" --encoding SSH --query "SSHPublicKey.SSHPublicKeyBody" --output text +# done +# mode: '000755' +# owner: root +# group: root +# '/opt/import_users.sh': +# content: | +# #!/bin/bash +# aws iam list-users --query "Users[].[UserName]" --output text | while read User; do +# SaveUserName="$User" +# SaveUserName=${SaveUserName//"+"/".plus."} +# SaveUserName=${SaveUserName//"="/".equal."} +# SaveUserName=${SaveUserName//","/".comma."} +# SaveUserName=${SaveUserName//"@"/".at."} +# if id -u "$SaveUserName" >/dev/null 
2>&1; then +# echo "$SaveUserName exists" +# else +# #sudo will read each file in /etc/sudoers.d, skipping file names that end in ‘~’ or contain a ‘.’ character to avoid causing problems with package manager or editor temporary/backup files. +# SaveUserFileName=$(echo "$SaveUserName" | tr "." " ") +# /usr/sbin/adduser "$SaveUserName" +# echo "$SaveUserName ALL=(ALL) NOPASSWD:ALL" > "/etc/sudoers.d/$SaveUserFileName" +# fi +# done +# mode: '000755' +# owner: root +# group: root +# '/etc/cron.d/import_users': +# content: | +# */10 * * * * root /opt/import_users.sh +# mode: '000644' +# owner: root +# group: root +# commands: +# 'a_configure_sshd_command': +# command: 'sed -i "s:#AuthorizedKeysCommand none:AuthorizedKeysCommand /opt/authorized_keys_command.sh:g" /etc/ssh/sshd_config' +# 'b_configure_sshd_commanduser': +# command: 'sed -i "s:#AuthorizedKeysCommandUser nobody:AuthorizedKeysCommandUser nobody:g" /etc/ssh/sshd_config' +# 'c_import_users': +# command: './import_users.sh' +# cwd: '/opt' +# services: +# sysvinit: +# sshd: +# enabled: true +# ensureRunning: true +# commands: +# - 'a_configure_sshd_command' +# - 'b_configure_sshd_commanduser' + + docker-ubuntu: + commands: + 'a_install_prereqs': + command: 'sudo apt-get install apt-transport-https ca-certificates curl software-properties-common htop supervisor -y' + 'b_set_gpg_key': + command: 'curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -' + 'c_add_fingerprint': + command: 'sudo apt-key fingerprint 0EBFCD88' + 'd_add_docker_repo': + command: !Sub 'sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) ${DockerRepository}"' + 'e_update_aptget': + command: 'sudo apt-get update' + 'f_install_docker': + command: !Sub 'sudo apt-get install -y docker-ce=${DockerVersion}~ce-0~ubuntu' + 'g_create_service': + command: 'sudo service docker start' + 'h_add_ubuntu_user_to_docker_group': + command: 'sudo usermod -aG docker ubuntu' + 
'i_verify_installation': + command: 'sudo docker run hello-world' + 'k_verify_installation': + command: 'docker run hello-world' + swarm: + commands: + 'a_join_swarm': + command: !Sub | + echo "modifying docker service" >> /home/ubuntu/hobbit.log + sudo sed -i "s~-H fd://~-H tcp://0.0.0.0:2376 -H unix:///var/run/docker.sock~g" /lib/systemd/system/docker.service + echo "reloading service daemon" >> /home/ubuntu/hobbit.log + sudo systemctl daemon-reload + echo "restaring service" >> /home/ubuntu/hobbit.log + sudo service docker restart + echo "checking 2376 port with netstat" >> /home/ubuntu/hobbit.log + netstat -atn | grep 2376 >> /home/ubuntu/hobbit.log + + echo "swarm -> a_join_swarm" >> /home/ubuntu/swarm.log + + cat > /home/ubuntu/joinSwarmIfNeeded.sh << 'EOL' + date + FORMERMANAGER=$(cat /home/ubuntu/worker_join.sh | awk '{print $6}' | tr ":" "\n" | awk '{if(NR==1)print}') + echo "Former manager: $FORMERMANAGER" + echo "Downloading worker_join.sh from the s3" + aws s3api get-object --bucket ${BucketName} --key worker_join.sh /home/ubuntu/worker_join.sh --region=${AWS::Region} + NEWMANAGER=$(cat /home/ubuntu/worker_join.sh | awk '{print $6}' | tr ":" "\n" | awk '{if(NR==1)print}') + echo "New manager: $NEWMANAGER" + if [ "$NEWMANAGER" != "$FORMERMANAGER" ] + then + echo "Leaving former swarm at $FORMERMANAGER" + docker swarm leave + echo "Killing all running containers" + sudo docker rm $(sudo docker stop $(sudo docker ps -a | awk '{print $1}')) + echo "Joining new swarm at $NEWMANAGER" + sh /home/ubuntu/worker_join.sh + echo "sudo sh /home/ubuntu/sendLabels.sh ${WorkerType} $NEWMANAGER" + #sudo chmod 777 /etc/hosts + #sudo sed -i "s~$FORMERMANAGER~#$FORMERMANAGER~g" /etc/hosts + #sudo echo "$NEWMANAGER rabbit" >> /etc/hosts + fi + sudo sh /home/ubuntu/sendLabels.sh ${WorkerType} $NEWMANAGER + EOL + + cat > /home/ubuntu/sendLabels.sh << 'EOL' + echo "Sending labels to master $2" + NODE_ID=$(docker info | grep NodeID | awk '{print $2}') + IP=$(docker info | grep 
Name | awk '{print $2}' | cut -c4-15) + + echo 'echo "$NODE_ID --label-add org.hobbit.type=worker" | netcat $2 4444' + echo 'echo "$NODE_ID --label-add org.hobbit.workergroup=$1" | netcat $2 4444' + echo 'echo "$NODE_ID --label-add org.hobbit.name=$1_worker_$IP" | netcat $2 4444' + + echo "$NODE_ID --label-add org.hobbit.type=worker" | netcat $2 4444 + echo "$NODE_ID --label-add org.hobbit.workergroup=$1" | netcat $2 4444 + echo "$NODE_ID --label-add org.hobbit.name=$1_worker_$IP" | netcat $2 4444 + echo "Labels should be sent" + EOL + + cat > /home/ubuntu/joinLoop.sh << 'EOL' + sudo install -m 777 /dev/null /var/log/joinLoop.log + while : + do + sudo sh /home/ubuntu/joinSwarmIfNeeded.sh > /var/log/joinLoop.log + sleep 15 + done + EOL + + echo "adding joinLoop to /etc/supervisor/supervisord.conf" >> /home/ubuntu/init.log + cat > /etc/supervisor/supervisord.conf << 'EOL' + [supervisord] + [program:joinLoop] + command=/bin/bash /home/ubuntu/joinLoop.sh + autostart=true + autorestart=true + EOL + + echo "restarting supervisor service" >> /home/ubuntu/init.log + sudo service supervisor restart & + echo "join_loop should be started" >> /home/ubuntu/init.log + + echo "starting node-exporter" >> /home/ubuntu/hobbit.log + docker run -d --name node-exporter -p 9100:9100 --rm prom/node-exporter + + echo "starting container-exporter" >> /home/ubuntu/hobbit.log + docker run -d --name container-exporter -p 9104:9104 --rm -v /var/run/docker.sock:/var/run/docker.sock:ro -v /sys/fs/cgroup:/cgroup:rw prom/container-exporter + + echo "starting cAdvisor" >> /home/ubuntu/hobbit.log + docker run -d --name cAdvisor -p 8081:8080 --rm -v /:/rootfs:ro -v /var/run:/var/run:rw -v /sys:/sys:ro -v /var/lib/docker/:/var/lib/docker:ro -v /dev/disk:/dev/disk:ro google/cadvisor + + + # for ID in $(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names ${SwarmManagerAutoScalingGroup} --region ${AWS::Region} --query AutoScalingGroups[].Instances[].InstanceId --output text); +# do +# 
IP=$(aws ec2 describe-instances --instance-ids $ID --region ${AWS::Region} --query Reservations[].Instances[].PrivateIpAddress --output text) +# if [ ! -z "$IP" ] ; then +# #echo "Try to join swarm with IP $IP" >> /home/ubuntu/init.log +# # Join the swarm; if it fails try the next one +# +# #echo "docker swarm join --token $JOIN_TOKEN $IP:2377" >> /home/ubuntu/swarm.log +# #docker swarm join --token $JOIN_TOKEN $IP:2377 && break || continue +# +# #NODE_ADDRESS=$(docker info | grep "Node Address" | awk '{print $3}') +# echo "Sending labels to master (NODE_ID is $NODE_ID)" >> /home/ubuntu/swarm.log +# +# cat > /home/ubuntu/labels.sh << 'EOL' +# NODE_ID=$(docker info | grep NodeID | awk '{print $2}') +# echo 'echo "$NODE_ID --label-add org.hobbit.type=worker" | netcat $2 4444' +# echo "$NODE_ID --label-add org.hobbit.type=worker" | netcat $2 4444 +# echo 'echo "$NODE_ID --label-add org.hobbit.workergroup=$1" | netcat $2 4444' +# echo "$NODE_ID --label-add org.hobbit.workergroup=$1" | netcat $2 4444 +# echo 'echo "$NODE_ID --label-add org.hobbit.name=worker" | netcat $2 4444' +# echo "$NODE_ID --label-add org.hobbit.name=worker" | netcat $2 4444 +# EOL +# +# echo "sudo /home/ubuntu/labels.sh ${WorkerType} $IP" >> /home/ubuntu/swarm.log +# sudo sh /home/ubuntu/labels.sh ${WorkerType} $IP >> /home/ubuntu/swarm.log +# +# #echo "abc --label-add org.hobbit.type=data" | netcat localhost 4444 +# #echo "$NODE_ID --label-add org.hobbit.type=worker" | netcat $IP 4444 +# #echo "$NODE_ID --label-add org.hobbit.workergroup=${WorkerType}" | netcat $IP 4444 +# #echo "$NODE_ID --label-add org.hobbit.name=worker" | netcat $IP 4444 +# +# echo "Labels should be sent to master" >> /home/ubuntu/swarm.log +# +# echo "$IP rabbit" >> /etc/hosts +# fi +# done + + 'b_swarm_healthcheck': + command: + echo "swarm -> b_swarm_healthcheck" >> /home/ubuntu/init.log + docker info --format "{{.Swarm.NodeID}}" >> /home/ubuntu/init.log + + Properties: + ImageId: ami-de8fb135 # Ubuntu 16.04 + 
InstanceType: !Ref InstanceType + SecurityGroups: + - 'Fn::ImportValue': !Sub '${ParentSecurityGroupsStack}-SecurityGroup' +# - !Ref SwarmSecurityGroup + + IamInstanceProfile: !Ref InstanceProfile + KeyName: !If [HasKeyName, !Ref KeyName, !Ref 'AWS::NoValue'] + BlockDeviceMappings: + - DeviceName: "/dev/sda1" + Ebs: + VolumeSize: '30' + UserData: + "Fn::Base64": !Sub | + #!/bin/bash -xe + + echo "Executing user data" >> /home/ubuntu/init.log + + echo "Checking internet connection" >> /home/ubuntu/init.log + cat > /opt/ping.sh << 'EOL' + ping -c4 8.8.8.8 + if [ $? -eq 0 ]; then + echo "8.8.8.8 is reachable" >> /home/ubuntu/init.log + else + echo "8.8.8.8 is not reachable" >> /home/ubuntu/init.log + sudo sh /opt/ping.sh + fi + EOL + sudo sh /opt/ping.sh + + echo "sudo apt-get update" >> /home/ubuntu/init.log + sudo apt-get update + + #echo "sudo apt-get -y upgrade" >> /home/ubuntu/init.log + #sudo apt-get -y upgrade + + echo "sudo apt install -y awscli" >> /home/ubuntu/init.log + # Install AWSCli + sudo apt install -y awscli + + # Install cfn-init for Ubuntu + apt-get -y install python-setuptools + easy_install https://s3.amazonaws.com/cloudformation-examples/aws-cfn-bootstrap-latest.tar.gz + ln -s /root/aws-cfn-bootstrap-latest/init/ubuntu/cfn-hup /etc/init.d/cfn-hup + + echo "Creating cfn script" >> /home/ubuntu/init.log + echo "cfn-init -v --region ${AWS::Region} --stack ${AWS::StackName} --resource LaunchConfiguration" >> /home/ubuntu/cfn.sh + echo "cfn-signal -e $? 
--region ${AWS::Region} --stack ${AWS::StackName} --resource AutoScalingGroup" >> /home/ubuntu/cfn.sh + echo "echo 'Signals should be sent' >> /home/ubuntu/init.log" >> /home/ubuntu/cfn.sh + + sh /home/ubuntu/cfn.sh >> /home/ubuntu/init.log + +#Outputs: +# SwarmManagerAutoScalingGroup: +# Value: !Sub 'arn:aws:s3:::${BucketName}/*' diff --git a/platform-controller/AWS/vpc-1azs.yaml b/platform-controller/AWS/vpc-1azs.yaml new file mode 100644 index 00000000..20fe98b0 --- /dev/null +++ b/platform-controller/AWS/vpc-1azs.yaml @@ -0,0 +1,326 @@ +--- +# Copyright 2018 widdix GmbH +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+AWSTemplateFormatVersion: '2010-09-09' +Description: 'VPC: public and private subnets in one availability zone, a cloudonaut.io template' +Metadata: + 'AWS::CloudFormation::Interface': + ParameterGroups: + - Label: + default: 'VPC Parameters' + Parameters: + - ClassB +Parameters: + ClassB: + Description: 'Class B of VPC (10.XXX.0.0/16)' + Type: Number + Default: 0 + ConstraintDescription: 'Must be in the range [0-255]' + MinValue: 0 + MaxValue: 255 + Tag: + Type: String + Default: 'Hobbit' +Resources: + VPC: + Type: 'AWS::EC2::VPC' + Properties: + CidrBlock: !Sub '10.${ClassB}.0.0/16' + EnableDnsSupport: true + EnableDnsHostnames: true + InstanceTenancy: default + Tags: + - Key: Cluster + Value: !Ref Tag + InternetGateway: + Type: 'AWS::EC2::InternetGateway' + Properties: + Tags: + - Key: Name + Value: !Sub '${AWS::StackName}-10.${ClassB}.0.0/16' + - Key: Cluster + Value: !Ref Tag + VPCGatewayAttachment: + Type: 'AWS::EC2::VPCGatewayAttachment' + Properties: + VpcId: !Ref VPC + InternetGatewayId: !Ref InternetGateway + RouteTablePublicInternetRoute: # should be RouteTablePublicAInternetRoute, but logical id was not changed for backward compatibility + Type: 'AWS::EC2::Route' + DependsOn: VPCGatewayAttachment + Properties: + RouteTableId: !Ref RouteTablePublic + DestinationCidrBlock: '0.0.0.0/0' + GatewayId: !Ref InternetGateway + SubnetAPublic: + Type: 'AWS::EC2::Subnet' + Properties: + AvailabilityZone: !Select [0, !GetAZs ''] + CidrBlock: !Sub '10.${ClassB}.0.0/20' + MapPublicIpOnLaunch: true + VpcId: !Ref VPC + Tags: + - Key: Name + Value: !Sub '${AWS::StackName}-A public' + - Key: Reach + Value: public + - Key: Cluster + Value: !Ref Tag + SubnetAPrivate: + Type: 'AWS::EC2::Subnet' + Properties: + AvailabilityZone: !Select [0, !GetAZs ''] + CidrBlock: !Sub '10.${ClassB}.16.0/20' + VpcId: !Ref VPC + Tags: + - Key: Name + Value: !Sub '${AWS::StackName}-A private' + - Key: Reach + Value: private + - Key: Cluster + Value: !Ref Tag + RouteTablePublic: # should be 
RouteTableAPublic, but logical id was not changed for backward compatibility + Type: 'AWS::EC2::RouteTable' + Properties: + VpcId: !Ref VPC + Tags: + - Key: Name + Value: !Sub '${AWS::StackName}-A public' + - Key: Cluster + Value: !Ref Tag + RouteTablePrivate: # should be RouteTableAPrivate, but logical id was not changed for backward compatibility + Type: 'AWS::EC2::RouteTable' + Properties: + VpcId: !Ref VPC + Tags: + - Key: Name + Value: !Sub '${AWS::StackName}-A private' + - Key: Cluster + Value: !Ref Tag + RouteTableAssociationAPublic: + Type: 'AWS::EC2::SubnetRouteTableAssociation' + Properties: + SubnetId: !Ref SubnetAPublic + RouteTableId: !Ref RouteTablePublic + RouteTableAssociationAPrivate: + Type: 'AWS::EC2::SubnetRouteTableAssociation' + Properties: + SubnetId: !Ref SubnetAPrivate + RouteTableId: !Ref RouteTablePrivate + NetworkAclPublic: + Type: 'AWS::EC2::NetworkAcl' + Properties: + VpcId: !Ref VPC + Tags: + - Key: Name + Value: Public + - Key: Cluster + Value: !Ref Tag + NetworkAclPrivate: + Type: 'AWS::EC2::NetworkAcl' + Properties: + VpcId: !Ref VPC + Tags: + - Key: Name + Value: Private + - Key: Cluster + Value: !Ref Tag + SubnetNetworkAclAssociationAPublic: + Type: 'AWS::EC2::SubnetNetworkAclAssociation' + Properties: + SubnetId: !Ref SubnetAPublic + NetworkAclId: !Ref NetworkAclPublic + SubnetNetworkAclAssociationAPrivate: + Type: 'AWS::EC2::SubnetNetworkAclAssociation' + Properties: + SubnetId: !Ref SubnetAPrivate + NetworkAclId: !Ref NetworkAclPrivate + NetworkAclEntryInPublicAllowAll: + Type: 'AWS::EC2::NetworkAclEntry' + Properties: + NetworkAclId: !Ref NetworkAclPublic + RuleNumber: 99 + Protocol: -1 + RuleAction: allow + Egress: false + CidrBlock: '0.0.0.0/0' + NetworkAclEntryOutPublicAllowAll: + Type: 'AWS::EC2::NetworkAclEntry' + Properties: + NetworkAclId: !Ref NetworkAclPublic + RuleNumber: 99 + Protocol: -1 + RuleAction: allow + Egress: true + CidrBlock: '0.0.0.0/0' + NetworkAclEntryInPrivateAllowVPC: + Type: 
'AWS::EC2::NetworkAclEntry' + Properties: + NetworkAclId: !Ref NetworkAclPrivate + RuleNumber: 99 + Protocol: -1 + RuleAction: allow + Egress: false + CidrBlock: '0.0.0.0/0' + NetworkAclEntryOutPrivateAllowVPC: + Type: 'AWS::EC2::NetworkAclEntry' + Properties: + NetworkAclId: !Ref NetworkAclPrivate + RuleNumber: 99 + Protocol: -1 + RuleAction: allow + Egress: true + CidrBlock: '0.0.0.0/0' +# BastionSecurityGroup: +# Type: 'AWS::EC2::SecurityGroup' +# Properties: +# GroupDescription: !Sub '${AWS::StackName}-bastion-security-group' +# SecurityGroupIngress: +# - IpProtocol: tcp +# FromPort: 22 +# ToPort: 22 +# CidrIp: '0.0.0.0/0' +## - IpProtocol: tcp +## FromPort: 2376 +## ToPort: 2376 +## CidrIp: '0.0.0.0/0' +# VpcId: !Ref VPC +# Tags: +# - Key: Cluster +# Value: !Ref Tag + BastionSecurityGroup: + Type: 'AWS::EC2::SecurityGroup' + Properties: + GroupDescription: !Sub '${AWS::StackName}-bastion-security-group' + SecurityGroupEgress: +# - IpProtocol: udp +# FromPort: 123 +# ToPort: 123 +# CidrIp: '0.0.0.0/0' + - IpProtocol: tcp + FromPort: 0 + ToPort: 65535 + CidrIp: '0.0.0.0/0' + Description: "All tcp ports in outside world" + - IpProtocol: icmp + FromPort: 8 + ToPort: 0 + CidrIp: '0.0.0.0/0' + Description: "Echo requests to any host in outside world" + SecurityGroupIngress: + - IpProtocol: tcp + FromPort: 22 + ToPort: 22 + CidrIp: '0.0.0.0/0' +# - IpProtocol: udp +# FromPort: 123 +# ToPort: 123 +# CidrIp: !GetAtt 'VPC.CidrBlock' + - IpProtocol: tcp + FromPort: 0 + ToPort: 65535 + CidrIp: !GetAtt 'VPC.CidrBlock' + Description: "All TCP requests from other machines of the cluster" + - IpProtocol: icmp + FromPort: 8 + ToPort: 0 + CidrIp: !GetAtt 'VPC.CidrBlock' + Description: "Echo requests from other machines of the cluster" + VpcId: !Ref VPC + Tags: + - Key: Cluster + Value: !Ref Tag +Outputs: + TemplateID: + Description: 'cloudonaut.io template id.' + Value: 'vpc/vpc-1azs' + TemplateVersion: + Description: 'cloudonaut.io template version.' 
+ Value: 'latest' + StackName: + Description: 'Stack name.' + Value: !Sub '${AWS::StackName}' + AZs: + Description: 'AZs' + Value: 1 + Export: + Name: !Sub '${AWS::StackName}-AZs' + AZA: + Description: 'AZ of A' + Value: !Select [0, !GetAZs ''] + Export: + Name: !Sub '${AWS::StackName}-AZA' + ClassB: + Description: 'Deprecated in v7, will be removed in v8, use CidrBlock instead! Class B.' + Value: !Ref ClassB + Export: + Name: !Sub '${AWS::StackName}-ClassB' + CidrBlock: + Description: 'The set of IP addresses for the VPC.' + Value: !GetAtt 'VPC.CidrBlock' + Export: + Name: !Sub '${AWS::StackName}-CidrBlock' + VPC: + Description: 'VPC.' + Value: !Ref VPC + Export: + Name: !Sub '${AWS::StackName}-VPC' + SubnetsPublic: + Description: 'Subnets public.' + Value: !Join [',', [!Ref SubnetAPublic]] + Export: + Name: !Sub '${AWS::StackName}-SubnetsPublic' + SubnetsPrivate: + Description: 'Subnets private.' + Value: !Join [',', [!Ref SubnetAPrivate]] + Export: + Name: !Sub '${AWS::StackName}-SubnetsPrivate' + + RouteTablesPrivate: + Description: 'Route tables private.' + Value: !Join [',', [!Ref RouteTablePrivate]] + Export: + Name: !Sub '${AWS::StackName}-RouteTablesPrivate' + RouteTablesPublic: + Description: 'Route tables public.' + Value: !Join [',', [!Ref RouteTablePublic]] + Export: + Name: !Sub '${AWS::StackName}-RouteTablesPublic' + + SubnetAPublic: + Description: 'Subnet A public.' + Value: !Ref SubnetAPublic + Export: + Name: !Sub '${AWS::StackName}-SubnetAPublic' + RouteTableAPublic: + Description: 'Route table A public.' + Value: !Ref RouteTablePublic + Export: + Name: !Sub '${AWS::StackName}-RouteTableAPublic' + + SubnetAPrivate: + Description: 'Subnet A private.' + Value: !Ref SubnetAPrivate + Export: + Name: !Sub '${AWS::StackName}-SubnetAPrivate' + RouteTableAPrivate: + Description: 'Route table A private.' 
+ Value: !Ref RouteTablePrivate + Export: + Name: !Sub '${AWS::StackName}-RouteTableAPrivate' + SecurityGroup: + Description: 'Use this Security Group to reference incoming traffic from the SSH bastion host/instance.' + Value: !Ref BastionSecurityGroup + Export: + Name: !Sub '${AWS::StackName}-BastionSecurityGroup' \ No newline at end of file diff --git a/platform-controller/pom.xml b/platform-controller/pom.xml index 4371e763..966b18bf 100644 --- a/platform-controller/pom.xml +++ b/platform-controller/pom.xml @@ -43,6 +43,18 @@ org.hobbit core + + + + org.hobbit + aws-controller + 1.0.0-SNAPSHOT + commons-io diff --git a/platform-controller/src/main/java/org/hobbit/controller/ExperimentManager.java b/platform-controller/src/main/java/org/hobbit/controller/ExperimentManager.java index 7c29675a..3ad973f4 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/ExperimentManager.java +++ b/platform-controller/src/main/java/org/hobbit/controller/ExperimentManager.java @@ -19,21 +19,16 @@ import java.io.Closeable; import java.io.IOException; import java.io.StringWriter; -import java.util.HashSet; -import java.util.Objects; -import java.util.Set; -import java.util.Timer; -import java.util.TimerTask; +import java.util.*; import java.util.concurrent.Semaphore; import java.util.function.Function; +import com.spotify.docker.client.messages.Volume; import org.apache.commons.io.IOUtils; -import org.apache.jena.rdf.model.Model; -import org.apache.jena.rdf.model.NodeIterator; -import org.apache.jena.rdf.model.Property; -import org.apache.jena.rdf.model.ResIterator; -import org.apache.jena.rdf.model.Resource; +import org.apache.jena.rdf.model.*; +import org.apache.jena.rdf.model.impl.PropertyImpl; import org.apache.jena.vocabulary.RDF; +import org.hobbit.controller.docker.CloudClusterManager; import org.hobbit.controller.config.HobbitConfig; import org.hobbit.controller.data.ExperimentConfiguration; import org.hobbit.controller.data.ExperimentStatus; @@ -56,6 +51,8 
@@ import com.spotify.docker.client.exceptions.DockerException; +import static org.hobbit.controller.docker.DockerUtility.getDockerClient; + /** * This class encapsulates (and synchronizes) all methods that are applied on a * running experiment. @@ -66,6 +63,8 @@ public class ExperimentManager implements Closeable { private static final Logger LOGGER = LoggerFactory.getLogger(ExperimentManager.class); private static final int DEFAULT_MAX_EXECUTION_TIME = 20 * 60 * 1000; + public static final String MAX_EXECUTION_TIME_KEY = "MAX_EXECUTION_TIME"; + /** * Time interval the experiment manager waits before it checks for the an @@ -82,7 +81,7 @@ public class ExperimentManager implements Closeable { /** * Default time an experiment has to terminate after it has been started. */ - public long defaultMaxExecutionTime = DEFAULT_MAX_EXECUTION_TIME; + public long defaultMaxExecutionTime; /** * The controller this manager belongs to. */ @@ -101,6 +100,8 @@ public class ExperimentManager implements Closeable { * Timer used to trigger the creation of the next benchmark. */ private Timer expStartTimer; + private String systemTaskId; + private Volume systemAdapterVolume; public ExperimentManager(PlatformController controller) { this(controller, CHECK_FOR_FIRST_EXPERIMENT, CHECK_FOR_NEW_EXPERIMENT); @@ -113,8 +114,9 @@ protected ExperimentManager(PlatformController controller, long checkForFirstExp try { // TODO environment variable should have been used there // TODO global static method in hobbit core for retrieving values like this - defaultMaxExecutionTime = Long - .parseLong(System.getProperty("MAX_EXECUTION_TIME", Long.toString(DEFAULT_MAX_EXECUTION_TIME))); + defaultMaxExecutionTime = (System.getenv().containsKey(MAX_EXECUTION_TIME_KEY) ? 
Long + .parseLong(System.getenv().get(MAX_EXECUTION_TIME_KEY)):DEFAULT_MAX_EXECUTION_TIME); + } catch (Exception e) { LOGGER.debug("Could not get execution time from env, using default value.."); } @@ -142,7 +144,7 @@ public void run() { * Creates the next experiment if there is no experiment running and there is an * experiment waiting in the queue. */ - public void createNextExperiment() { + public void createNextExperiment(){ try { experimentMutex.acquire(); } catch (InterruptedException e) { @@ -151,27 +153,56 @@ public void createNextExperiment() { } try { // if there is no benchmark running, the queue has been - // initialized and cluster is healthy + // initialized and interfaces is healthy + + //ResourceUsageInformation su = controller.getResInfoCollector().getSystemUsageInformation(); if ((experimentStatus == null) && (controller.queue != null)) { ClusterManager clusterManager = this.controller.clusterManager; - boolean isClusterHealthy = clusterManager.isClusterHealthy(); - if(!isClusterHealthy) { - LOGGER.error("Can not start next experiment in the queue, cluster is NOT HEALTHY. " + - "Check your cluster consistency or adjust SWARM_NODE_NUMBER environment variable." + - " Expected number of nodes: "+clusterManager.getExpectedNumberOfNodes()+ - " Current number of nodes: "+clusterManager.getNumberOfNodes()); + + if(clusterManager instanceof CloudClusterManager) + ((CloudClusterManager) clusterManager).reactOnQueue(controller.queue); + + + if(controller.queue.listAll().size()==0) return; - } + + ExperimentConfiguration config = controller.queue.getNextExperiment(); LOGGER.debug("Trying to start the next benchmark."); if (config == null) { LOGGER.debug("There is no experiment to start."); return; } + + + if(!(clusterManager instanceof CloudClusterManager) && !clusterManager.isClusterHealthy()) { + LOGGER.error("Can not start next experiment in the queue, cluster is NOT HEALTHY. 
" + + "Check your cluster consistency or adjust SWARM_NODE_NUMBER environment variable." + + " Expected number of nodes: " + clusterManager.getExpectedNumberOfNodes() + + " Current number of nodes: " + clusterManager.getNumberOfNodes()); + return; + } + + LOGGER.info("Creating next experiment " + config.id + " with benchmark " + config.benchmarkUri + " and system " + config.systemUri + " to the queue."); + experimentStatus = new ExperimentStatus(config, PlatformController.generateExperimentUri(config.id)); + if(clusterManager instanceof CloudClusterManager){ + CloudClusterManager cloudClusterManager = (CloudClusterManager)clusterManager; + String clusterConfiguration = getClusterConfiguration(config); + try { + experimentStatus.setState(States.CLOUD_RESOURCES_PREPARATION); + cloudClusterManager.createCluster(clusterConfiguration); + experimentStatus.setState(States.PREPARATION); + } + catch (Exception e) { + LOGGER.error("Could not create cluster: {}",e.getLocalizedMessage()); + return; + } + } + BenchmarkMetaData benchmark = controller.imageManager().getBenchmark(config.benchmarkUri); if ((benchmark == null) || (benchmark.mainImage == null)) { experimentStatus = new ExperimentStatus(config, PlatformController.generateExperimentUri(config.id), @@ -225,13 +256,14 @@ public void createNextExperiment() { experimentStatus.startAbortionTimer(this, maxExecutionTime); experimentStatus.setState(States.INIT); + String serializedBenchmarkParams = getSerializedBenchmarkParams(config, benchmark); LOGGER.info("Creating benchmark controller " + benchmark.mainImage); String containerId = controller.containerManager.startContainer(benchmark.mainImage, Constants.CONTAINER_TYPE_BENCHMARK, null, new String[] { Constants.RABBIT_MQ_HOST_NAME_KEY + "=" + controller.rabbitMQHostName(), Constants.HOBBIT_SESSION_ID_KEY + "=" + config.id, Constants.HOBBIT_EXPERIMENT_URI_KEY + "=" + experimentStatus.experimentUri, - Constants.BENCHMARK_PARAMETERS_MODEL_KEY + "=" + 
config.serializedBenchParams, + Constants.BENCHMARK_PARAMETERS_MODEL_KEY + "=" + serializedBenchmarkParams /*config.serializedBenchParams*/, Constants.SYSTEM_URI_KEY + "=" + config.systemUri }, null, config.id); if (containerId == null) { @@ -242,19 +274,22 @@ public void createNextExperiment() { experimentStatus.setBenchmarkContainer(containerId); LOGGER.info("Creating system " + system.mainImage); + String serializedSystemParams = getSerializedSystemParams(config, benchmark, system); - containerId = controller.containerManager.startContainer(system.mainImage, + systemAdapterVolume = getDockerClient().createVolume(); + systemTaskId = controller.containerManager.startContainer(system.mainImage, Constants.CONTAINER_TYPE_SYSTEM, experimentStatus.getBenchmarkContainer(), new String[] { Constants.RABBIT_MQ_HOST_NAME_KEY + "=" + controller.rabbitMQHostName(), Constants.HOBBIT_SESSION_ID_KEY + "=" + config.id, Constants.SYSTEM_PARAMETERS_MODEL_KEY + "=" + serializedSystemParams }, - null, config.id); - if (containerId == null) { + null, config.id, new String[]{ systemAdapterVolume.name()+":/share" }); + + if (systemTaskId == null) { LOGGER.error("Couldn't start the system. 
Trying to cancel the benchmark."); forceBenchmarkTerminate_unsecured(HobbitErrors.SystemCreationError); throw new Exception("Couldn't start the system " + config.systemUri); } else { - experimentStatus.setSystemContainer(containerId); + experimentStatus.setSystemContainer(systemTaskId); } } } catch (Exception e) { @@ -269,19 +304,49 @@ public void createNextExperiment() { } } + protected static String getSerializedBenchmarkParams(ExperimentConfiguration config, BenchmarkMetaData benchmark) { + + + Model benchParams = RabbitMQUtils.readModel(config.serializedBenchParams); + Resource experiment = benchParams.getResource(Constants.NEW_EXPERIMENT_URI); + Property defaultValProperty = benchmark.rdfModel.getProperty("http://w3id.org/hobbit/vocab#defaultValue"); + if (benchmark.rdfModel.contains(null, RDF.type, HOBBIT.Parameter)){ + + // Get an iterator for all these parameters + ResIterator iterator = benchmark.rdfModel.listResourcesWithProperty(RDF.type, HOBBIT.Parameter); + while (iterator.hasNext()) { + // Get the parameter + Property parameter = benchParams.getProperty(((Resource)iterator.next()).getURI()); + NodeIterator benchParamValue = benchParams.listObjectsOfProperty(experiment, parameter); + if(!benchParamValue.hasNext()){ + + NodeIterator defaultParamValue = benchmark.rdfModel.listObjectsOfProperty(parameter, defaultValProperty); + while (defaultParamValue.hasNext()) { + Literal valueLiteral = (Literal) defaultParamValue.next(); + benchParams.add(experiment, parameter, valueLiteral.getString()); + } + } + } + } + + return RabbitMQUtils.writeModel2String(benchParams); + } + // FIXME add javadoc // Static method for easier testing protected static String getSerializedSystemParams(ExperimentConfiguration config, BenchmarkMetaData benchmark, SystemMetaData system) { + Model systemModel = MetaDataFactory.getModelWithUniqueSystem(system.rdfModel, config.systemUri); + Model benchParams = RabbitMQUtils.readModel(config.serializedBenchParams); + Resource 
systemResource = systemModel.getResource(config.systemUri); + Resource experiment = benchParams.getResource(Constants.NEW_EXPERIMENT_URI); + // Check the benchmark model for parameters that should be forwarded to the // system - if (benchmark.rdfModel.contains(null, RDF.type, HOBBIT.ForwardedParameter)) { - Model benchParams = RabbitMQUtils.readModel(config.serializedBenchParams); + if (benchmark.rdfModel.contains(null, RDF.type, HOBBIT.ForwardedParameter)){ Property parameter; NodeIterator objIterator; - Resource systemResource = systemModel.getResource(config.systemUri); - Resource experiment = benchParams.getResource(Constants.NEW_EXPERIMENT_URI); // Get an iterator for all these parameters ResIterator iterator = benchmark.rdfModel.listResourcesWithProperty(RDF.type, HOBBIT.ForwardedParameter); while (iterator.hasNext()) { @@ -295,9 +360,47 @@ protected static String getSerializedSystemParams(ExperimentConfiguration config } } } + //adding parameters from system.ttl + if (system.rdfModel.contains(null, RDF.type, HOBBIT.Parameter)){ + NodeIterator objIterator; + // Get an iterator for all these parameters + Property defaultValProperty = system.rdfModel.getProperty("http://w3id.org/hobbit/vocab#defaultValue"); + ResIterator iterator = system.rdfModel.listResourcesWithProperty(RDF.type, HOBBIT.Parameter); + while (iterator.hasNext()) { + Property parameter = system.rdfModel.getProperty(((Resource)iterator.next()).getURI()); + objIterator = system.rdfModel.listObjectsOfProperty(parameter, defaultValProperty); + // If there is a value, add it to the system model + while (objIterator.hasNext()) { + Literal valueLiteral = (Literal)objIterator.next(); + systemModel.add(systemResource, parameter, valueLiteral.getString()); + //benchParams.remove(experiment, parameter, node); + } + } + } + //config.serializedBenchParams = RabbitMQUtils.writeModel2String(benchParams); return RabbitMQUtils.writeModel2String(systemModel); } + public static String 
getClusterConfiguration(ExperimentConfiguration config) { + + // Check the benchmark model for parameters that should be forwarded to the + // system + + Model benchParams = RabbitMQUtils.readModel(config.serializedBenchParams); + + Property parameter; + //NodeIterator objIterator; + //Resource systemResource = systemModel.getResource(config.systemUri); + Resource experiment = benchParams.getResource(Constants.NEW_EXPERIMENT_URI); + NodeIterator objIterator = benchParams.listObjectsOfProperty(experiment, new PropertyImpl(config.benchmarkUri+"#clusterConfig")); + while (objIterator.hasNext()) { + String ret = objIterator.next().asLiteral().getString(); + return ret; + } + + return "";//RabbitMQUtils.writeModel2String(systemModel); + } + protected void prefetchImages(BenchmarkMetaData benchmark, SystemMetaData system) throws Exception { Set usedImages = new HashSet(); usedImages.add(benchmark.mainImage); @@ -306,6 +409,7 @@ protected void prefetchImages(BenchmarkMetaData benchmark, SystemMetaData system usedImages.addAll(benchmark.usedImages); // pull all used images for (String image : usedImages) { + LOGGER.info("Prefetching image {}", image); controller.containerManager.pullImage(image); } } @@ -449,8 +553,7 @@ private synchronized void handleExperimentTermination_unsecured() { if (graphUri.equals(Constants.PUBLIC_RESULT_GRAPH_URI)) { try { controller.analyzeExperiment(experimentStatus.experimentUri); - LOGGER.info("Sent {} to the analysis component.", experimentStatus.experimentUri); - } catch (IOException e) { + } catch (Exception e) { LOGGER.error("Could not send task \"{}\" to AnalyseQueue.", experimentStatus.getConfig().challengeTaskUri); } @@ -459,6 +562,14 @@ private synchronized void handleExperimentTermination_unsecured() { // controller.publishChallengeForExperiment(experimentStatus.config); // Remove the experiment status object experimentStatus = null; + + if(systemAdapterVolume!=null) { + try { + getDockerClient().removeVolume(systemAdapterVolume); + 
} catch (Exception e) { + LOGGER.error("Failed to remove volume: {}", e.getLocalizedMessage()); + } + } } } @@ -530,7 +641,7 @@ public void notifyTermination(String containerId, int exitCode) { // data) comprising a command that indicates that a // container terminated and the container name String containerName = controller.containerManager.getContainerName(containerId); - if (containerName != null) { + if (containerName != null){ try { controller.sendToCmdQueue(Constants.HOBBIT_SESSION_ID_FOR_BROADCASTS, Commands.DOCKER_CONTAINER_TERMINATED, @@ -711,14 +822,14 @@ public void notifyExpRuntimeExpired(ExperimentStatus expiredState) { * the id of the experiment that should be stopped */ public void stopExperimentIfRunning(String experimentId) { - try { - experimentMutex.acquire(); - } catch (InterruptedException e) { - LOGGER.error( - "Interrupted while waiting for the experiment mutex. Won't check the experiment regarding the requested termination.", - e); - return; - } +// try { +// experimentMutex.acquire(); +// } catch (InterruptedException e) { +// LOGGER.error( +// "Interrupted while waiting for the experiment mutex. Won't check the experiment regarding the requested termination.", +// e); +// return false; +// } try { // If this is the currently running experiment if ((experimentStatus != null) && (experimentStatus.config.id.equals(experimentId))) { @@ -727,11 +838,43 @@ public void stopExperimentIfRunning(String experimentId) { LOGGER.error("The experiment {} was stopped by the user. 
Forcing termination.", experimentStatus.experimentUri); forceBenchmarkTerminate_unsecured(HobbitErrors.TerminatedByUser); + //experimentStatus.setState(States.STOPPED); + handleExperimentTermination_unsecured(); } } } finally { - experimentMutex.release(); + //experimentMutex.release(); } + + } + + public String getSystemTaskId() { + return systemTaskId; + } + +// public String getSystemContainerId() { +// +// if(systemTaskId==null) +// return null; +// try { +// Task task = controller.containerManager.getContainerInfo(systemTaskId); +// while(task.status()==null || +// task.status().containerStatus()==null || +// task.status().containerStatus().containerId()==null){ +// LOGGER.debug("Waiting for task status"); +// Thread.sleep(500); +// } +// String ret = task.status().containerStatus().containerId().substring(0, 12); +// return ret; +// +// } catch (Exception e) { +// LOGGER.error("Failed to get systemContainerId: {}", e.getLocalizedMessage()); +// } +// return null; +// } + + public Volume getSystemContainerVolume() { + return systemAdapterVolume; } @Override diff --git a/platform-controller/src/main/java/org/hobbit/controller/PlatformController.java b/platform-controller/src/main/java/org/hobbit/controller/PlatformController.java index bd5859cc..404388c5 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/PlatformController.java +++ b/platform-controller/src/main/java/org/hobbit/controller/PlatformController.java @@ -22,14 +22,7 @@ import java.nio.ByteBuffer; import java.text.SimpleDateFormat; import java.time.Duration; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.HashSet; -import java.util.List; -import java.util.Properties; -import java.util.Set; -import java.util.Timer; -import java.util.TimerTask; +import java.util.*; import java.util.concurrent.Semaphore; import org.apache.commons.io.Charsets; @@ -46,42 +39,27 @@ import org.apache.jena.vocabulary.RDF; import org.hobbit.controller.analyze.ExperimentAnalyzer; 
import org.hobbit.controller.data.ExperimentConfiguration; -import org.hobbit.controller.docker.ClusterManager; -import org.hobbit.controller.docker.ClusterManagerImpl; -import org.hobbit.controller.docker.ContainerManager; -import org.hobbit.controller.docker.ContainerManagerImpl; -import org.hobbit.controller.docker.ContainerStateObserver; -import org.hobbit.controller.docker.ContainerStateObserverImpl; -import org.hobbit.controller.docker.ContainerTerminationCallback; -import org.hobbit.controller.docker.FileBasedImageManager; -import org.hobbit.controller.docker.GitlabBasedImageManager; -import org.hobbit.controller.docker.ImageManager; -import org.hobbit.controller.docker.ImageManagerFacade; -import org.hobbit.controller.docker.ResourceInformationCollector; +import org.hobbit.controller.docker.*; import org.hobbit.controller.front.FrontEndApiHandler; import org.hobbit.controller.health.ClusterHealthChecker; import org.hobbit.controller.health.ClusterHealthCheckerImpl; +import org.hobbit.controller.queue.CloudBasedExperimentQueue; import org.hobbit.controller.queue.ExperimentQueue; import org.hobbit.controller.queue.ExperimentQueueImpl; +import org.hobbit.controller.utils.ServiceLogsReader; import org.hobbit.core.Commands; import org.hobbit.core.Constants; import org.hobbit.core.FrontEndApiCommands; import org.hobbit.core.components.AbstractCommandReceivingComponent; -import org.hobbit.core.data.BenchmarkMetaData; -import org.hobbit.core.data.StartCommandData; -import org.hobbit.core.data.StopCommandData; -import org.hobbit.core.data.SystemMetaData; +import org.hobbit.core.data.*; import org.hobbit.core.data.status.ControllerStatus; import org.hobbit.core.data.status.QueuedExperiment; import org.hobbit.core.data.status.RunningExperiment; import org.hobbit.core.data.usage.ResourceUsageInformation; -import org.hobbit.core.rabbit.DataSender; -import org.hobbit.core.rabbit.DataSenderImpl; import org.hobbit.core.rabbit.RabbitMQUtils; import 
org.hobbit.core.rabbit.RabbitQueueFactoryImpl; import org.hobbit.storage.client.StorageServiceClient; import org.hobbit.storage.queries.SparqlQueries; -import org.hobbit.utils.EnvVariables; import org.hobbit.utils.rdf.RdfHelper; import org.hobbit.vocab.HOBBIT; import org.slf4j.Logger; @@ -113,15 +91,23 @@ public class PlatformController extends AbstractCommandReceivingComponent */ public static final String PLATFORM_VERSION = readVersion(); - private static final String DEPLOY_ENV = System.getProperty("DEPLOY_ENV", "production"); + private static final String DEPLOY_ENV = System.getenv().containsKey("DEPLOY_ENV")?System.getenv().get("DEPLOY_ENV"): "production"; private static final String DEPLOY_ENV_TESTING = "testing"; private static final String DEPLOY_ENV_DEVELOP = "develop"; private static final String CONTAINER_PARENT_CHECK_ENV_KEY = "CONTAINER_PARENT_CHECK"; private static final boolean CONTAINER_PARENT_CHECK = System.getenv().containsKey(CONTAINER_PARENT_CHECK_ENV_KEY) ? System.getenv().get(CONTAINER_PARENT_CHECK_ENV_KEY) == "1" : true; + + public static final String FILE_BASED_IMAGE_MANAGER_KEY ="FILE_BASED_IMAGE_MANAGER"; + public static final String ALLOW_ASYNC_CONTAINER_COMMANDS_KEY ="ALLOW_ASYNC_CONTAINER_COMMANDS"; + public static final String SERVICE_LOGS_READER_KEY = "SERVICE_LOGS_READER"; + public static final String USE_CLOUD_KEY ="USE_CLOUD"; + + + private static boolean ALLOW_ASYNC_CONTAINER_COMMANDS; private static final String RABBIT_MQ_EXPERIMENTS_HOST_NAME_KEY = "HOBBIT_RABBIT_EXPERIMENTS_HOST"; - private static final String LOCAL_METADATA_DIRECTORY_KEY = "LOCAL_METADATA_DIRECTORY"; + // every 60 mins public static final long PUBLISH_CHALLENGES = 60 * 60 * 1000; @@ -135,9 +121,9 @@ public class PlatformController extends AbstractCommandReceivingComponent */ protected FrontEndApiHandler frontEndApiHandler; /** - * RabbitMQ data sender to the analyser platform. + * RabbitMQ channel between front end and platform controller. 
*/ - protected DataSender sender2Analysis; + protected Channel controller2Analysis; /** * A manager for Docker containers. */ @@ -177,7 +163,7 @@ public class PlatformController extends AbstractCommandReceivingComponent protected ExperimentManager expManager; - protected ResourceInformationCollector resInfoCollector; + private ResourceInformationCollector resInfoCollector; protected ClusterManager clusterManager; @@ -186,7 +172,11 @@ public class PlatformController extends AbstractCommandReceivingComponent */ protected Timer challengePublishTimer; - protected String rabbitMQExperimentsHostName; + protected String prometheusHost; + protected String prometheusPort; + + public String rabbitMQExperimentsHostName; + protected ContainerStateObserver serviceLogsReader; @Override public void init() throws Exception { @@ -196,7 +186,7 @@ public void init() throws Exception { if (System.getenv().containsKey(RABBIT_MQ_EXPERIMENTS_HOST_NAME_KEY)) { rabbitMQExperimentsHostName = System.getenv().get(RABBIT_MQ_EXPERIMENTS_HOST_NAME_KEY); if (!rabbitMQHostName.equals(rabbitMQExperimentsHostName)) { - switchCmdToExpRabbit(); + switchCmdToExpRabbit(rabbitMQExperimentsHostName); LOGGER.info("Using {} as message broker for experiments.", rabbitMQExperimentsHostName); } else { LOGGER.warn( @@ -211,49 +201,77 @@ public void init() throws Exception { rabbitMQHostName); } + + if(System.getenv().containsKey(USE_CLOUD_KEY) + && (System.getenv().get(USE_CLOUD_KEY).toLowerCase().equals("true") + || System.getenv().get(USE_CLOUD_KEY).equals("1"))){ + clusterManager = new CloudClusterManager(this); + containerManager = new CloudContainerManager(clusterManager); + queue = new CloudBasedExperimentQueue(); + }else{ + clusterManager = new ClusterManagerImpl(); + containerManager = new ContainerManagerImpl(clusterManager); + queue = new ExperimentQueueImpl(); + } + + ALLOW_ASYNC_CONTAINER_COMMANDS = ( + System.getenv().containsKey(ALLOW_ASYNC_CONTAINER_COMMANDS_KEY) + && 
(System.getenv().get(ALLOW_ASYNC_CONTAINER_COMMANDS_KEY).toLowerCase().equals("true") + || System.getenv().get(ALLOW_ASYNC_CONTAINER_COMMANDS_KEY).equals("1")) ? true: false); + + if(ALLOW_ASYNC_CONTAINER_COMMANDS) + LOGGER.info("Async container commands are allowed"); + else + LOGGER.info("Async container commands are prohibited"); + + LOGGER.info("Container manager initialized."); + // Set task history limit for swarm cluster to 0 (will remove all terminated // containers) // Only for prod mode - clusterManager = new ClusterManagerImpl(); + if (DEPLOY_ENV.equals(DEPLOY_ENV_TESTING) || DEPLOY_ENV.equals(DEPLOY_ENV_DEVELOP)) { LOGGER.debug("Ignoring task history limit parameter. Will remain default (run 'docker info' for details)."); + } else { LOGGER.debug( "Production mode. Setting task history limit to 0. All terminated containers will be removed."); - clusterManager.setTaskHistoryLimit(0); + try { + clusterManager.setTaskHistoryLimit(0); + } + catch (Exception e){ + LOGGER.error("Failed to set task history limit: {}",e.getLocalizedMessage()); + } } - // create container manager - containerManager = new ContainerManagerImpl(); - LOGGER.debug("Container manager initialized."); + if(System.getenv().containsKey(SERVICE_LOGS_READER_KEY) && + (System.getenv().get(SERVICE_LOGS_READER_KEY).toLowerCase().equals("true") || System.getenv().get(SERVICE_LOGS_READER_KEY).equals("1"))) { + LOGGER.debug("Enabling service logs output to console"); + serviceLogsReader = new ServiceLogsReader(containerManager, 1000); + containerManager.addContainerObserver(serviceLogsReader); + serviceLogsReader.startObserving(); + } + + // Create container observer (polls status every 5s) containerObserver = new ContainerStateObserverImpl(containerManager, 5 * 1000); containerObserver.addTerminationCallback(this); // Tell the manager to add container to the observer containerManager.addContainerObserver(containerObserver); - resInfoCollector = new ResourceInformationCollector(containerManager); 
containerObserver.startObserving(); + LOGGER.debug("Container observer initialized."); - // Create the image manager including a local directory or not - String localMetaDir = EnvVariables.getString(LOCAL_METADATA_DIRECTORY_KEY, (String) null); - if (localMetaDir != null) { - imageManager = new ImageManagerFacade(new FileBasedImageManager(localMetaDir), - new GitlabBasedImageManager()); - } else { - imageManager = new GitlabBasedImageManager(); - } + imageManager = (System.getenv().containsKey(FILE_BASED_IMAGE_MANAGER_KEY)?new FileBasedImageManager(): new GitlabBasedImageManager()) ; LOGGER.debug("Image manager initialized."); frontEnd2Controller = incomingDataQueueFactory.getConnection().createChannel(); frontEndApiHandler = (new FrontEndApiHandler.Builder()).platformController(this) .queue(incomingDataQueueFactory, Constants.FRONT_END_2_CONTROLLER_QUEUE_NAME).build(); - sender2Analysis = DataSenderImpl.builder() - .queue(outgoingDataQueuefactory, Constants.CONTROLLER_2_ANALYSIS_QUEUE_NAME).build(); - - queue = new ExperimentQueueImpl(); + controller2Analysis = cmdQueueFactory.getConnection().createChannel(); + controller2Analysis.queueDeclare(Constants.CONTROLLER_2_ANALYSIS_QUEUE_NAME, false, false, true, null); storage = StorageServiceClient.create(outgoingDataQueuefactory.getConnection()); @@ -273,7 +291,7 @@ public void run() { } }, PUBLISH_CHALLENGES, PUBLISH_CHALLENGES); - LOGGER.info("Platform controller initialized."); + LOGGER.info("Platform controller initialized. 
{} experiments in the queue", queue.listAll().size()); } /** @@ -282,20 +300,29 @@ public void run() { * * @throws Exception */ - private void switchCmdToExpRabbit() throws Exception { + public void switchCmdToExpRabbit(String targetHost) throws Exception { + if(targetHost==null) + targetHost = rabbitMQHostName; // We have to close the existing command queue try { - cmdChannel.close(); + if(cmdChannel.isOpen()) + cmdChannel.close(); } catch (Exception e) { LOGGER.warn("Exception while closing command queue. It will be ignored.", e); } + IOUtils.closeQuietly(cmdQueueFactory); // temporarily create a new factory to the second broker but keep the reference // to the first broker (XXX this is a dirty workaround to make use of methods // like createConnection()) ConnectionFactory tempFactory = connectionFactory; connectionFactory = new ConnectionFactory(); - connectionFactory.setHost(rabbitMQExperimentsHostName); + if(targetHost.contains(":")){ + String[] splitted = targetHost.split(":"); + connectionFactory.setHost(splitted[0]); + connectionFactory.setPort(Integer.parseInt(splitted[1])); + }else + connectionFactory.setHost(targetHost); connectionFactory.setAutomaticRecoveryEnabled(true); // attempt recovery every 10 seconds connectionFactory.setNetworkRecoveryInterval(10000); @@ -325,6 +352,15 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp } } + + public void setPrometheusHost(String targetHost){ + prometheusHost = targetHost; + } + + public void setPrometheusPort(String targetPort){ + prometheusPort = targetPort; + } + /** * Handles incoming command request from the hobbit command queue. 
* @@ -404,8 +440,9 @@ public void receiveCommand(byte command, byte[] data, String sessionId, String r case Commands.REQUEST_SYSTEM_RESOURCES_USAGE: { // FIXME use the session id to make sure that only containers of this session // are observed - ResourceUsageInformation resUsage = resInfoCollector.getSystemUsageInformation(); - LOGGER.info("Returning usage information: {}", resUsage != null ? resUsage.toString() : "null"); + + ResourceUsageInformation resUsage = getResInfoCollector().getSystemUsageInformation(); + LOGGER.debug("Returning system usage information: {}", resUsage != null ? resUsage.toString() : "null"); if (replyTo != null) { byte[] response; if (resUsage != null) { @@ -423,10 +460,70 @@ public void receiveCommand(byte command, byte[] data, String sessionId, String r LOGGER.error(errMsgBuilder.toString(), e); } } + break; } + + //functions below require an extended Core interfaces (see also other sections on this file) +// case Commands.REQUEST_BENCHMARK_RESOURCES_USAGE: { +// // FIXME use the session id to make sure that only containers of this session +// // are observed +// +// ResourceUsageInformation resUsage = getResInfoCollector().getBenchmarkUsageInformation(); +// LOGGER.debug("Returning benchmark usage information: {}", resUsage != null ? 
resUsage.toString() : "null"); +// if (replyTo != null) { +// byte[] response; +// if (resUsage != null) { +// response = RabbitMQUtils.writeString(gson.toJson(resUsage)); +// } else { +// response = new byte[0]; +// } +// try { +// cmdChannel.basicPublish("", replyTo, MessageProperties.PERSISTENT_BASIC, response); +// } catch (IOException e) { +// StringBuilder errMsgBuilder = new StringBuilder(); +// errMsgBuilder.append("Error, couldn't sent the request resource usage statistics to replyTo="); +// errMsgBuilder.append(replyTo); +// errMsgBuilder.append("."); +// LOGGER.error(errMsgBuilder.toString(), e); +// } +// } +// break; +// } +// case Commands.EXECUTE_ASYNC_COMMAND: { +// if (ALLOW_ASYNC_CONTAINER_COMMANDS){ +// ExecuteCommandData executeCommandParams = deserializeExecuteCommandData(data); +// String taskId = executeCommandParams.containerId; +// LOGGER.debug("Executing command to container: {}", taskId); +// Boolean result0 = containerManager.execAsyncCommand(taskId, executeCommandParams.command); +// String result = (result0?"Succeeded":"Failed"); +// LOGGER.debug("Sending {} result for command to container: {}", result, executeCommandParams.containerId); +// if (replyTo != null){ +// try { +// cmdChannel.basicPublish("", replyTo, MessageProperties.PERSISTENT_BASIC, RabbitMQUtils.writeString(result)); +// } catch (IOException e) { +// StringBuilder errMsgBuilder = new StringBuilder(); +// errMsgBuilder.append("Error, couldn't sent response after creation of container ("); +// errMsgBuilder.append(executeCommandParams.toString()); +// errMsgBuilder.append(") to replyTo="); +// errMsgBuilder.append(replyTo); +// errMsgBuilder.append("."); +// LOGGER.error(errMsgBuilder.toString(), e); +// } +// } +// +// }else +// LOGGER.warn("Command execution for containers is prohibited"); +// break; +// } } } + public ResourceInformationCollector getResInfoCollector() { + if(resInfoCollector==null || !resInfoCollector.getPrometheusHost().equals(prometheusHost) || 
!resInfoCollector.getPrometheusPort().equals(prometheusPort)) + resInfoCollector = new ResourceInformationCollector(containerManager, prometheusHost, prometheusPort); + return resInfoCollector; + } + private StopCommandData deserializeStopCommandData(byte[] data) { if (data == null) { return null; @@ -443,6 +540,15 @@ private StartCommandData deserializeStartCommandData(byte[] data) { return gson.fromJson(dataString, StartCommandData.class); } + //functions below require an extended Core interfaces (see also other sections on this file) +// private ExecuteCommandData deserializeExecuteCommandData(byte[] data) { +// if (data == null) { +// return null; +// } +// String dataString = RabbitMQUtils.readString(data); +// return gson.fromJson(dataString, ExecuteCommandData.class); +// } + /** * Creates and starts a container based on the given {@link StartCommandData} * instance. @@ -452,17 +558,32 @@ private StartCommandData deserializeStartCommandData(byte[] data) { * @return the name of the created container */ private String createContainer(StartCommandData data) { - String parentId = containerManager.getContainerId(data.parent); + String parentId = (data.parent!=null? 
containerManager.getContainerId(data.parent): null); if ((parentId == null) && (CONTAINER_PARENT_CHECK)) { LOGGER.error("Couldn't create container because the parent \"{}\" is not known.", data.parent); return null; } - String containerId = containerManager.startContainer(data.image, data.type, parentId, data.environmentVariables, - null); - if (containerId == null) { + + String[] volumes = new String[]{}; + + //functions below require an extended Core interfaces (see also other sections on this file) +// String[] command = null; +// if(ALLOW_ASYNC_CONTAINER_COMMANDS) +// command = data.command; +// else +// LOGGER.warn("Command execution for containers is prohibited"); +// +// +// if(expManager.getSystemTaskId()!=null && expManager.getSystemTaskId().equals(parentId)) +// volumes = new String[]{ expManager.getSystemContainerVolume().name()+":/share" }; + + String taskId = containerManager.startContainer(data.image, data.type, parentId, data.environmentVariables, null, volumes); + if (taskId == null) { return null; } else { - return containerManager.getContainerName(containerId); + //String ret = containerManager.getContainerId(taskId); + String ret = containerManager.getContainerName(taskId); + return ret; } } @@ -492,6 +613,9 @@ public void notifyTermination(String containerId, int exitCode) { expManager.notifyTermination(containerId, exitCode); // Remove the container from the observer containerObserver.removedObservedContainer(containerId); + if(serviceLogsReader!=null) + serviceLogsReader.removedObservedContainer(containerId); + // If we should remove all containers created by us if (!DEPLOY_ENV.equals(DEPLOY_ENV_TESTING)) { // If we remove this container, we have to make sure that there are @@ -510,6 +634,15 @@ public void close() throws IOException { } catch (Exception e) { LOGGER.error("Couldn't stop the container observer.", e); } + + try { + if (serviceLogsReader != null) { + serviceLogsReader.stopObserving(); + } + } catch (Exception e) { + 
LOGGER.error("Couldn't stop the container logs reader.", e); + } + // get all remaining containers from the observer, terminate and remove them. Do // not try to get the list from the container manager since he will return all // containers regardless whether the platform created them or not. @@ -523,6 +656,7 @@ public void close() throws IOException { } } } + // Close the storage client IOUtils.closeQuietly(storage); // Close the queue if this is needed @@ -542,9 +676,9 @@ public void close() throws IOException { } catch (Exception e) { } } - if (sender2Analysis != null) { + if (controller2Analysis != null) { try { - sender2Analysis.close(); + controller2Analysis.close(); } catch (Exception e) { } } @@ -557,7 +691,8 @@ public void close() throws IOException { @Override public void analyzeExperiment(String uri) throws IOException { - sender2Analysis.sendData(RabbitMQUtils.writeString(uri)); + controller2Analysis.basicPublish("", Constants.CONTROLLER_2_ANALYSIS_QUEUE_NAME, + MessageProperties.PERSISTENT_BASIC, RabbitMQUtils.writeString(uri)); } /** @@ -694,6 +829,7 @@ public void handleFrontEndCmd(byte bytes[], String replyTo, BasicProperties repl // Check whether the use has the right to terminate the experiment if ((config != null) && (config.userName != null) && (config.userName.equals(userName))) { // Remove the experiment from the queue + if (queue.remove(config)) { // call the Experiment Manager to cancel the experiment if it is running expManager.stopExperimentIfRunning(experimentId); @@ -1133,8 +1269,8 @@ protected String addExperimentToQueue(String benchmarkUri, String systemUri, Str String experimentId = generateExperimentId(); LOGGER.info("Adding experiment {} with benchmark {}, system {} and user {} to the queue.", experimentId, benchmarkUri, systemUri, userName); - queue.add(new ExperimentConfiguration(experimentId, benchmarkUri, serializedBenchParams, systemUri, userName, - challengUri, challengTaskUri, executionDate)); + ExperimentConfiguration 
configuration = new ExperimentConfiguration(experimentId, benchmarkUri, serializedBenchParams, systemUri, userName, challengUri, challengTaskUri, executionDate); + queue.add(configuration); return experimentId; } diff --git a/platform-controller/src/main/java/org/hobbit/controller/cloud/CloudSshTunnelsProvider.java b/platform-controller/src/main/java/org/hobbit/controller/cloud/CloudSshTunnelsProvider.java new file mode 100644 index 00000000..c654ca7e --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/cloud/CloudSshTunnelsProvider.java @@ -0,0 +1,169 @@ +package org.hobbit.controller.cloud; + +import com.spotify.docker.client.DefaultDockerClient; +import com.spotify.docker.client.DockerClient; +import com.spotify.docker.client.exceptions.DockerCertificateException; +import org.hobbit.awscontroller.SSH.HSession; +import org.hobbit.awscontroller.SSH.SshConnector; +import org.hobbit.awscontroller.SSH.SshTunnelsProvider; +import org.hobbit.cloud.interfaces.Node; +import org.hobbit.controller.PlatformController; +import org.hobbit.controller.cloud.aws.swarm.SwarmClusterManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.Map; +import java.util.concurrent.Semaphore; +import java.util.function.Function; + +import static org.hobbit.controller.docker.CloudClusterManager.CLOUD_SSH_KEY_FILE_PATH_KEY; + +/** + * @author Pavel Smirnov. 
(psmirnov@agtinternational.com / smirnp@gmail.com) + */ +public class CloudSshTunnelsProvider { + private static final Logger LOGGER = LoggerFactory.getLogger(CloudSshTunnelsProvider.class); + static PlatformController _platformController; + static String bastionHostUser = "ec2-user"; + static String targetHostUser = "ubuntu"; + static String keyfilepath; + static String[] portsToForward = new String[]{ "2376", "5672", "9090" }; + static int defaultSshPort = 22; + static Boolean providerInitilized=false; + private static Semaphore initFinishedMutex = new Semaphore(0); + private static HSession bastionHostSession; + + public CloudSshTunnelsProvider(PlatformController platformController){ + + _platformController = platformController; + keyfilepath = System.getenv(CLOUD_SSH_KEY_FILE_PATH_KEY); + } + +// public static SshTunnelsProvider getTunnelsProvider(){ +// return SshTunnelsProvider; +//// if(sshTunnelsProvider) +//// return sshTunnelsProvider; +// } + + public static Boolean isProviderInitialized() { + return providerInitilized; + } + + public static Boolean isConnected() { + return SshTunnelsProvider.isConnected(); + } + + public static void initSshProvider(){ + + providerInitilized = false; + + SwarmClusterManager clusterManager = ClusterManagerProvider.getManager(); + + Node managerHost = null; + try { + List managers = clusterManager.getManagerNodes(); + managerHost = managers.get(0); + } catch (Exception e) { + LOGGER.info("Unable to get manager nodes: {}", e.getLocalizedMessage()); + return; + } + + Node bastionHost = null; + try { + bastionHost = clusterManager.getBastion(); + } catch (Exception e) { + LOGGER.error("Could not get bastion host: {}", e.getLocalizedMessage()); + return; + } + + String bastionHostIp = bastionHost.getPublicIpAddress(); + String managerHostIp = managerHost.getIngernalIpAddress(); + + LOGGER.info("initSshConnection {}, {}", bastionHostIp, managerHostIp); + + bastionHostSession = new HSession(bastionHostUser, bastionHostIp, 
defaultSshPort, keyfilepath); + HSession swarmManagerSession = new HSession(targetHostUser, managerHostIp, defaultSshPort, keyfilepath, portsToForward, bastionHostSession); + + SshTunnelsProvider.init(swarmManagerSession, new Function(){ + @Override + public String apply(HSession hSession){ + initFinishedMutex.release(); + Map portForwadings = hSession.getForwardings(); + LOGGER.info("SSH connection to {} established. Ports forwardings: {}", hSession.getHost(), portForwadings.toString()); + try { + String rabbitHost = "localhost:" + portForwadings.get(5672); + LOGGER.info("Switching platform controller to remote rabbitMQ {}", rabbitHost); + _platformController.switchCmdToExpRabbit(rabbitHost); + } catch (Exception e) { + LOGGER.error("Cannot switch platform controller {}", e.getLocalizedMessage()); + } + + try { + LOGGER.info("Switching resInfoCollector to {}", "localhost:" + portForwadings.get(9090).toString()); + _platformController.setPrometheusHost("localhost"); + _platformController.setPrometheusPort(portForwadings.get(9090).toString()); + } catch (Exception e) { + LOGGER.error("Cannot switch resInfoCollector {}", e.getLocalizedMessage()); + } + return null; + } + }, new Function() { + @Override + public String apply(HSession hSession){ + providerInitilized = false; + DockerClientProvider.flushDockerClient(); + try { + LOGGER.info("Switching platform controller to local rabbitMQ"); + _platformController.switchCmdToExpRabbit(null); + } catch (Exception e) { + LOGGER.error("Cannot switch platform controller {}", e.getLocalizedMessage()); + } + return null; + } + }); + providerInitilized = true; + + } + + public static void newSshTunnel(Function onConnectHandler, Function onDisconnectConnectHandler){ + SshTunnelsProvider.newSshTunnel(onConnectHandler, onDisconnectConnectHandler); + } + + public static void execAsyncCommand(String nodeIP, Function onConnect) { + + SshConnector sshConnector = SshTunnelsProvider.getSshConnector(); + HSession targetHostSession; + 
if(sshConnector.getOpenedConnections().containsKey(nodeIP)) + targetHostSession = sshConnector.getOpenedConnections().get(nodeIP); + else + targetHostSession = new HSession(targetHostUser, nodeIP, defaultSshPort, keyfilepath, portsToForward, bastionHostSession); + + try { + sshConnector.openTunnel(targetHostSession, 30000, new Function() { + @Override + public String apply(HSession hSession) { + Map portForwadings = hSession.getForwardings(); + String dockerHost = "http://localhost:" + portForwadings.get(2376); + try { + DockerClient dockerClient = DefaultDockerClient.fromEnv() + .uri(dockerHost) + .connectionPoolSize(5000) + .connectTimeoutMillis(1000).build(); + onConnect.apply(dockerClient); + dockerClient.close(); + } catch (DockerCertificateException e) { + LOGGER.error("Failed to init docker client to {}: {}", dockerHost, e.getLocalizedMessage()); + } + + return null; + } + }); + } catch (Exception e) { + LOGGER.error("Failed to open tunnel to {}: {}", nodeIP, e.getLocalizedMessage()); + } + + } + + +} diff --git a/platform-controller/src/main/java/org/hobbit/controller/cloud/ClusterManagerProvider.java b/platform-controller/src/main/java/org/hobbit/controller/cloud/ClusterManagerProvider.java new file mode 100644 index 00000000..985995fb --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/cloud/ClusterManagerProvider.java @@ -0,0 +1,31 @@ +package org.hobbit.controller.cloud; + +import org.hobbit.controller.cloud.aws.swarm.SwarmClusterManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * @author Pavel Smirnov. 
(psmirnov@agtinternational.com / smirnp@gmail.com) + */ + + +public class ClusterManagerProvider { + public static final String CLOUD_VPC_CLUSTER_NAME_KEY ="CLOUD_VPC_CLUSTER_NAME"; + private static Logger LOGGER = LoggerFactory.getLogger(ClusterManagerProvider.class); + static SwarmClusterManager swarmClusterManager; + + public static SwarmClusterManager getManager(){ + if (swarmClusterManager ==null) { + try { + String vpcClusterName = (System.getenv().containsKey(CLOUD_VPC_CLUSTER_NAME_KEY)?System.getenv().get(CLOUD_VPC_CLUSTER_NAME_KEY):"hobbit"); + + swarmClusterManager = new SwarmClusterManager(vpcClusterName); + } + catch (Exception e){ + LOGGER.error("Failed to initialize swarmClusterManager"); + } + } + return swarmClusterManager; + } +} diff --git a/platform-controller/src/main/java/org/hobbit/controller/cloud/DockerClientProvider.java b/platform-controller/src/main/java/org/hobbit/controller/cloud/DockerClientProvider.java new file mode 100644 index 00000000..6ef4e864 --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/cloud/DockerClientProvider.java @@ -0,0 +1,107 @@ +package org.hobbit.controller.cloud; + +import com.spotify.docker.client.DockerClient; +import com.spotify.docker.client.exceptions.DockerCertificateException; +import org.hobbit.awscontroller.SSH.HSession; +import org.hobbit.awscontroller.SSH.SshTunnelsProvider; +import org.hobbit.controller.docker.DockerUtility; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; +import java.util.concurrent.Semaphore; +import java.util.function.Function; + +/** + * @author Pavel Smirnov. 
(psmirnov@agtinternational.com / smirnp@gmail.com) + */ + +public class DockerClientProvider { + + private static Logger LOGGER = LoggerFactory.getLogger(DockerClientProvider.class); + static DockerClient dockerClient; + static String dockerHost; + private static boolean alreadyRequested =false; + private static Semaphore initFinishedMutex = new Semaphore(0); + + + public static DockerClient getDockerClient(){ + if(dockerClient!=null && SshTunnelsProvider.isConnected()){ + try { + dockerClient.ping(); + return dockerClient; + } + catch (Exception e){ + LOGGER.info("Failed to ping docker host. Closing existing dockerClient", e.getLocalizedMessage()); + dockerClient.close(); + + if(dockerHost!=null){ + LOGGER.info("Trying to get docker client to host {}", dockerHost); + try { + dockerClient = DockerUtility.getDockerClient(dockerHost); + dockerClient.ping(); + return dockerClient; + } catch (Exception e2) { + LOGGER.error("Failed to connect to existing docker client at {}: {}. Closing the existing client", dockerHost, e2.getLocalizedMessage()); + dockerClient.close(); + } + } + } + } + + if(!alreadyRequested){ + alreadyRequested = true; + + if(!CloudSshTunnelsProvider.isProviderInitialized()) { + LOGGER.debug("Initilizing ssh provider"); + CloudSshTunnelsProvider.initSshProvider(); + } + + if(CloudSshTunnelsProvider.isProviderInitialized()){ + LOGGER.debug("Ssh tunnel required. 
Trying to create it"); + CloudSshTunnelsProvider.newSshTunnel(new Function() { + @Override + public String apply(HSession hSession) { + Map portForwadings = hSession.getForwardings(); + dockerHost = "http://localhost:" + portForwadings.get(2376); + try { + dockerClient = DockerUtility.getDockerClient(dockerHost); + LOGGER.info("Initialized new docker client to {}", dockerHost); + initFinishedMutex.release(); + + } catch (DockerCertificateException e) { + LOGGER.error("Could not init new docker client: {}", e.getLocalizedMessage()); + } + return null; + } + }, new Function() { + @Override + public String apply(HSession hSession) { + flushDockerClient(); + return null; + } + }); + } + + }else { + LOGGER.debug("Waiting ssh tunnel from other thread"); + try { + initFinishedMutex.acquire(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + alreadyRequested =false; + + if(dockerClient==null) + LOGGER.warn("Returning dockerClient=null"); + return dockerClient; + } + + + public static void flushDockerClient(){ + LOGGER.debug("Flushing docker client"); + dockerClient = null; + } + +} diff --git a/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/handlers/BasicClusterStackHandler.java b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/handlers/BasicClusterStackHandler.java new file mode 100644 index 00000000..6f059384 --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/handlers/BasicClusterStackHandler.java @@ -0,0 +1,41 @@ +package org.hobbit.controller.cloud.aws.handlers; + +import org.hobbit.awscontroller.StackHandlers.SSHDependentStackHandler; + +/** + * @author Pavel Smirnov. 
(psmirnov@agtinternational.com / smirnp@gmail.com) + */ + +public class BasicClusterStackHandler extends SSHDependentStackHandler{ + + public BasicClusterStackHandler(SSHDependentStackHandler.Builder builder) { + super(builder); + } + + public static class Builder + extends SSHDependentStackHandler.Builder + { + + protected String bastionStackName; + protected String natStackName; + + public String getBastionStackName() { + return bastionStackName; + } + + public String getNatStackName() { + return natStackName; + } + + public Builder bastionStackName(String value){ + this.bastionStackName = value; + return this; + } + + public Builder natStackName (String value){ + this.natStackName = value; + return this; + } + + } +} \ No newline at end of file diff --git a/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/handlers/BastionStackHandler.java b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/handlers/BastionStackHandler.java new file mode 100644 index 00000000..a9a6f7f1 --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/handlers/BastionStackHandler.java @@ -0,0 +1,20 @@ +package org.hobbit.controller.cloud.aws.handlers; + + +import org.hobbit.awscontroller.StackHandlers.SSHDependentStackHandler; + +/** + * @author Pavel Smirnov. 
(psmirnov@agtinternational.com / smirnp@gmail.com) + */ + +public class BastionStackHandler extends SSHDependentStackHandler { + + + public BastionStackHandler(BasicClusterStackHandler.Builder builder){ + super(builder); + name = builder.bastionStackName; + bodyFilePath = "AWS/bastion.yaml"; + } + + +} diff --git a/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/handlers/VPCStackHandler.java b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/handlers/VPCStackHandler.java new file mode 100644 index 00000000..66ce9686 --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/handlers/VPCStackHandler.java @@ -0,0 +1,18 @@ +package org.hobbit.controller.cloud.aws.handlers; + +import org.hobbit.awscontroller.StackHandlers.AbstractStackHandler; + +/** + * @author Pavel Smirnov. (psmirnov@agtinternational.com / smirnp@gmail.com) + */ + +public class VPCStackHandler extends AbstractStackHandler { + + + public VPCStackHandler(BasicClusterStackHandler.Builder builder){ + super(builder); + name = builder.getVpcStackName(); + bodyFilePath = "AWS/vpc-1azs.yaml"; + } + +} diff --git a/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/SwarmClusterManager.java b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/SwarmClusterManager.java new file mode 100644 index 00000000..3f16da19 --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/SwarmClusterManager.java @@ -0,0 +1,235 @@ +package org.hobbit.controller.cloud.aws.swarm; + +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import org.hobbit.awscontroller.StackHandlers.AbstractStackHandler; +import org.hobbit.cloud.interfaces.Node; +import org.hobbit.cloud.vpc.VpcClusterManager; +import org.hobbit.controller.cloud.aws.handlers.BasicClusterStackHandler; +import org.hobbit.controller.cloud.aws.handlers.BastionStackHandler; +import 
org.hobbit.controller.cloud.aws.handlers.VPCStackHandler; +import org.hobbit.controller.cloud.aws.swarm.handlers.*; +import org.hobbit.controller.docker.ClusterManagerImpl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + +import static org.hobbit.controller.docker.CloudClusterManager.CLOUD_SSH_KEY_NAME_KEY; + + +/** + * ClusterManager implementation on AWS resources + * + * @author Pavel Smirnov (smirnp@gmail.com) + * + */ +public class SwarmClusterManager extends VpcClusterManager { + private static final Logger LOGGER = LoggerFactory.getLogger(ClusterManagerImpl.class); + private final BasicClusterStackHandler.Builder basicStacksBuilder; + private final SwarmClusterStackHandler.Builder swarmStacksBuilder; + + protected AbstractStackHandler securityGroupsStackHandler; + protected AbstractStackHandler keysManagementStackHandler; + protected AbstractStackHandler swarmManagerStackHandler; + protected AbstractStackHandler swarmBenchmarkWorkersStackHandler; + protected AbstractStackHandler swarmSystemWorkersStackHandler; + //protected SwarmClusterStackHandler.Builder swarmStacksBuilder; + + protected String clusterName; + protected String swarmClusterName; + protected String bucketName; + protected String sshKeyName; + + boolean clusterReady = false; + + public SwarmClusterManager(String clusterName){ + super(clusterName, System.getenv(CLOUD_SSH_KEY_NAME_KEY)); + + swarmClusterName = clusterName+"-swarm"; + bucketName = swarmClusterName+"-bucket"; + + basicStacksBuilder = new BasicClusterStackHandler.Builder() + .vpcStackName(clusterName+"-vpc") + .bastionStackName(clusterName+"-bastion") + .natStackName(clusterName+"-nat") + .sshKeyName(System.getenv(CLOUD_SSH_KEY_NAME_KEY)); + + swarmStacksBuilder = new SwarmClusterStackHandler.Builder(basicStacksBuilder) + .securityGroupsStackName(swarmClusterName+"-security-group") + .keysManagementStackName(swarmClusterName+"-keys-management") + .bucketName(bucketName) + 
.managersStackName(swarmClusterName+"-manager") + //.managersStackName(vpcClusterName+"2-swarm-manager") + .benchmarkWorkersStackName(swarmClusterName+"-benchmark-worker") + .systemWorkersStackName(swarmClusterName+"-system-worker"); + + initStackHandlers(null); + } + + protected void initStackHandlers(JsonObject configuration){ + + Map bastionConfig = getStackConfig(configuration, clusterName+"-bastion"); + Map natConfig = getStackConfig(configuration, clusterName+"-nat"); + Map managerConfig = getStackConfig(configuration, swarmClusterName+"-manager"); + Map benchmarkWorkersConfig = getStackConfig(configuration, swarmClusterName+"-benchmark-worker"); + Map systemWorkersConfig = getStackConfig(configuration, swarmClusterName+"-system-worker"); + + vpcStackHandler = new VPCStackHandler(basicStacksBuilder); + bastionStackHandler = new BastionStackHandler(basicStacksBuilder).appendParameters(bastionConfig); + + securityGroupsStackHandler = new SecurityGroupsStackHandler(swarmStacksBuilder); + keysManagementStackHandler = new KeysManagementStackHandler(swarmStacksBuilder); + swarmManagerStackHandler = new SwarmManagerStackHandler(swarmStacksBuilder).appendParameters(managerConfig); + swarmBenchmarkWorkersStackHandler = new SwarmWorkerStackHandler(swarmStacksBuilder.workerType("benchmark")).appendParameters(benchmarkWorkersConfig); + swarmSystemWorkersStackHandler = new SwarmWorkerStackHandler(swarmStacksBuilder.workerType("system")).appendParameters(systemWorkersConfig); + + stackList = new ArrayList>() {{ + add(Arrays.asList(new AbstractStackHandler[]{vpcStackHandler})); + add(Arrays.asList(new AbstractStackHandler[]{ + securityGroupsStackHandler, + keysManagementStackHandler + })); + add(Arrays.asList(new AbstractStackHandler[]{ + bastionStackHandler, + //natStackHandler, + swarmManagerStackHandler, + swarmBenchmarkWorkersStackHandler, + swarmSystemWorkersStackHandler + })); + }}; + } + + + public List getManagerNodes() throws Exception{ + if(!clusterReady) + return 
new ArrayList<>(); + + List ret = getNodesFromAutoscalingGroup(swarmStacksBuilder.managersStackName); + + return ret; + } + + public List getBechmarkNodes() throws Exception{ + if(!clusterReady) + return new ArrayList<>(); + List ret = getNodesFromAutoscalingGroup(swarmStacksBuilder.benchmarkWorkersStackName); + return ret; + } + + public List getSystemNodes() throws Exception{ + if(!clusterReady) + return new ArrayList<>(); + List ret = getNodesFromAutoscalingGroup(swarmStacksBuilder.systemWorkersStackName); + return ret; + } + + protected Map getStackConfig(JsonObject configuration, String stackName){ + + if (configuration != null && configuration.has(stackName)) { + Map ret = new HashMap<>(); + JsonObject stackParams = configuration.get(stackName).getAsJsonObject(); + for(Map.Entry entry : stackParams.entrySet()) { + + ret.put(entry.getKey(), entry.getValue().getAsString()); + if(entry.getKey().equals("DesiredCapacity")) + ret.put("MaxSize", entry.getValue().getAsString()); + //ret.put("MaxSize", String.valueOf(Integer.parseInt(entry.getValue().toString())+1)); + } + return ret; + } + + return null; + } + + public JsonObject parseConfiguration(String desiredConfiguration){ + JsonObject configuration = null; + if(desiredConfiguration==null || desiredConfiguration.equals("")) + return configuration; + try { + JsonParser jsonParser = new JsonParser(); + configuration = jsonParser.parse(desiredConfiguration).getAsJsonObject(); + } + catch (Exception e){ + LOGGER.error("Failed to parse configuraiton"); + } + return configuration; + } + + public String getClusterConfiguration() { + return null; + } + + + @Override + public void createCluster(String configuration) throws Exception { + + JsonObject jsonConfiguration = parseConfiguration(configuration); + + initStackHandlers(jsonConfiguration); + + Boolean updateStacksIfNotMatching = false; + if(jsonConfiguration!=null) + updateStacksIfNotMatching = true; + + + long started = new Date().getTime(); + + 
awsController.createBucket(bucketName); + + clusterReady = false; + awsController.createStacks(stackList, updateStacksIfNotMatching); + clusterReady = true; + clusterCreated = started; + } + +// public void createManagers(String configuration) throws Exception { +// +// awsController.createBucket(bucketName); +// +// ArrayList> managerStackList = new ArrayList>() {{ +// add(Arrays.asList(new AbstractStackHandler[]{securityGroupsStackHandler, keysManagementStackHandler})); +// add(Arrays.asList(new AbstractStackHandler[]{swarmManagerStackHandler})); +// +// }}; +// +// awsController.createStacks(managerStackList); +// } +// +// public void createWorkers(String configuration) throws Exception { +// ArrayList> workersStackList = new ArrayList>() {{ +// add(Arrays.asList(new AbstractStackHandler[]{swarmBenchmarkWorkersStackHandler, swarmSystemWorkersStackHandler})); +// }}; +// awsController.createStacks(workersStackList); +// } + + @Override + public void deleteCluster() throws Exception { + + //deleteSwarmCluster(); + //LOGGER.warn("Deletion of vpc, bastion, nat is disabled"); + awsController.deleteStacks(stackList); + + clusterCreated = 0; + clusterReady=false; + } + +// public void deleteSwarmCluster() throws Exception { +// List> stackList = new ArrayList>() {{ +// add(Arrays.asList(new AbstractStackHandler[]{securityGroupsStackHandler, keysManagementStackHandler })); +// add(Arrays.asList(new AbstractStackHandler[]{swarmManagerStackHandler, swarmBenchmarkWorkersStackHandler, swarmSystemWorkersStackHandler })); +// }}; +// awsController.deleteStacks(stackList); +// } + +// @Override +// public void deleteManagers() throws Exception { +// awsController.deleteStacks(managerStackList); +// } +// +// public void deleteWorkers() throws Exception{ +// awsController.deleteStacks(workersStackList); +// } + +} diff --git a/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/DockerSwarmStackHandler.java 
b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/DockerSwarmStackHandler.java new file mode 100644 index 00000000..78b26504 --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/DockerSwarmStackHandler.java @@ -0,0 +1,20 @@ +package org.hobbit.controller.cloud.aws.swarm.handlers; + +import org.hobbit.awscontroller.StackHandlers.SSHDependentStackHandler; + +/** + * @author Pavel Smirnov. (psmirnov@agtinternational.com / smirnp@gmail.com) + */ + +public class DockerSwarmStackHandler extends SSHDependentStackHandler { + + public DockerSwarmStackHandler(SwarmClusterStackHandler.Builder builder) { + super(builder); + parameters.put("ParentSecurityGroupsStack", builder.securityGroupsStackName); + parameters.put("ParentKeysManagementStack", builder.keysManagementStackName); + parameters.put("DockerVersion", "17.12.1"); + parameters.put("BucketName", builder.bucketName); + + } + +} \ No newline at end of file diff --git a/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/KeysManagementStackHandler.java b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/KeysManagementStackHandler.java new file mode 100644 index 00000000..ad5489d2 --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/KeysManagementStackHandler.java @@ -0,0 +1,20 @@ +package org.hobbit.controller.cloud.aws.swarm.handlers; + +import org.hobbit.awscontroller.StackHandlers.AbstractStackHandler; + +/** + * @author Pavel Smirnov. 
(psmirnov@agtinternational.com / smirnp@gmail.com) + */ + +public class KeysManagementStackHandler extends AbstractStackHandler { + + public KeysManagementStackHandler(SwarmClusterStackHandler.Builder builder){ + super(builder); + name = builder.keysManagementStackName; + bodyFilePath = "AWS/swarm-mode/kms.yaml"; + } + + + + +} \ No newline at end of file diff --git a/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SecurityGroupsStackHandler.java b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SecurityGroupsStackHandler.java new file mode 100644 index 00000000..4978d7e1 --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SecurityGroupsStackHandler.java @@ -0,0 +1,22 @@ +package org.hobbit.controller.cloud.aws.swarm.handlers; + +import org.hobbit.awscontroller.StackHandlers.VpcDependentStackHandler; + +/** + * @author Pavel Smirnov. (psmirnov@agtinternational.com / smirnp@gmail.com) + */ + + +public class SecurityGroupsStackHandler extends VpcDependentStackHandler { + + + public SecurityGroupsStackHandler(SwarmClusterStackHandler.Builder builder) { + super(builder); + name = builder.securityGroupsStackName; + bodyFilePath = "AWS/swarm-mode/securitygroups.yaml"; + } + + + + +} diff --git a/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SwarmClusterStackHandler.java b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SwarmClusterStackHandler.java new file mode 100644 index 00000000..461e25e4 --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SwarmClusterStackHandler.java @@ -0,0 +1,93 @@ +package org.hobbit.controller.cloud.aws.swarm.handlers; + + +import org.hobbit.awscontroller.StackHandlers.SSHDependentStackHandler; +import org.hobbit.controller.cloud.aws.handlers.BasicClusterStackHandler; + +/** + * @author Pavel Smirnov. 
(psmirnov@agtinternational.com / smirnp@gmail.com) + */ + + +public class SwarmClusterStackHandler extends SSHDependentStackHandler { + + public SwarmClusterStackHandler(SSHDependentStackHandler.Builder builder) { + super(builder); + } + + + //public SwarmClusterStackHandler(Builder builder) { +// super(builder); +// } + + public static class Builder extends SSHDependentStackHandler.Builder + { + + public String bucketName; + public String securityGroupsStackName; + public String keysManagementStackName; + + public String managersStackName; + public String benchmarkWorkersStackName; + public String systemWorkersStackName; + public String workerType; + + public String bastionStackName; + public String natStackName; + + public Builder(BasicClusterStackHandler.Builder parent) { + vpcStackName = parent.getVpcStackName(); + bastionStackName = parent.getNatStackName(); + natStackName = parent.getNatStackName(); + sshKeyName = parent.getSshKeyName(); + } + + public Builder bastionStackName(String value){ + this.bastionStackName = value; + return (Builder) this; + } + + public Builder natStackName (String value){ + this.natStackName = value; + return (Builder) this; + } + + public Builder securityGroupsStackName(String value) { + this.securityGroupsStackName = value; + return this; + } + + public Builder keysManagementStackName(String value) { + this.keysManagementStackName = value; + return this; + } + + public Builder managersStackName(String value) { + this.managersStackName = value; + return this; + } + + public Builder benchmarkWorkersStackName(String value) { + this.benchmarkWorkersStackName = value; + return this; + } + + public Builder systemWorkersStackName(String value) { + this.systemWorkersStackName = value; + return this; + } + + public Builder bucketName(String value) { + this.bucketName = value; + return this; + } + + public Builder workerType(String value) { + this.workerType = value; + return this; + } + + } + + +} diff --git 
a/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SwarmManagerStackHandler.java b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SwarmManagerStackHandler.java new file mode 100644 index 00000000..dea44cdb --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SwarmManagerStackHandler.java @@ -0,0 +1,20 @@ +package org.hobbit.controller.cloud.aws.swarm.handlers; + + +/** + * @author Pavel Smirnov. (psmirnov@agtinternational.com / smirnp@gmail.com) + */ + + +public class SwarmManagerStackHandler extends DockerSwarmStackHandler { + + public SwarmManagerStackHandler(SwarmClusterStackHandler.Builder builder) { + super(builder); + name = builder.managersStackName; + bodyFilePath = "AWS/swarm-mode/manager.yaml"; + } + + + +} + diff --git a/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SwarmWorkerStackHandler.java b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SwarmWorkerStackHandler.java new file mode 100644 index 00000000..08bb00f5 --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/handlers/SwarmWorkerStackHandler.java @@ -0,0 +1,25 @@ +package org.hobbit.controller.cloud.aws.swarm.handlers; + + +/** + * @author Pavel Smirnov. 
(psmirnov@agtinternational.com / smirnp@gmail.com) + */ + + +public class SwarmWorkerStackHandler extends DockerSwarmStackHandler { + + + public SwarmWorkerStackHandler(SwarmClusterStackHandler.Builder builder) { + super(builder); + + name = (builder.workerType.equals("benchmark")?builder.benchmarkWorkersStackName:builder.systemWorkersStackName); + parameters.put("WorkerType", builder.workerType); + bodyFilePath = "AWS/swarm-mode/worker.yaml"; + } + + + + +} + + diff --git a/platform-controller/src/main/java/org/hobbit/controller/data/ExperimentStatus.java b/platform-controller/src/main/java/org/hobbit/controller/data/ExperimentStatus.java index b9c2c35c..77ebfc54 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/data/ExperimentStatus.java +++ b/platform-controller/src/main/java/org/hobbit/controller/data/ExperimentStatus.java @@ -57,6 +57,7 @@ public class ExperimentStatus implements Closeable { * */ public static enum States { + CLOUD_RESOURCES_PREPARATION("The platform is preparing cloud resources for the experiment."), /** * The platform is still preparing the experiment, e.g., pulling Docker images. */ @@ -142,7 +143,7 @@ private States(String description) { * experiment URI and the current system time as start time. * * @param config - * the configuration of the experiment + * the configurationString of the experiment * @param experimentUri * the URI of the experiment */ @@ -157,7 +158,7 @@ public ExperimentStatus(ExperimentConfiguration config, String experimentUri) { * manager which will be used to abort the experiment if the time is exceeded. * * @param config - * the configuration of the experiment + * the configurationString of the experiment * @param experimentUri * the URI of the experiment * @param startTimeStamp @@ -175,7 +176,7 @@ public ExperimentStatus(ExperimentConfiguration config, String experimentUri, lo * exceeded. 
* * @param config - * the configuration of the experiment + * the configurationString of the experiment * @param experimentUri * the URI of the experiment * @param manager @@ -197,7 +198,7 @@ public ExperimentStatus(ExperimentConfiguration config, String experimentUri, Ex * manager which will be used to abort the experiment if the time is exceeded. * * @param config - * the configuration of the experiment + * the configurationString of the experiment * @param experimentUri * the URI of the experiment * @param manager diff --git a/platform-controller/src/main/java/org/hobbit/controller/docker/CloudClusterManager.java b/platform-controller/src/main/java/org/hobbit/controller/docker/CloudClusterManager.java new file mode 100644 index 00000000..7a2974e4 --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/docker/CloudClusterManager.java @@ -0,0 +1,142 @@ +package org.hobbit.controller.docker; + +import com.spotify.docker.client.DockerClient; +import com.spotify.docker.client.exceptions.DockerException; +import org.hobbit.controller.cloud.CloudSshTunnelsProvider; +import org.hobbit.controller.PlatformController; +import org.hobbit.controller.cloud.aws.swarm.SwarmClusterManager; +import org.hobbit.controller.cloud.ClusterManagerProvider; +import org.hobbit.controller.cloud.DockerClientProvider; + +import com.spotify.docker.client.exceptions.DockerCertificateException; +import org.hobbit.controller.queue.ExperimentQueue; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Date; + +/** + * ClusterManager implementation on AWS resources + * + * @author Pavel Smirnov (smirnp@gmail.com) + * + */ +public class CloudClusterManager extends ClusterManagerImpl { + public static final String CLOUD_EXPIRE_TIMEOUT_MIN_KEY ="CLOUD_EXPIRE_TIMEOUT_MIN"; + public static final String CLOUD_SSH_KEY_NAME_KEY ="CLOUD_SSH_KEY_NAME"; + public static final String CLOUD_SSH_KEY_FILE_PATH_KEY ="CLOUD_SSH_KEY_FILE_PATH"; + + private static final 
Logger LOGGER = LoggerFactory.getLogger(CloudClusterManager.class); + + private SwarmClusterManager swarmClusterManager; + private CloudSshTunnelsProvider commonSshTunnelsProvider; + + private long clusterDeletionTime=0; + private boolean clusterDeleted; + private boolean creationRequested; + private long prevDelta; + private int clusterExpireTimeoutMin = 30; + + + public CloudClusterManager() throws DockerCertificateException{ + super(); + swarmClusterManager = ClusterManagerProvider.getManager(); + if(System.getenv().containsKey(CLOUD_EXPIRE_TIMEOUT_MIN_KEY)) + clusterExpireTimeoutMin = Integer.parseInt(System.getenv().get(CLOUD_EXPIRE_TIMEOUT_MIN_KEY)); + + } + + public CloudClusterManager(PlatformController platformController) throws DockerCertificateException{ + this(); + commonSshTunnelsProvider = new CloudSshTunnelsProvider(platformController); + } + + @Override + public boolean isClusterHealthy() throws DockerException, InterruptedException { + try { + if(swarmClusterManager.getManagerNodes().size()==0) + return false; + } catch (Exception e) { + LOGGER.error("Cannot get manager nodes: {}", e.getLocalizedMessage()); + return false; + } + + return super.isClusterHealthy(); + } + + @Override + public DockerClient getDockerClient(){ + return DockerClientProvider.getDockerClient(); + } + + public SwarmClusterManager getSwarmClusterManager() { + return swarmClusterManager; + } + + @Override + public Integer getNumberOfNodes() throws DockerException, InterruptedException { + //final Info info = getClusterInfo(); + //return info.swarm().nodes(); + try { + if(swarmClusterManager.getManagerNodes().size()==0) + return 0; + } catch (Exception e) { + LOGGER.error("Cannot get manager nodes: {}", e.getLocalizedMessage()); + return 0; + } + return super.getNumberOfNodes(); + } + + public void createCluster(String clusterConfiguration) throws Exception { + //if(!creationRequested){ + // creationRequested = true; + swarmClusterManager.createCluster(clusterConfiguration); + 
+ if(!commonSshTunnelsProvider.isConnected()) { + if(!commonSshTunnelsProvider.isProviderInitialized()) + commonSshTunnelsProvider.initSshProvider(); + commonSshTunnelsProvider.newSshTunnel(null, null); + } + + creationRequested = false; + //} + + } + + public String getClusterConfiguration(){ + return swarmClusterManager.getClusterConfiguration(); + } + + public void reactOnQueue(ExperimentQueue queue){ + + if(queue.listAll().size()==0){ + if(clusterExpireTimeoutMin>0) + if(!clusterDeleted){ + if(clusterDeletionTime==0) + clusterDeletionTime = new Date().getTime() + clusterExpireTimeoutMin * 60 * 1000; + else{ + long delta = ((clusterDeletionTime - new Date().getTime())/60000)+1; + if(prevDelta!=delta) + LOGGER.info("The queue is empty. Cluster (if exists) will be deleted in {} minutes", delta); + prevDelta = delta; + + if(delta<=0){ + LOGGER.info("Deleting cluster"); + try { + swarmClusterManager.deleteCluster(); + clusterDeletionTime=0; + clusterDeleted = true; + } catch (Exception e) { + LOGGER.error("Failed to delete cluster: {}", e.getLocalizedMessage()); + e.printStackTrace(); + } + } + + } + } + }else{ + clusterDeleted=false; + clusterDeletionTime=0; + } + } +} diff --git a/platform-controller/src/main/java/org/hobbit/controller/docker/CloudContainerManager.java b/platform-controller/src/main/java/org/hobbit/controller/docker/CloudContainerManager.java new file mode 100644 index 00000000..a854e417 --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/docker/CloudContainerManager.java @@ -0,0 +1,75 @@ +package org.hobbit.controller.docker; + +import com.spotify.docker.client.DockerClient; +import com.spotify.docker.client.messages.ExecCreation; +import com.spotify.docker.client.messages.swarm.Node; +import com.spotify.docker.client.messages.swarm.Task; +import org.hobbit.controller.cloud.CloudSshTunnelsProvider; +import org.hobbit.controller.cloud.DockerClientProvider; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import 
java.util.concurrent.Semaphore; +import java.util.function.Function; + +/** + * @author Pavel Smirnov. (psmirnov@agtinternational.com / smirnp@gmail.com) + */ + +public class CloudContainerManager extends ContainerManagerImpl { + private static final Logger LOGGER = LoggerFactory.getLogger(CloudClusterManager.class); + + + public CloudContainerManager(ClusterManager clusterManager) throws Exception { + super(clusterManager); + + } + + @Override + public DockerClient getDockerClient(){ + return DockerClientProvider.getDockerClient(); + } + + + @Override + public String getContainerId(String name){ + if(getDockerClient()==null) + return null; + return super.getContainerId(name); + } + + @Override + public boolean execAsyncCommand(String containerName, String[] command){ + boolean ret = false; + Semaphore initFinishedMutex = new Semaphore(0); + try { + + String taskId = containerToTaskMapping.get(containerName); + Task task = inspectTask(taskId); + String containerId = task.status().containerStatus().containerId().substring(0,12); + + Node node = getDockerClient().listNodes(Node.Criteria.builder().nodeId(task.nodeId()).build()).get(0); + String nodeIp = node.status().addr(); + + CloudSshTunnelsProvider.execAsyncCommand(nodeIp, new Function() { + @Override + public String apply(DockerClient dockerClient) { + try { + ExecCreation execCreation = dockerClient.execCreate(containerId, command, DockerClient.ExecCreateParam.detach()); + dockerClient.execStart(execCreation.id()); + initFinishedMutex.release(); + } catch (Exception e) { + LOGGER.error("Failed to execute the command: {}", e.getLocalizedMessage()); + } + + return null; + } + }); + initFinishedMutex.acquire(); + ret = true; + } catch (Exception e) { + LOGGER.error("Failed to execute the command: {}", e.getLocalizedMessage()); + } + return ret; + } +} diff --git a/platform-controller/src/main/java/org/hobbit/controller/docker/ClusterManager.java 
b/platform-controller/src/main/java/org/hobbit/controller/docker/ClusterManager.java index ae7af456..56497bfb 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/docker/ClusterManager.java +++ b/platform-controller/src/main/java/org/hobbit/controller/docker/ClusterManager.java @@ -23,16 +23,7 @@ public interface ClusterManager { * * @return number of nodes */ - public int getNumberOfNodes() throws DockerException, InterruptedException; - - /** - * Get number of nodes in the cluster - * - * @param label - * the label to filter nodes - * @return number of nodes with the specified label - */ - public int getNumberOfNodes(String label) throws DockerException, InterruptedException; + public Integer getNumberOfNodes() throws DockerException, InterruptedException; /** * Get the health status of the cluster @@ -47,7 +38,7 @@ public interface ClusterManager { * * @return expected number of nodes */ - public int getExpectedNumberOfNodes(); + public Integer getExpectedNumberOfNodes(); /** * Set task history limit for the swarm cluster @@ -59,8 +50,8 @@ public interface ClusterManager { /** * Get task history limit for the swarm cluster * - * @return int (task history limit) + * @return Integer (task history limit) */ - public int getTaskHistoryLimit() throws DockerException, InterruptedException; + public Integer getTaskHistoryLimit() throws DockerException, InterruptedException; } diff --git a/platform-controller/src/main/java/org/hobbit/controller/docker/ClusterManagerImpl.java b/platform-controller/src/main/java/org/hobbit/controller/docker/ClusterManagerImpl.java index 5cbae11c..fd74e1d4 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/docker/ClusterManagerImpl.java +++ b/platform-controller/src/main/java/org/hobbit/controller/docker/ClusterManagerImpl.java @@ -7,10 +7,12 @@ import com.spotify.docker.client.exceptions.DockerCertificateException; import com.spotify.docker.client.exceptions.DockerException; import 
com.spotify.docker.client.messages.Info; -import com.spotify.docker.client.messages.swarm.Node; import com.spotify.docker.client.messages.swarm.OrchestrationConfig; import com.spotify.docker.client.messages.swarm.SwarmSpec; import com.spotify.docker.client.messages.swarm.Version; +import org.hobbit.controller.ExperimentManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * ClusterManager implementation @@ -24,12 +26,12 @@ public class ClusterManagerImpl implements ClusterManager { /** * Docker client instance */ - private DockerClient dockerClient; + //private DockerClient dockerClient; private Integer expectedNumberOfNodes = 0; private String SWARM_NODE_NUMBER = null; public ClusterManagerImpl() throws DockerCertificateException { - dockerClient = DockerUtility.getDockerClient(); + SWARM_NODE_NUMBER = System.getenv("SWARM_NODE_NUMBER"); if(SWARM_NODE_NUMBER == null) { expectedNumberOfNodes = 1; @@ -38,31 +40,25 @@ public ClusterManagerImpl() throws DockerCertificateException { } } - public Info getClusterInfo() throws DockerException, InterruptedException { - return dockerClient.info(); + public DockerClient getDockerClient(){ + DockerClient dockerClient = null; + try { + dockerClient = DockerUtility.getDockerClient(); + } catch (DockerCertificateException e) { + LOGGER.error(e.getMessage()); + } + return dockerClient; } - public int getNumberOfNodes() throws DockerException, InterruptedException { - final Info info = getClusterInfo(); - return info.swarm().nodes(); + public Info getClusterInfo() throws DockerException, InterruptedException { + return getDockerClient().info(); } - public int getNumberOfNodes(String label) throws DockerException, InterruptedException { - /* - // doesn't work - Node.Criteria criteria = Node.Criteria.builder().label(label).build(); - return dockerClient.listNodes(criteria).size(); - */ - final String[] parts = label.split("="); - int number = 0; - for (Node node : dockerClient.listNodes()) { - if 
(node.spec().labels().containsKey(parts[0])) { - if (parts.length == 1 || node.spec().labels().get(parts[0]).equals(parts[1])) { - number++; - } - } - } - return number; + public Integer getNumberOfNodes() throws DockerException, InterruptedException { + //final Info info = getClusterInfo(); + //return info.swarm().nodes(); + String test="123"; + return (int)getDockerClient().listNodes().stream().filter(n->n.status().state().equals("ready")).count(); } public boolean isClusterHealthy() throws DockerException, InterruptedException { @@ -74,7 +70,7 @@ public boolean isClusterHealthy() throws DockerException, InterruptedException { return false; } - public int getExpectedNumberOfNodes() { + public Integer getExpectedNumberOfNodes() { return expectedNumberOfNodes; } @@ -82,7 +78,7 @@ public void setTaskHistoryLimit(Integer taskHistoryLimit) throws DockerException OrchestrationConfig orchestrationConfig = OrchestrationConfig.builder() .taskHistoryRetentionLimit(0) .build(); - SwarmSpec currentSwarmSpec = dockerClient.inspectSwarm().swarmSpec(); + SwarmSpec currentSwarmSpec = getDockerClient().inspectSwarm().swarmSpec(); SwarmSpec updatedSwarmSpec = SwarmSpec.builder() .orchestration(orchestrationConfig) .caConfig(currentSwarmSpec.caConfig()) @@ -93,12 +89,12 @@ public void setTaskHistoryLimit(Integer taskHistoryLimit) throws DockerException .raft(currentSwarmSpec.raft()) .taskDefaults(currentSwarmSpec.taskDefaults()) .build(); - Version swarmVersion = dockerClient.inspectSwarm().version(); - dockerClient.updateSwarm(swarmVersion.index(), updatedSwarmSpec); + Version swarmVersion = getDockerClient().inspectSwarm().version(); + getDockerClient().updateSwarm(swarmVersion.index(), updatedSwarmSpec); } - public int getTaskHistoryLimit() throws DockerException, InterruptedException { - SwarmSpec currentSwarmSpec = dockerClient.inspectSwarm().swarmSpec(); + public Integer getTaskHistoryLimit() throws DockerException, InterruptedException { + SwarmSpec currentSwarmSpec = 
getDockerClient().inspectSwarm().swarmSpec(); return currentSwarmSpec.orchestration().taskHistoryRetentionLimit(); } } diff --git a/platform-controller/src/main/java/org/hobbit/controller/docker/ContainerManager.java b/platform-controller/src/main/java/org/hobbit/controller/docker/ContainerManager.java index 61c785e0..7201a838 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/docker/ContainerManager.java +++ b/platform-controller/src/main/java/org/hobbit/controller/docker/ContainerManager.java @@ -18,8 +18,11 @@ import java.util.List; +import com.spotify.docker.client.DockerClient; +import com.spotify.docker.client.LogStream; import com.spotify.docker.client.exceptions.DockerException; import com.spotify.docker.client.messages.ContainerStats; +import com.spotify.docker.client.messages.swarm.Service; import com.spotify.docker.client.messages.swarm.Task; /** @@ -120,7 +123,7 @@ public interface ContainerManager { * @return container Id or null if an error occurred. */ public String startContainer(String imageName, String containerType, String parentId, String[] env, - String[] command); + String[] command, String[] volumePaths); /** * Starts the container with the given image name. @@ -143,6 +146,26 @@ public String startContainer(String imageName, String containerType, String pare public String startContainer(String imageName, String containerType, String parentId, String[] env, String[] command, String experimentId); + /** + * Starts the container with the given image name. + * + * @param imageName + * name of the image to be started + * @param containerType + * type to be assigned to container + * @param parentId + * id of the parent container + * @param env + * environment variables of the schema "key=value" + * @param command + * commands that should be executed + * @param experimentId + * experimentId to add to GELF tag + * + * @return container Id or null if an error occurred. 
+ */ + public String startContainer(String imageName, String containerType, String parentId, String[] env, + String[] command, String experimentId, String[] volumePaths); /** * Stops the container with the given container Id. * @@ -244,4 +267,13 @@ public default List getContainers() { * container can not be found or an error occurs. */ public ContainerStats getStats(String containerId); + + public List listServices(); + + public Task inspectTask(String taskId); + + public LogStream serviceLogs(String serviceId, DockerClient.LogsParam... params); + + public boolean execAsyncCommand(String containerId, String[] command); + } diff --git a/platform-controller/src/main/java/org/hobbit/controller/docker/ContainerManagerImpl.java b/platform-controller/src/main/java/org/hobbit/controller/docker/ContainerManagerImpl.java index 516da5db..ac774d92 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/docker/ContainerManagerImpl.java +++ b/platform-controller/src/main/java/org/hobbit/controller/docker/ContainerManagerImpl.java @@ -29,7 +29,13 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; +import com.spotify.docker.client.LogStream; +import com.spotify.docker.client.messages.*; +import com.spotify.docker.client.messages.Network; +import com.spotify.docker.client.messages.mount.Mount; +import com.spotify.docker.client.messages.swarm.*; import org.hobbit.controller.gitlab.GitlabControllerImpl; +import org.hobbit.controller.utils.ServiceLogsReader; import org.hobbit.controller.utils.Waiting; import org.hobbit.core.Constants; import org.slf4j.Logger; @@ -40,21 +46,6 @@ import com.spotify.docker.client.exceptions.DockerException; import com.spotify.docker.client.exceptions.ServiceNotFoundException; import com.spotify.docker.client.exceptions.TaskNotFoundException; -import com.spotify.docker.client.messages.ContainerStats; -import com.spotify.docker.client.messages.Network; -import com.spotify.docker.client.messages.NetworkConfig; -import 
com.spotify.docker.client.messages.RegistryAuth; -import com.spotify.docker.client.messages.ServiceCreateResponse; -import com.spotify.docker.client.messages.swarm.ContainerSpec; -import com.spotify.docker.client.messages.swarm.Driver; -import com.spotify.docker.client.messages.swarm.NetworkAttachmentConfig; -import com.spotify.docker.client.messages.swarm.Placement; -import com.spotify.docker.client.messages.swarm.RestartPolicy; -import com.spotify.docker.client.messages.swarm.ServiceMode; -import com.spotify.docker.client.messages.swarm.ServiceSpec; -import com.spotify.docker.client.messages.swarm.Task; -import com.spotify.docker.client.messages.swarm.TaskSpec; -import com.spotify.docker.client.messages.swarm.TaskStatus; /** * Created by Timofey Ermilov on 31/08/16 @@ -73,6 +64,8 @@ public class ContainerManagerImpl implements ContainerManager { public static final String USER_PASSWORD_KEY = GitlabControllerImpl.GITLAB_TOKEN_KEY; public static final String REGISTRY_URL_KEY = "REGISTRY_URL"; + + public static final String ENABLE_VOLUMES_FOR_SYSTEM_CONTAINERS_KEY ="ENABLE_VOLUMES_FOR_SYSTEM_CONTAINERS"; private static final String DEPLOY_ENV = System.getenv().containsKey(DEPLOY_ENV_KEY) ? System.getenv().get(DEPLOY_ENV_KEY) : "production"; @@ -87,7 +80,7 @@ public class ContainerManagerImpl implements ContainerManager { private static final long DOCKER_POLL_INTERVAL = 100; private static final long DOCKER_IMAGE_PULL_MAX_WAITING_TIME = 1200000; // 20 min - + protected ClusterManager clusterManager; /** * Default network for new containers */ @@ -117,13 +110,13 @@ public class ContainerManagerImpl implements ContainerManager { /** * Docker client instance */ - private DockerClient dockerClient; + //private DockerClient dockerClient; /** - * Authentication configuration for accessing private repositories. + * Authentication configurationString for accessing private repositories. */ private final RegistryAuth gitlabAuth; /** - * Empty authentication configuration. 
Docker client's createService() uses + * Empty authentication configurationString. Docker client's createService() uses * ConfigFileRegistryAuthSupplier by default (if auth is omitted) and warns * about not being able to use it with swarm each time. */ @@ -132,16 +125,28 @@ public class ContainerManagerImpl implements ContainerManager { * Observers that should be notified if a container terminates. */ private List containerObservers = new ArrayList<>(); + protected Map containerToTaskMapping = new HashMap<>(); + private String gelfAddress = null; private String experimentId = null; + public DockerClient getDockerClient(){ + DockerClient dockerClient = null; + try { + dockerClient = DockerUtility.getDockerClient(); + } catch (DockerCertificateException e) { + LOGGER.error("Could init docker client: {}", e.getMessage()); + } + return dockerClient; + } + /** * Constructor that creates new docker client instance */ - public ContainerManagerImpl() throws Exception { + public ContainerManagerImpl(ClusterManager clusterManager) throws Exception { LOGGER.info("Deployed as \"{}\".", DEPLOY_ENV); - dockerClient = DockerUtility.getDockerClient(); + //dockerClient = DockerUtility.getDockerClient(); String username = System.getenv(USER_NAME_KEY); String email = System.getenv(USER_EMAIL_KEY); @@ -163,22 +168,45 @@ public ContainerManagerImpl() throws Exception { "Didn't find a gelf address ({}). Containers created by this platform will use the default logging.", LOGGING_GELF_ADDRESS_KEY); } + + this.clusterManager = clusterManager; +// try { +// this.clusterManager = new CloudClusterManager(); +// //clusterManager = new ClusterManagerImpl(); +// } catch (DockerCertificateException e) { +// LOGGER.error("Could not initialize Cluster Manager, will use container placement constraints by default. 
", +// e); +// } + // try to find hobbit network in existing ones - List networks = dockerClient.listNetworks(); - String hobbitNetwork = null; - for (Network net : networks) { - if (net.name().equals(HOBBIT_DOCKER_NETWORK)) { - hobbitNetwork = net.id(); - break; + if(getDockerClient()==null) + LOGGER.info("Networks check is skipped"); + else + try { + List networks = getDockerClient().listNetworks(); + String hobbitNetwork = null; + for (Network net : networks) { + if (net.name().equals(HOBBIT_DOCKER_NETWORK)) { + hobbitNetwork = net.id(); + break; + } + } + // if not found - create new one + if (hobbitNetwork == null) { + LOGGER.warn("Could not find hobbit docker network, creating a new one"); + final NetworkConfig networkConfig = NetworkConfig.builder().name(HOBBIT_DOCKER_NETWORK).driver("overlay") + .build(); + getDockerClient().createNetwork(networkConfig); + } } - } - // if not found - create new one - if (hobbitNetwork == null) { - LOGGER.warn("Could not find hobbit docker network, creating a new one"); - final NetworkConfig networkConfig = NetworkConfig.builder().name(HOBBIT_DOCKER_NETWORK).driver("overlay") - .build(); - dockerClient.createNetwork(networkConfig); - } + catch (Exception e){ + LOGGER.error("Could not check networks: {}", e.getLocalizedMessage()); + } + + if(System.getenv().containsKey(ENABLE_VOLUMES_FOR_SYSTEM_CONTAINERS_KEY)) + LOGGER.info("Volumes for containers are enabled"); + else + LOGGER.info("Volumes for containers are disabled"); } /** @@ -218,6 +246,7 @@ private String getInstanceName(String imageName) { * the name of the image that should be pulled */ public void pullImage(String imageName) { + LOGGER.debug("Pulling image {}", imageName); // do not pull if env var is set to false if (!DOCKER_AUTOPULL) { LOGGER.warn("Skipping image pulling because DOCKER_AUTOPULL is unset"); @@ -256,8 +285,7 @@ public void pullImage(String imageName) { ServiceSpec serviceCfg = serviceCfgBuilder.build(); Integer totalNodes; try { - // TODO: use 
ClusterManager - totalNodes = dockerClient.listNodes().size(); + totalNodes = (int)getDockerClient().listNodes().stream().filter(n->n.status().state().equals("ready")).count(); } catch (Exception e) { LOGGER.error("Couldn't retrieve list of swarm nodes!"); return; @@ -268,10 +296,10 @@ public void pullImage(String imageName) { // the server address of these credentials, we should use them if ((gitlabAuth != null) && (imageName.startsWith(gitlabAuth.serverAddress()))) { // pull image and wait for the pull to finish - resp = dockerClient.createService(serviceCfg, gitlabAuth); + resp = getDockerClient().createService(serviceCfg, gitlabAuth); } else { // pull image and wait for the pull to finish - resp = dockerClient.createService(serviceCfg, nullAuth); + resp = getDockerClient().createService(serviceCfg, nullAuth); } String serviceId = resp.id(); @@ -281,9 +309,9 @@ public void pullImage(String imageName) { // wait for any container of that service to start on each node try { Waiting.waitFor(() -> { - List pullingTasks = dockerClient + List pullingTasks = getDockerClient() .listTasks(Task.Criteria.builder().serviceName(serviceId).build()); - for (Task pullingTask : pullingTasks) { + for (Task pullingTask : pullingTasks){ String state = pullingTask.status().state(); if (!UNFINISHED_TASK_STATES.contains(state)) { if (state.equals(TaskStatus.TASK_STATE_REJECTED)) { @@ -308,7 +336,7 @@ public void pullImage(String imageName) { imageName, e.getLocalizedMessage()); } - dockerClient.removeService(serviceId); + getDockerClient().removeService(serviceId); } catch (Exception e) { LOGGER.error("Exception while pulling the image \"" + imageName + "\".", e); } @@ -328,10 +356,10 @@ public void pullImage(String imageName) { * @param command * (optional) command to be executed with image * - * @return String the container Id or null if an error occurs + * @return String the taskId or null if an error occurs */ - private String createContainer(String imageName, String containerType, 
String parentId, String[] env, - String[] command) { + protected String createContainer(String imageName, String containerType, String parentId, String[] env, + String[] command, String[] volumePaths){ ServiceSpec.Builder serviceCfgBuilder = ServiceSpec.builder(); TaskSpec.Builder taskCfgBuilder = TaskSpec.builder(); @@ -346,6 +374,30 @@ private String createContainer(String imageName, String containerType, String pa // generate unique container name String containerName = getInstanceName(imageName); cfgBuilder.hostname(containerName); + + if(System.getenv().containsKey(ENABLE_VOLUMES_FOR_SYSTEM_CONTAINERS_KEY)){ + List mounts = new ArrayList<>(); + for (String volume : volumePaths) { + String[] splitted = volume.split(":"); + try { + Mount mount = Mount + .builder() + //.type("bind") + .type("volume") + .source(splitted[0]) + .target(splitted[1]) + .build(); + mounts.add(mount); + } catch (Exception e) { + LOGGER.error("Failed to create volume: {}", e.getLocalizedMessage()); + } + + } + if (mounts.size() > 0) + cfgBuilder.mounts(mounts); + }else + LOGGER.warn("Volumes for system containers are disabled"); + // get parent info List defaultEnv = new ArrayList<>(); defaultEnv.add(Constants.CONTAINER_NAME_KEY + "=" + containerName); @@ -368,26 +420,21 @@ private String createContainer(String imageName, String containerType, String pa } } - int numberOfSwarmNodes = 0; - int numberOfSystemSwarmNodes = 0; - int numberOfBenchmarkSwarmNodes = 0; + // If the parent has "system" --> we do not care what the container + // would like to have OR if there is no parent or the parent is a + // benchmark (in case of the benchmark controller) and the container has + // type "system" + Integer numberOfSwarmNodes = Integer.MAX_VALUE; try { - ClusterManager clusterManager = new ClusterManagerImpl(); numberOfSwarmNodes = clusterManager.getNumberOfNodes(); - numberOfSystemSwarmNodes = clusterManager.getNumberOfNodes("org.hobbit.workergroup=system"); - numberOfBenchmarkSwarmNodes = 
clusterManager.getNumberOfNodes("org.hobbit.workergroup=benchmark"); - } catch (DockerCertificateException e) { - LOGGER.error("Could not initialize Cluster Manager, will use container placement constraints by default. ", - e); } catch (Exception e) { LOGGER.error("Could not get number of swarm nodes. ", e); } - if (numberOfSwarmNodes > 1) { - // If the parent has "system" --> we do not care what the container - // would like to have OR if there is no parent or the parent is a - // benchmark (in case of the benchmark controller) and the container has - // type "system" + if(numberOfSwarmNodes == 0) { + LOGGER.error("No swarm nodes. Aborting experiment preparation"); + return null; + }else if (numberOfSwarmNodes > 1) { if ((((parentType == null) || Constants.CONTAINER_TYPE_BENCHMARK.equals(parentType)) && Constants.CONTAINER_TYPE_SYSTEM.equals(containerType)) || Constants.CONTAINER_TYPE_SYSTEM.equals(parentType)) { @@ -417,11 +464,6 @@ private String createContainer(String imageName, String containerType, String pa LOGGER.warn("The swarm cluster got only 1 node, I will not use placement constraints."); } - // add hardware information to environment - defaultEnv.add(Constants.HARDWARE_NUMBER_OF_NODES_KEY + "=" + numberOfSwarmNodes); - defaultEnv.add(Constants.HARDWARE_NUMBER_OF_SYSTEM_NODES_KEY + "=" + numberOfSystemSwarmNodes); - defaultEnv.add(Constants.HARDWARE_NUMBER_OF_BENCHMARK_NODES_KEY + "=" + numberOfBenchmarkSwarmNodes); - // create env vars to pass if (env != null) { defaultEnv.addAll(Arrays.asList(env)); @@ -463,17 +505,19 @@ private String createContainer(String imageName, String containerType, String pa serviceCfgBuilder.networks(NetworkAttachmentConfig.builder().target(HOBBIT_DOCKER_NETWORK).build()); serviceCfgBuilder.name(containerName); + serviceCfgBuilder.endpointSpec(EndpointSpec.builder().addPort(PortConfig.builder().build()).build()); + ServiceSpec serviceCfg = serviceCfgBuilder.build(); String serviceId = null; try { - ServiceCreateResponse 
resp = dockerClient.createService(serviceCfg, nullAuth); + ServiceCreateResponse resp = getDockerClient().createService(serviceCfg, nullAuth); serviceId = resp.id(); final String serviceIdForLambda = serviceId; // wait for a container of that service to start List serviceTasks = new ArrayList(); Waiting.waitFor(() -> { serviceTasks.clear(); - serviceTasks.addAll(dockerClient.listTasks(Task.Criteria.builder().serviceName(serviceIdForLambda).build())); + serviceTasks.addAll(getDockerClient().listTasks(Task.Criteria.builder().serviceName(serviceIdForLambda).build())); if (!serviceTasks.isEmpty()) { TaskStatus status = serviceTasks.get(0).status(); @@ -487,13 +531,20 @@ private String createContainer(String imageName, String containerType, String pa return false; }, DOCKER_POLL_INTERVAL); + +// Task task = serviceTasks.get(0); +// while(task.status()==null){ +// LOGGER.debug("Waiting status for task {}", task.id()); +// Thread.sleep(1000); +// } + String taskId = serviceTasks.get(0).id(); - // return new container id + containerToTaskMapping.put(getContainerName(taskId), taskId); return taskId; } catch (Exception e) { if (serviceId != null) { try { - dockerClient.removeService(serviceId); + getDockerClient().removeService(serviceId); } catch (Exception cleanupE) { LOGGER.error("Couldn't remove service {} which didn't cleanly start", serviceId, cleanupE); } @@ -520,35 +571,46 @@ public String startContainer(String imageName, String type, String parent) { @Override public String startContainer(String imageName, String containerType, String parentId, String[] command) { - return startContainer(imageName, containerType, parentId, null, command); + return startContainer(imageName, containerType, parentId, null, command, new String[]{}); } @Override public String startContainer(String imageName, String containerType, String parentId, String[] env, - String[] command) { - String containerId = createContainer(imageName, containerType, parentId, env, command); + String[] 
command, String experimentId) { + this.experimentId = experimentId; + return startContainer(imageName, containerType, parentId, env, command, new String[]{}); + } + + @Override + public String startContainer(String imageName, String containerType, String parentId, String[] env, String[] command, String experimentId, String[] volumes) { + this.experimentId = experimentId; + return startContainer(imageName, containerType, parentId, env, command, volumes); + } + + @Override + public String startContainer(String imageName, String containerType, String parentId, String[] env, String[] command, String[] volumes) { + String containerId = createContainer(imageName, containerType, parentId, env, command, volumes); // if the creation was successful if (containerId != null) { for (ContainerStateObserver observer : containerObservers) { - observer.addObservedContainer(containerId); + boolean add = true; + if (observer instanceof ServiceLogsReader) + if(String.join(",", env).contains("skip_logs_reading=1")) + add = false; + if(add) + observer.addObservedContainer(containerId); } + return containerId; } return null; } - @Override - public String startContainer(String imageName, String containerType, String parentId, String[] env, - String[] command, String experimentId) { - this.experimentId = experimentId; - return startContainer(imageName, containerType, parentId, env, command); - } - @Override public void removeContainer(String taskId) { try { - Task taskInfo = dockerClient.inspectTask(taskId); + Task taskInfo = getDockerClient().inspectTask(taskId); String serviceId = taskInfo.serviceId(); Integer exitCode = taskInfo.status().containerStatus().exitCode(); @@ -556,6 +618,7 @@ public void removeContainer(String taskId) { LOGGER.warn("Container for task {} has no exit code, assuming 0", taskId); exitCode = 0; } + if (DEPLOY_ENV.equals(DEPLOY_ENV_DEVELOP)) { LOGGER.info("Will not remove container with task id {}. 
" + "Development mode is enabled.", taskId); } else if (DEPLOY_ENV.equals(DEPLOY_ENV_TESTING) && (exitCode != 0)) { @@ -565,12 +628,12 @@ public void removeContainer(String taskId) { taskId); } else { LOGGER.info("Removing service of container with task id {}. ", taskId); - dockerClient.removeService(serviceId); + getDockerClient().removeService(serviceId); // wait for the service to disappear Waiting.waitFor(() -> { try { - dockerClient.inspectService(serviceId); + getDockerClient().inspectService(serviceId); return false; } catch (ServiceNotFoundException e) { return true; @@ -578,7 +641,7 @@ public void removeContainer(String taskId) { }, DOCKER_POLL_INTERVAL); } } catch (TaskNotFoundException | ServiceNotFoundException e) { - LOGGER.error("Couldn't remove container {} because it doesn't exist", taskId); + LOGGER.warn("Couldn't remove container {} because it doesn't exist", taskId); } catch (Exception e) { LOGGER.error("Couldn't remove container with task id " + taskId + ".", e); } @@ -606,7 +669,7 @@ public void removeParentAndChildren(String parentId) { // find children try { String label = LABEL_PARENT + "=" + parentId; - List containers = dockerClient.listTasks(Task.Criteria.builder().label(label).build()); + List containers = getDockerClient().listTasks(Task.Criteria.builder().label(label).build()); for (Task c : containers) { if (c != null) { removeParentAndChildren(c.id()); @@ -624,7 +687,7 @@ public Task getContainerInfo(String taskId) throws InterruptedException, DockerE } Task info = null; try { - info = dockerClient.inspectTask(taskId); + info = getDockerClient().inspectTask(taskId); } catch (TaskNotFoundException e) { // return null } @@ -634,7 +697,7 @@ public Task getContainerInfo(String taskId) throws InterruptedException, DockerE @Override public List getContainers(Task.Criteria criteria) { try { - return dockerClient.listTasks(criteria); + return getDockerClient().listTasks(criteria); } catch (Exception e) { return new ArrayList<>(); } @@ -643,8 
+706,13 @@ public List getContainers(Task.Criteria criteria) { @Override public String getContainerId(String name) { try { - List serviceTasks = dockerClient.listTasks(Task.Criteria.builder().taskName(name).build()); + List serviceTasks = getDockerClient().listTasks(Task.Criteria.builder().taskName(name).build()); if (!serviceTasks.isEmpty()) { + Task task = serviceTasks.get(0); +// if(task.status()!=null) { +// String ret = task.status().containerStatus().containerId().substring(0,12); +// return ret; +// } return serviceTasks.get(0).id(); } } catch (Exception e) { @@ -654,19 +722,19 @@ public String getContainerId(String name) { } @Override - public String getContainerName(String containerId) { + public String getContainerName(String taskId) { Task response = null; try { - response = getContainerInfo(containerId); + response = getContainerInfo(taskId); } catch (Exception e) { - LOGGER.error("Couldn't retrieve info of container {} to get the name", containerId, e); + LOGGER.error("Couldn't retrieve info of container {} to get the name", taskId, e); } String containerName = null; if (response != null) { try { - containerName = dockerClient.inspectService(response.serviceId()).spec().name(); + containerName = getDockerClient().inspectService(response.serviceId()).spec().name(); } catch (Exception e) { - LOGGER.error("Couldn't inspect docker service {} to get the name", containerId, e); + LOGGER.error("Couldn't inspect docker service {} to get the name", taskId, e); } } return containerName; @@ -692,11 +760,63 @@ public static boolean containsVersionTag(String imageName) { public ContainerStats getStats(String containerId) { ContainerStats stats = null; try { - stats = dockerClient.stats(containerId); + stats = getDockerClient().stats(containerId); } catch (Exception e) { LOGGER.warn("Error while requesting usage stats for {}. Returning null. 
Error: {}", containerId, e.getLocalizedMessage()); } return stats; } + + @Override + public List listServices() { + try { + return getDockerClient().listServices(); + } + catch (Exception e){ + LOGGER.error("Could not list services: {}", e.getLocalizedMessage()); + return null; + } + + } + + @Override + public Task inspectTask(String taskId) { + try { + return getDockerClient().inspectTask(taskId); + } + catch (Exception e){ + LOGGER.error("Could not inspect task: {}", e.getLocalizedMessage()); + return null; + } + } + + @Override + public LogStream serviceLogs(String serviceId, DockerClient.LogsParam... params) { + try { + return getDockerClient().serviceLogs(serviceId, params); + } + catch (Exception e){ + LOGGER.warn("Could not get service logs: {}", e.getLocalizedMessage()); + return null; + } + } + + @Override + public boolean execAsyncCommand(String containerName, String[] command){ + try { + String taskId = containerToTaskMapping.get(containerName); + Task task = inspectTask(taskId); + String containerId = task.status().containerStatus().containerId().substring(0,12); + + ExecCreation execCreation = getDockerClient().execCreate(containerId, command, DockerClient.ExecCreateParam.detach()); + getDockerClient().execStart(execCreation.id()); + return true; + //return execOutput; + + } catch (Exception e) { + LOGGER.error("Failed to execute the command: {}", e.getLocalizedMessage()); + } + return false; + } } diff --git a/platform-controller/src/main/java/org/hobbit/controller/docker/DockerUtility.java b/platform-controller/src/main/java/org/hobbit/controller/docker/DockerUtility.java index 2b3ba85c..0eb41603 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/docker/DockerUtility.java +++ b/platform-controller/src/main/java/org/hobbit/controller/docker/DockerUtility.java @@ -11,17 +11,33 @@ protected DockerUtility() { // Exists only to defeat instantiation. 
} + public static synchronized DockerClient getDockerClient(String host) throws DockerCertificateException { + if(dockerClient != null && dockerClient.getHost()!=host) + disposeDockerClient(); + + dockerClient = initializeDockerClient(host); + + return dockerClient; + } + public static synchronized DockerClient getDockerClient() throws DockerCertificateException { if(dockerClient == null) { - dockerClient = initializeDockerClient(); + dockerClient = initializeDockerClient(null); } return dockerClient; } - public static DockerClient initializeDockerClient() throws DockerCertificateException { + public static DockerClient initializeDockerClient(String host) throws DockerCertificateException { DefaultDockerClient.Builder builder = DefaultDockerClient.fromEnv(); + if(host!=null) + builder.uri(host); builder.connectionPoolSize(5000); builder.connectTimeoutMillis(1000); return builder.build(); } + + public static void disposeDockerClient(){ + dockerClient.close(); + dockerClient = null; + } } diff --git a/platform-controller/src/main/java/org/hobbit/controller/docker/FileBasedImageManager.java b/platform-controller/src/main/java/org/hobbit/controller/docker/FileBasedImageManager.java index d61bab6b..89cf1d80 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/docker/FileBasedImageManager.java +++ b/platform-controller/src/main/java/org/hobbit/controller/docker/FileBasedImageManager.java @@ -35,9 +35,9 @@ public class FileBasedImageManager implements ImageManager { + public static final String FILE_BASED_IMAGE_MANAGER_FOLDER_KEY = "FILE_BASED_IMAGE_MANAGER_FOLDER"; private static final Logger LOGGER = LoggerFactory.getLogger(FileBasedImageManager.class); - - private static final String DEFAULT_DEF_FOLDER = "metadata"; + private static final String DEFAULT_DEF_FOLDER = "config/metadata"; private static final Date DEFAULT_DATE = new Date(0); private final String inputFolder; @@ -49,12 +49,13 @@ public class FileBasedImageManager implements ImageManager { 
private List systems = Collections.EMPTY_LIST; public FileBasedImageManager() { - this(DEFAULT_DEF_FOLDER); + this((System.getenv().containsKey(FILE_BASED_IMAGE_MANAGER_FOLDER_KEY) ? System.getenv().get(FILE_BASED_IMAGE_MANAGER_FOLDER_KEY) : DEFAULT_DEF_FOLDER)); + } public FileBasedImageManager(String inputFolder) { this.inputFolder = inputFolder; - timer = new Timer(); + this.timer = new Timer(); startFetchingFiles(); } diff --git a/platform-controller/src/main/java/org/hobbit/controller/docker/MountImpl.java b/platform-controller/src/main/java/org/hobbit/controller/docker/MountImpl.java new file mode 100644 index 00000000..b0ccc30a --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/docker/MountImpl.java @@ -0,0 +1,117 @@ +package org.hobbit.controller.docker; + +import com.spotify.docker.client.messages.mount.BindOptions; +import com.spotify.docker.client.messages.mount.Mount; +import com.spotify.docker.client.messages.mount.TmpfsOptions; +import com.spotify.docker.client.messages.mount.VolumeOptions; + +/** + * @author Pavel Smirnov. 
(psmirnov@agtinternational.com / smirnp@gmail.com) + */ +public class MountImpl extends Mount { + + + private String type; + private String source; + private String target; + private Boolean readOnly; + private BindOptions bindOptions; + private VolumeOptions volumeOptions; + private TmpfsOptions tmpfsOptions; + +// public MountImpl(Builder builder){ +// type = builder.type; +// source = builder.source; +// target = builder.target; +// readOnly = builder.readOnly; +// bindOptions = builder.bindOptions; +// volumeOptions = builder.volumeOptions; +// tmpfsOptions = builder.tmpfsOptions; +// } + + @Override + public String type() { + return type; + } + + @Override + public String source() { + return source; + } + + @Override + public String target() { + return target; + } + + @Override + public Boolean readOnly() { + return readOnly; + } + + @Override + public BindOptions bindOptions() { + return bindOptions; + } + + @Override + public VolumeOptions volumeOptions() { + return volumeOptions; + } + + @Override + public TmpfsOptions tmpfsOptions() { + return tmpfsOptions; + } + +// public static class Builder { +// private String type; +// private String source; +// private String target; +// private Boolean readOnly; +// private BindOptions bindOptions; +// private VolumeOptions volumeOptions; +// private TmpfsOptions tmpfsOptions; +// +// +// public Builder type(String value) { +// type = value; +// return this; +// } +// +// public Builder source(String value) { +// source=value; +// return this; +// } +// +// public Builder target(String value) { +// target=value; +// return this; +// } +// +// public Builder readOnly(Boolean value) { +// readOnly=value; +// return this; +// } +// +// public Builder bindOptions(BindOptions value) { +// bindOptions=value; +// return this; +// } +// +// public Builder volumeOptions(VolumeOptions value) { +// volumeOptions=value; +// return this; +// } +// +// public Builder tmpfsOptions(TmpfsOptions value){ +// tmpfsOptions=value; +// 
return this; +// } +// +// public MountImpl build(Builder builder){ +// return new MountImpl(builder); +// } +// +// } +} diff --git a/platform-controller/src/main/java/org/hobbit/controller/docker/ResourceInformationCollector.java b/platform-controller/src/main/java/org/hobbit/controller/docker/ResourceInformationCollector.java index 829af492..fda667bd 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/docker/ResourceInformationCollector.java +++ b/platform-controller/src/main/java/org/hobbit/controller/docker/ResourceInformationCollector.java @@ -74,6 +74,19 @@ public ResourceUsageInformation getSystemUsageInformation() { .label(ContainerManager.LABEL_TYPE + "=" + Constants.CONTAINER_TYPE_SYSTEM).build()); } + public ResourceUsageInformation getBenchmarkUsageInformation() { + return getUsageInformation(Task.Criteria.builder() + .label(ContainerManager.LABEL_TYPE + "=" + Constants.CONTAINER_TYPE_BENCHMARK).build()); + } + + public String getPrometheusHost() { + return prometheusHost; + } + + public String getPrometheusPort() { + return prometheusPort; + } + public ResourceUsageInformation getUsageInformation(Task.Criteria criteria) { List tasks = manager.getContainers(criteria); @@ -143,4 +156,26 @@ private String requestPrometheusValue(String taskId, String metric) throws IOExc } } + private String requestPrometheusRange1(String taskId, String metric) throws IOException, MalformedURLException { + StringBuilder builder = new StringBuilder(); + builder.append("http://").append(prometheusHost).append(':').append(prometheusPort) + .append("/api/v1/query_range?query=") + // append metric + .append(metric) + // append filter + .append("{container_label_com_docker_swarm_task_id=\"").append(taskId).append("\"}"); + URL url = new URL(builder.toString()); + String content = IOUtils.toString(url.openConnection().getInputStream()); + LOGGER.debug("Prometheus response: {}", content); + JsonParser parser = new JsonParser(); + JsonObject root = 
parser.parse(content).getAsJsonObject(); + JsonArray result = root.get("data").getAsJsonObject().get("result").getAsJsonArray(); + if (result.size() > 0) { + return result.get(0).getAsJsonObject().get("value").getAsJsonArray().get(1).getAsString(); + } else { + LOGGER.warn("Didn't got a result when requesting {} for {}. Returning null", metric, taskId); + return null; + } + } + } diff --git a/platform-controller/src/main/java/org/hobbit/controller/front/FrontEndApiHandler.java b/platform-controller/src/main/java/org/hobbit/controller/front/FrontEndApiHandler.java index 2aea84d4..833dcd02 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/front/FrontEndApiHandler.java +++ b/platform-controller/src/main/java/org/hobbit/controller/front/FrontEndApiHandler.java @@ -101,7 +101,7 @@ public Builder() { * * @param controller * the platform controller that is called if data is incoming - * @return this builder instance + * @return this stacksBuilder instance */ public Builder platformController(PlatformController controller) { this.controller = controller; @@ -113,7 +113,7 @@ public Builder platformController(PlatformController controller) { * * @param queue * the queue that is used to receive data - * @return this builder instance + * @return this stacksBuilder instance */ public Builder queue(RabbitQueue queue) { this.queue = queue; @@ -129,7 +129,7 @@ public Builder queue(RabbitQueue queue) { * the queue factory used to create a queue * @param queueName * the name of the newly created queue - * @return this builder instance + * @return this stacksBuilder instance */ public Builder queue(RabbitQueueFactory factory, String queueName) { this.factory = factory; @@ -144,7 +144,7 @@ public Builder queue(RabbitQueueFactory factory, String queueName) { * @param maxParallelProcessedMsgs * the maximum number of incoming messages that are processed in * parallel - * @return this builder instance + * @return this stacksBuilder instance */ public Builder 
maxParallelProcessedMsgs(int maxParallelProcessedMsgs) { this.maxParallelProcessedMsgs = maxParallelProcessedMsgs; diff --git a/platform-controller/src/main/java/org/hobbit/controller/gitlab/GitlabControllerImpl.java b/platform-controller/src/main/java/org/hobbit/controller/gitlab/GitlabControllerImpl.java index 1a181f80..555cb2be 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/gitlab/GitlabControllerImpl.java +++ b/platform-controller/src/main/java/org/hobbit/controller/gitlab/GitlabControllerImpl.java @@ -74,9 +74,9 @@ public class GitlabControllerImpl implements GitlabController { private static final int MAX_SIZE_OF_PROJECT_VISIBILITY_CHACHE = 50; private static final int VISIBILITY_CACHE_ELEMENT_LIFETIME_IN_SECS = 30; - protected static final String GITLAB_VISIBILITY_PUBLIC = "public"; - protected static final String GITLAB_VISIBILITY_PROTECTED = "internal"; - protected static final String GITLAB_VISIBILITY_PRIVATE = "private"; + protected static final int GITLAB_VISIBILITY_PUBLIC_ID = 20; + protected static final int GITLAB_VISIBILITY_PROTECTED_ID = 10; + protected static final int GITLAB_VISIBILITY_PRIVATE_ID = 0; // gitlab api private GitlabAPI api; @@ -108,7 +108,7 @@ public GitlabControllerImpl(String token, boolean startFetchingProjects, boolean } api = GitlabAPI.connect(GITLAB_URL, token); timer = new Timer(); - projects = new ArrayList<>(); + //projects = new ArrayList<>(); readyRunnable = new ArrayList<>(); if (useCache) { @@ -260,7 +260,7 @@ public Project gitlabToProject(GitlabProject project) { byte[] systemCfgBytes = api.getRawFileContent(project, b.getCommit().getId(), SYSTEM_CONFIG_FILENAME); systemModel = getCheckedModel(systemCfgBytes, "system", project.getWebUrl()); } catch (Exception e) { - LOGGER.debug("system.ttl configuration file NOT FOUND in {}", project.getWebUrl()); + LOGGER.debug("system.ttl configurationString file NOT FOUND in {}", project.getWebUrl()); } // read benchmark config Model benchmarkModel = null; @@ 
-268,7 +268,7 @@ public Project gitlabToProject(GitlabProject project) { byte[] benchmarkCfgBytes = api.getRawFileContent(project, b.getCommit().getId(), BENCHMARK_CONFIG_FILENAME); benchmarkModel = getCheckedModel(benchmarkCfgBytes, "benchmark", project.getWebUrl()); } catch (Exception e) { - LOGGER.debug("benchmark.ttl configuration file NOT FOUND in {}", project.getWebUrl()); + LOGGER.debug("benchmark.ttl configurationString file NOT FOUND in {}", project.getWebUrl()); } if ((benchmarkModel != null) || (systemModel != null)) { // get user @@ -281,7 +281,7 @@ public Project gitlabToProject(GitlabProject project) { handleErrorMsg(warning, null, false); } Project p = new Project(benchmarkModel, systemModel, user, project.getNameWithNamespace(), - project.getCreatedAt(), project.getVisibility() == GITLAB_VISIBILITY_PRIVATE); + project.getCreatedAt(), project.getVisibility().equals(GITLAB_VISIBILITY_PRIVATE_ID)); return p; } else { // There is no data which is interesting for us. We can ignore this project. @@ -373,7 +373,7 @@ protected Set getProjectsOfUser(String mail) throws IOException { LOGGER.warn("Couldn't find user with mail \"{}\". 
returning empty list of projects.", mail); return new TreeSet<>(); } - // List gitProjects = api.getProjectsViaSudo(user); + //List gitProjects = api.getProjectsViaSudo(user); List gitProjects = getProjectsVisibleForUser(user); Set projectNames = new HashSet(); for (GitlabProject p : gitProjects) { diff --git a/platform-controller/src/main/java/org/hobbit/controller/health/ClusterHealthChecker.java b/platform-controller/src/main/java/org/hobbit/controller/health/ClusterHealthChecker.java index 47e3fa43..8d48ffc3 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/health/ClusterHealthChecker.java +++ b/platform-controller/src/main/java/org/hobbit/controller/health/ClusterHealthChecker.java @@ -18,7 +18,7 @@ /** * A class implementing this interface is able to check whether all given nodes - * are part of the Docker Swarm cluster. + * are part of the Docker Swarm interfaces. * * @author Michael Röder (roeder@informatik.uni-leipzig.de) * diff --git a/platform-controller/src/main/java/org/hobbit/controller/health/ClusterHealthCheckerImpl.java b/platform-controller/src/main/java/org/hobbit/controller/health/ClusterHealthCheckerImpl.java index 5ef29e2f..c950bd87 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/health/ClusterHealthCheckerImpl.java +++ b/platform-controller/src/main/java/org/hobbit/controller/health/ClusterHealthCheckerImpl.java @@ -28,7 +28,7 @@ public class ClusterHealthCheckerImpl implements ClusterHealthChecker { public boolean isClusterHealthy(String[] clusterNodes) { // TODO get the list of available nodes from the Docker Swarm service // TODO compare the lists and make sure that all given nodes are - // available in the cluster + // available in the interfaces return false; } diff --git a/platform-controller/src/main/java/org/hobbit/controller/queue/CloudBasedExperimentQueue.java b/platform-controller/src/main/java/org/hobbit/controller/queue/CloudBasedExperimentQueue.java new file mode 100644 index 
00000000..d91273a8 --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/queue/CloudBasedExperimentQueue.java @@ -0,0 +1,35 @@ +package org.hobbit.controller.queue; + +import org.hobbit.controller.ExperimentManager; +import org.hobbit.controller.data.ExperimentConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * @author Pavel Smirnov. (psmirnov@agtinternational.com / smirnp@gmail.com) + */ +public class CloudBasedExperimentQueue extends ExperimentQueueImpl { + private static final Logger LOGGER = LoggerFactory.getLogger(CloudBasedExperimentQueue.class); + String runningConfig = ""; + + @Override + public ExperimentConfiguration getNextExperiment() { + List all = listAll(); + if(all.size()==0) + runningConfig=""; + else { + LOGGER.info("Experiments in the queue: {}", all.size()); + Map> grouped = all.stream().collect(Collectors.groupingBy(e -> ExperimentManager.getClusterConfiguration(e))); + if (!grouped.containsKey(runningConfig)) + runningConfig = grouped.keySet().iterator().next(); + LOGGER.info("Getting 1/{} experiments with cluster config={}", grouped.get(runningConfig).size(), runningConfig); + return grouped.get(runningConfig).get(0); + } + return null; + } + +} diff --git a/platform-controller/src/main/java/org/hobbit/controller/queue/ExperimentQueue.java b/platform-controller/src/main/java/org/hobbit/controller/queue/ExperimentQueue.java index 89a0df9d..63ae23db 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/queue/ExperimentQueue.java +++ b/platform-controller/src/main/java/org/hobbit/controller/queue/ExperimentQueue.java @@ -60,11 +60,11 @@ public interface ExperimentQueue { public List listAll(); /** - * Retrieves the experiment configuration with the given experiment id. + * Retrieves the experiment configurationString with the given experiment id. 
* * @param experimentId * the id of the experiment that should be retrieved - * @return the experiment configuration or {@code null} if such an experiment + * @return the experiment configurationString or {@code null} if such an experiment * can not be found */ public ExperimentConfiguration getExperiment(String experimentId); diff --git a/platform-controller/src/main/java/org/hobbit/controller/queue/ExperimentQueueImpl.java b/platform-controller/src/main/java/org/hobbit/controller/queue/ExperimentQueueImpl.java index 47f2c64d..8d62bc1d 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/queue/ExperimentQueueImpl.java +++ b/platform-controller/src/main/java/org/hobbit/controller/queue/ExperimentQueueImpl.java @@ -109,7 +109,7 @@ public ExperimentConfiguration getNextExperiment() { } @Override - public void add(ExperimentConfiguration experiment) { + public void add(ExperimentConfiguration experiment){ Gson gson = new Gson(); String typeKey, queueKey; if ((experiment.challengeUri != null) && (experiment.challengeTaskUri != null)) { diff --git a/platform-controller/src/main/java/org/hobbit/controller/utils/ServiceLogsReader.java b/platform-controller/src/main/java/org/hobbit/controller/utils/ServiceLogsReader.java new file mode 100644 index 00000000..f8234d47 --- /dev/null +++ b/platform-controller/src/main/java/org/hobbit/controller/utils/ServiceLogsReader.java @@ -0,0 +1,186 @@ +package org.hobbit.controller.utils; + +import com.spotify.docker.client.DockerClient; +import com.spotify.docker.client.LogStream; +import com.spotify.docker.client.messages.swarm.Task; +import org.hobbit.controller.docker.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + + +/** + * @author Pavel Smirnov. 
(psmirnov@agtinternational.com / smirnp@gmail.com) + */ + + + +public class ServiceLogsReader implements ContainerStateObserver { + private static final Logger LOGGER = LoggerFactory.getLogger(ServiceLogsReader.class); + private Logger logger; + + + private Map displayedLogsLength; + private Map taskServiceMapping; + private Map taskNodeMapping; + private Map serviceImageMapping; + + + + private List monitoredTasks; + private List terminationCallbacks; + private ContainerManager manager; + private int repeatInterval; + private Timer timer; + + public ServiceLogsReader(ContainerManager manager, int repeatInterval) { + this.manager = manager; + this.repeatInterval = repeatInterval; + monitoredTasks = new ArrayList<>(); + timer = new Timer(); + displayedLogsLength = new HashMap<>(); + taskServiceMapping = new HashMap<>(); + serviceImageMapping = new HashMap<>(); + taskNodeMapping = new HashMap<>(); + } + + @Override + public void startObserving() { + timer.scheduleAtFixedRate(new TimerTask() { + @Override + public void run() { + String taskIds[] = null; + // copy the list of containers so that we don't have to care for + // access conflicts with other threads after this point + synchronized (monitoredTasks) { + taskIds = monitoredTasks.toArray(new String[monitoredTasks.size()]); + } + + for (String taskId : taskIds){ + + if(!taskServiceMapping.containsKey(taskId)){ + Task task = manager.inspectTask(taskId); + if(task!=null){ + String serviceId = task.serviceId(); + if (serviceId != null && serviceId.length() > 0){ + taskServiceMapping.put(taskId, serviceId); + taskNodeMapping.put(serviceId, task.nodeId()); + } + } + } + + if(!taskServiceMapping.containsKey(taskId)) + continue; + + String serviceId = taskServiceMapping.get(taskId); + if(!serviceImageMapping.containsKey(serviceId)){ + try { + String imageName = manager.listServices().stream().filter(s -> s.id().equals(serviceId)).findFirst().get().spec().taskTemplate().containerSpec().image(); + String[] splitted = 
imageName.split("/"); + serviceImageMapping.put(serviceId, splitted[splitted.length - 1]); + } + catch (Exception e){ + logger.warn("Failed to get imageName by service id: {}", e.getLocalizedMessage()); + } + } + + + LogStream logStream = null; + + String logs = ""; + try { + String loggerName = "service."+serviceImageMapping.get(serviceId)+" (id="+serviceId+"_node="+taskNodeMapping.get(serviceId)+")"; + logs=""; + try { + logStream = manager.serviceLogs(serviceId, + DockerClient.LogsParam.stderr(), + DockerClient.LogsParam.stdout() + //DockerClient.LogsParam.since(readLogsSince) + ); + if(logStream==null) { + removedObservedContainer(taskId); + return; + } + logs = logStream.readFully(); + } catch (Exception e) { + LOGGER.warn("No service logs are available {}", loggerName, e); + } finally { + if (logStream != null) { + logStream.close(); + } + } + + int prevLogsLength = (displayedLogsLength.containsKey(serviceId)? displayedLogsLength.get(serviceId):0); + if (logs.length() > prevLogsLength) { + logger = LoggerFactory.getLogger(loggerName); + String logsToPrint = logs.substring(prevLogsLength); + String[] splitted = logsToPrint.split("\n"); + for(String line : splitted){ + System.out.println(serviceImageMapping.get(serviceId)+" (id="+serviceId+"): "+line); + //logger.debug(line); + } + displayedLogsLength.put(serviceId, logs.length()); + } + + } + catch (Exception e){ + LOGGER.error("Failed to process logs for service {}: {}", serviceId, e.getMessage()); + } +// + + } + } + }, repeatInterval, repeatInterval); + } + + @Override + public void stopObserving() { + timer.cancel(); + timer.purge(); + } + + @Override + public void addTerminationCallback(ContainerTerminationCallback callback) { + + } + + @Override + public void removeTerminationCallback(ContainerTerminationCallback callback) { + + } + + @Override + public void addObservedContainer(String taskId) { + synchronized (monitoredTasks) { + + if (monitoredTasks.contains(taskId)){ + + return; + } + // if not - add 
+ monitoredTasks.add(taskId); + } + } + + @Override + public void removedObservedContainer(String taskId) { + synchronized (monitoredTasks) { + if(taskServiceMapping.containsKey(taskId)) { + String serviceId = taskServiceMapping.get(taskId); + serviceImageMapping.remove(serviceId); + if (displayedLogsLength.containsKey(serviceId)) + displayedLogsLength.remove(serviceId); + taskServiceMapping.remove(taskId); + } + monitoredTasks.remove(taskId); + } + } + + @Override + public List getObservedContainers() { + synchronized (monitoredTasks) { + return new ArrayList(monitoredTasks); + } + } +} \ No newline at end of file diff --git a/platform-controller/src/main/resources/AWS/bastion.yaml b/platform-controller/src/main/resources/AWS/bastion.yaml new file mode 100644 index 00000000..cdbfc27d --- /dev/null +++ b/platform-controller/src/main/resources/AWS/bastion.yaml @@ -0,0 +1,684 @@ +--- +# Copyright 2018 widdix GmbH +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+AWSTemplateFormatVersion: '2010-09-09' +Description: 'VPC: highly available SSH bastion host/instance, a cloudonaut.io template' +Metadata: + 'AWS::CloudFormation::Interface': + ParameterGroups: + - Label: + default: 'Parent Stacks' + Parameters: + - ParentVPCStack + - ParentAlertStack + - ParentZoneStack + - Label: + default: 'EC2 Parameters' + Parameters: + - InstanceType + - KeyName + - IAMUserSSHAccess + - SystemsManagerAccess + - LogsRetentionInDays + - SubDomainNameWithDot +Parameters: + ParentVPCStack: + Description: 'Stack name of parent VPC stack based on vpc/vpc-*azs.yaml template.' + Type: String + ParentAlertStack: + Description: 'Optional but recommended stack name of parent alert stack based on operations/alert.yaml template.' + Type: String + Default: '' + ParentZoneStack: + Description: 'Optional stack name of parent zone stack based on vpc/zone-*.yaml template.' + Type: String + Default: '' + KeyName: + Description: 'Optional key pair of the ec2-user to establish a SSH connection to the SSH bastion host/instance.' + Type: String + Default: '' + IAMUserSSHAccess: + Description: 'Synchronize public keys of IAM users to enable personalized SSH access (Doc: https://cloudonaut.io/manage-aws-ec2-ssh-access-with-iam/).' + Type: String + Default: false + AllowedValues: + - true + - false + SystemsManagerAccess: + Description: 'Enable AWS Systems Manager agent and authorization.' + Type: String + Default: true + AllowedValues: + - true + - false + InstanceType: + Description: 'Instance type of the SSH bastion host/instance.' + Type: String + Default: 't2.nano' + LogsRetentionInDays: + Description: 'Specifies the number of days you want to retain log events.' + Type: Number + Default: 14 + AllowedValues: [1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, 3653] + SubDomainNameWithDot: + Description: 'Name that is used to create the DNS entry with trailing dot, e.g. ${SubDomainNameWithDot}${HostedZoneName}. 
Leave blank for naked (or apex and bare) domain. Requires ParentZoneStack parameter!' + Type: String + Default: 'ssh.' + Tag: + Type: String + Default: 'Hobbit' +Mappings: + RegionMap: + 'ap-south-1': + AMI: 'ami-d783a9b8' + 'eu-west-3': + AMI: 'ami-2cf54551' + 'eu-west-2': + AMI: 'ami-b8b45ddf' + 'eu-west-1': + AMI: 'ami-466768ac' + 'ap-northeast-2': + AMI: 'ami-afd86dc1' + 'ap-northeast-1': + AMI: 'ami-e99f4896' + 'sa-east-1': + AMI: 'ami-6dca9001' + 'ca-central-1': + AMI: 'ami-0ee86a6a' + 'ap-southeast-1': + AMI: 'ami-05868579' + 'ap-southeast-2': + AMI: 'ami-39f8215b' + 'eu-central-1': + AMI: 'ami-7c4f7097' + 'us-east-1': + AMI: 'ami-b70554c8' + 'us-east-2': + AMI: 'ami-8c122be9' + 'us-west-1': + AMI: 'ami-e0ba5c83' + 'us-west-2': + AMI: 'ami-a9d09ed1' +Conditions: + HasKeyName: !Not [!Equals [!Ref KeyName, '']] + HasIAMUserSSHAccess: !Equals [!Ref IAMUserSSHAccess, 'true'] + HasSystemsManagerAccess: !Equals [!Ref SystemsManagerAccess, 'true'] + HasAlertTopic: !Not [!Equals [!Ref ParentAlertStack, '']] + HasZone: !Not [!Equals [!Ref ParentZoneStack, '']] +Resources: + RecordSet: + Condition: HasZone + Type: 'AWS::Route53::RecordSet' + Properties: + HostedZoneId: {'Fn::ImportValue': !Sub '${ParentZoneStack}-HostedZoneId'} + Name: !Sub + - '${SubDomainNameWithDot}${HostedZoneName}' + - SubDomainNameWithDot: !Ref SubDomainNameWithDot + HostedZoneName: {'Fn::ImportValue': !Sub '${ParentZoneStack}-HostedZoneName'} + ResourceRecords: + - !Ref EIP + TTL: '60' + Type: A + Tags: + - Key: Cluster + Value: !Ref Tag + EIP: + Type: 'AWS::EC2::EIP' + Properties: + Domain: vpc + Logs: + Type: 'AWS::Logs::LogGroup' + Properties: + RetentionInDays: !Ref LogsRetentionInDays +# SecurityGroup: +# Type: 'AWS::EC2::SecurityGroup' +# Properties: +# GroupDescription: !Ref 'AWS::StackName' +# SecurityGroupIngress: +# - IpProtocol: tcp +# FromPort: 22 +# ToPort: 22 +# CidrIp: '0.0.0.0/0' +# VpcId: {'Fn::ImportValue': !Sub '${ParentVPCStack}-VPC'} + InstanceProfile: + Type: 
'AWS::IAM::InstanceProfile' + Properties: + Path: '/' + Roles: + - !Ref IAMRole + IAMRole: + Type: 'AWS::IAM::Role' + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - 'ec2.amazonaws.com' + Action: + - 'sts:AssumeRole' + Path: '/' + ManagedPolicyArns: !If [HasSystemsManagerAccess, ['arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM'], []] + Policies: + - PolicyName: 'ec2' + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'ec2:AssociateAddress' + Resource: + - '*' + - PolicyName: logs + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'logs:CreateLogGroup' + - 'logs:CreateLogStream' + - 'logs:PutLogEvents' + - 'logs:DescribeLogStreams' + Resource: + - 'arn:aws:logs:*:*:*' + IAMPolicySSHAccess: + Type: 'AWS::IAM::Policy' + Condition: HasIAMUserSSHAccess + Properties: + Roles: + - !Ref IAMRole + PolicyName: iam + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'iam:ListUsers' + Resource: + - '*' + - Effect: Allow + Action: + - 'iam:ListSSHPublicKeys' + - 'iam:GetSSHPublicKey' + Resource: + - !Sub 'arn:aws:iam::${AWS::AccountId}:user/*' + LaunchConfiguration: + Type: 'AWS::AutoScaling::LaunchConfiguration' + Metadata: + 'AWS::CloudFormation::Init': + configSets: + #default: !If [HasIAMUserSSHAccess, [awslogs, ssh-access, config, configure-ssh, configure-vpn], [awslogs, config, configure-ssh, configure-vpn]] + default: !If [HasIAMUserSSHAccess, [awslogs, ssh-access, config, configure-ssh, configure-vpn], [awslogs, config, configure-ssh, configure-vpn]] + awslogs: + packages: + yum: + awslogs: [] + files: + '/etc/awslogs/awscli.conf': + content: !Sub | + [default] + region = ${AWS::Region} + [plugins] + cwlogs = cwlogs + mode: '000644' + owner: root + group: root + '/etc/awslogs/awslogs.conf': + content: !Sub | + [general] + state_file = /var/lib/awslogs/agent-state + 
[/var/log/amazon/ssm/amazon-ssm-agent.log] + datetime_format = %Y-%m-%d %H:%M:%S + file = /var/log/amazon/ssm/amazon-ssm-agent.log + log_stream_name = {instance_id}/var/log/amazon/ssm/amazon-ssm-agent.log + log_group_name = ${Logs} + [/var/log/amazon/ssm/errors.log] + datetime_format = %Y-%m-%d %H:%M:%S + file = /var/log/amazon/ssm/errors.log + log_stream_name = {instance_id}/var/log/amazon/ssm/errors.log + log_group_name = ${Logs} + [/var/log/audit/audit.log] + file = /var/log/audit/audit.log + log_stream_name = {instance_id}/var/log/audit/audit.log + log_group_name = ${Logs} + [/var/log/awslogs.log] + datetime_format = %Y-%m-%d %H:%M:%S + file = /var/log/awslogs.log + log_stream_name = {instance_id}/var/log/awslogs.log + log_group_name = ${Logs} + [/var/log/boot.log] + file = /var/log/boot.log + log_stream_name = {instance_id}/var/log/boot.log + log_group_name = ${Logs} + [/var/log/cfn-hup.log] + datetime_format = %Y-%m-%d %H:%M:%S + file = /var/log/cfn-hup.log + log_stream_name = {instance_id}/var/log/cfn-hup.log + log_group_name = ${Logs} + [/var/log/cfn-init-cmd.log] + datetime_format = %Y-%m-%d %H:%M:%S + file = /var/log/cfn-init-cmd.log + log_stream_name = {instance_id}/var/log/cfn-init-cmd.log + log_group_name = ${Logs} + [/var/log/cfn-init.log] + datetime_format = %Y-%m-%d %H:%M:%S + file = /var/log/cfn-init.log + log_stream_name = {instance_id}/var/log/cfn-init.log + log_group_name = ${Logs} + [/var/log/cfn-wire.log] + datetime_format = %Y-%m-%d %H:%M:%S + file = /var/log/cfn-wire.log + log_stream_name = {instance_id}/var/log/cfn-wire.log + log_group_name = ${Logs} + [/var/log/cloud-init-output.log] + file = /var/log/cloud-init-output.log + log_stream_name = {instance_id}/var/log/cloud-init-output.log + log_group_name = ${Logs} + [/var/log/cloud-init.log] + datetime_format = %b %d %H:%M:%S + file = /var/log/cloud-init.log + log_stream_name = {instance_id}/var/log/cloud-init.log + log_group_name = ${Logs} + [/var/log/cron] + datetime_format = %b %d 
%H:%M:%S + file = /var/log/cron + log_stream_name = {instance_id}/var/log/cron + log_group_name = ${Logs} + [/var/log/dmesg] + file = /var/log/dmesg + log_stream_name = {instance_id}/var/log/dmesg + log_group_name = ${Logs} + [/var/log/grubby_prune_debug] + file = /var/log/grubby_prune_debug + log_stream_name = {instance_id}/var/log/grubby_prune_debug + log_group_name = ${Logs} + [/var/log/maillog] + datetime_format = %b %d %H:%M:%S + file = /var/log/maillog + log_stream_name = {instance_id}/var/log/maillog + log_group_name = ${Logs} + [/var/log/messages] + datetime_format = %b %d %H:%M:%S + file = /var/log/messages + log_stream_name = {instance_id}/var/log/messages + log_group_name = ${Logs} + [/var/log/secure] + datetime_format = %b %d %H:%M:%S + file = /var/log/secure + log_stream_name = {instance_id}/var/log/secure + log_group_name = ${Logs} + [/var/log/yum.log] + datetime_format = %b %d %H:%M:%S + file = /var/log/yum.log + log_stream_name = {instance_id}/var/log/yum.log + log_group_name = ${Logs} + mode: '000644' + owner: root + group: root + services: + sysvinit: + awslogsd: + enabled: true + ensureRunning: true + packages: + yum: + - awslogs + files: + - '/etc/awslogs/awslogs.conf' + - '/etc/awslogs/awscli.conf' + ssh-access: + files: + '/opt/authorized_keys_command.sh': + content: | + #!/bin/bash -e + if [ -z "$1" ]; then + exit 1 + fi + UnsaveUserName="$1" + UnsaveUserName=${UnsaveUserName//".plus."/"+"} + UnsaveUserName=${UnsaveUserName//".equal."/"="} + UnsaveUserName=${UnsaveUserName//".comma."/","} + UnsaveUserName=${UnsaveUserName//".at."/"@"} + aws iam list-ssh-public-keys --user-name "$UnsaveUserName" --query "SSHPublicKeys[?Status == 'Active'].[SSHPublicKeyId]" --output text | while read -r KeyId; do + aws iam get-ssh-public-key --user-name "$UnsaveUserName" --ssh-public-key-id "$KeyId" --encoding SSH --query "SSHPublicKey.SSHPublicKeyBody" --output text + done + mode: '000755' + owner: root + group: root + '/opt/import_users.sh': + content: | + 
#!/bin/bash -e + aws iam list-users --query "Users[].[UserName]" --output text | while read User; do + SaveUserName="$User" + SaveUserName=${SaveUserName//"+"/".plus."} + SaveUserName=${SaveUserName//"="/".equal."} + SaveUserName=${SaveUserName//","/".comma."} + SaveUserName=${SaveUserName//"@"/".at."} + if [ "${#SaveUserName}" -le "32" ]; then + if ! id -u "$SaveUserName" > /dev/null 2>&1; then + # don't grant sudo rights on bastion host! + /usr/sbin/useradd "$SaveUserName" + fi + else + echo "Can not import IAM user ${SaveUserName}. User name is longer than 32 characters." + fi + done + mode: '000755' + owner: root + group: root + '/etc/cron.d/import_users': + content: | + */10 * * * * root /opt/import_users.sh + mode: '000644' + owner: root + group: root + commands: + 'a_configure_sshd_command': + command: 'sed -i "s:#AuthorizedKeysCommand none:AuthorizedKeysCommand /opt/authorized_keys_command.sh:g" /etc/ssh/sshd_config' + 'b_configure_sshd_commanduser': + command: 'sed -i "s:#AuthorizedKeysCommandUser nobody:AuthorizedKeysCommandUser nobody:g" /etc/ssh/sshd_config' + 'c_import_users': + command: './import_users.sh' + cwd: '/opt' + services: + sysvinit: + sshd: + enabled: true + ensureRunning: true + commands: + - 'a_configure_sshd_command' + - 'b_configure_sshd_commanduser' + config: + packages: + yum: + mariadb: [] + files: + '/etc/cfn/cfn-hup.conf': + content: !Sub | + [main] + stack=${AWS::StackId} + region=${AWS::Region} + interval=1 + mode: '000400' + owner: root + group: root + '/etc/cfn/hooks.d/cfn-auto-reloader.conf': + content: !Sub | + [cfn-auto-reloader-hook] + triggers=post.update + path=Resources.LaunchConfiguration.Metadata.AWS::CloudFormation::Init + action=/opt/aws/bin/cfn-init --verbose --stack=${AWS::StackName} --region=${AWS::Region} --resource=LaunchConfiguration + runas=root + services: + sysvinit: + cfn-hup: + enabled: true + ensureRunning: true + files: + - '/etc/cfn/cfn-hup.conf' + - '/etc/cfn/hooks.d/cfn-auto-reloader.conf' + 
amazon-ssm-agent: + enabled: !If [HasSystemsManagerAccess, true, false] + ensureRunning: !If [HasSystemsManagerAccess, true, false] + configure-ssh: + packages: + yum: + mariadb: [] + files: + '/etc/cfn/cfn-hup.conf': + content: !Sub | + [main] + stack=${AWS::StackId} + region=${AWS::Region} + interval=1 + mode: '000400' + owner: root + group: root + '/etc/cfn/hooks.d/cfn-auto-reloader.conf': + content: !Sub | + [cfn-auto-reloader-hook] + triggers=post.update + path=Resources.LaunchConfiguration.Metadata.AWS::CloudFormation::Init + action=/opt/aws/bin/cfn-init --verbose --stack=${AWS::StackName} --region=${AWS::Region} --resource=LaunchConfiguration + runas=root + services: + sysvinit: + cfn-hup: + enabled: true + ensureRunning: true + files: + - '/etc/cfn/cfn-hup.conf' + - '/etc/cfn/hooks.d/cfn-auto-reloader.conf' + amazon-ssm-agent: + enabled: !If [HasSystemsManagerAccess, true, false] + ensureRunning: !If [HasSystemsManagerAccess, true, false] + configure-vpn: + packages: + yum: + openvpn: [openswan, xl2tpd] + files: + '/etc/ipsec.conf': + content: !Sub | + version 2.0 + + config setup + dumpdir=/var/run/pluto/ + nat_traversal=yes + virtual_private=%v4:10.0.0.0/8,%v4:192.168.0.0/16,%v4:172.16.0.0/12,%v4:25.0.0.0/8,%v6:fd00::/8,%v6:fe80::/10 + oe=off + protostack=netkey + nhelpers=0 + interfaces=%defaultroute + + conn vpnpsk + auto=add + left=$PRIVATE_IP + leftid=$PUBLIC_IP + leftsubnet=$PRIVATE_IP/32 + leftnexthop=%defaultroute + leftprotoport=17/1701 + rightprotoport=17/%any + right=%any + rightsubnetwithin=0.0.0.0/0 + forceencaps=yes + authby=secret + pfs=no + type=transport + auth=esp + ike=3des-sha1 + phase2alg=3des-sha1 + dpddelay=30 + dpdtimeout=120 + dpdaction=clear + mode: '000400' + owner: root + group: root + + '/etc/ipsec.secrets': + content: !Sub | + $PUBLIC_IP %any : PSK \"$IPSEC_PSK\" + mode: '000400' + owner: root + group: root + + '/etc/xl2tpd/xl2tpd.conf': + content: !Sub | + [global] + port = 1701 + + ;debug avp = yes + ;debug network = yes + 
;debug state = yes + ;debug tunnel = yes + + [lns default] + ip range = 192.168.42.10-192.168.42.250 + local ip = 192.168.42.1 + require chap = yes + refuse pap = yes + require authentication = yes + name = l2tpd + ;ppp debug = yes + pppoptfile = /etc/ppp/options.xl2tpd + length bit = yes + mode: '000400' + owner: root + group: root + '/etc/ppp/options.xl2tpd': + content: !Sub | + ipcp-accept-local + ipcp-accept-remote + ms-dns 8.8.8.8 + ms-dns 8.8.4.4 + noccp + auth + crtscts + idle 1800 + mtu 1280 + mru 1280 + lock + connect-delay 5000 + mode: '000400' + owner: root + group: root + '/etc/ppp/chap-secrets': + content: !Sub | + # Secrets for authentication using CHAP + # client\tserver\tsecret\t\t\tIP addresses + + $VPN_USER\tl2tpd $VPN_PASSWORD * + mode: '000400' + owner: root + group: root + + services: + sysvinit: + cfn-hup: + enabled: true + ensureRunning: true + files: + - '/etc/cfn/cfn-hup.conf' + - '/etc/cfn/hooks.d/cfn-auto-reloader.conf' + amazon-ssm-agent: + enabled: !If [HasSystemsManagerAccess, true, false] + ensureRunning: !If [HasSystemsManagerAccess, true, false] + Properties: + AssociatePublicIpAddress: true + EbsOptimized: false + IamInstanceProfile: !Ref InstanceProfile + ImageId: !FindInMap [RegionMap, !Ref 'AWS::Region', AMI] + InstanceType: !Ref InstanceType + SecurityGroups: + - 'Fn::ImportValue': !Sub '${ParentVPCStack}-BastionSecurityGroup' +# - !Ref SecurityGroup + KeyName: !If [HasKeyName, !Ref KeyName, !Ref 'AWS::NoValue'] + UserData: + 'Fn::Base64': !Sub | + #!/bin/bash -xe + echo "Executing user data" >> /home/ec2-user/init.log + + echo "Getting instance id" >> /home/ec2-user/init.log + INSTANCEID=$(curl -s -m 60 http://169.254.169.254/latest/meta-data/instance-id) + echo "Instance id is $INSTANCEID" >> /home/ec2-user/init.log + echo "Associating address with instanceId" >> /home/ec2-user/init.log + echo "aws --region ${AWS::Region} ec2 associate-address --instance-id $INSTANCEID --allocation-id ${EIP.AllocationId}" >> 
/home/ec2-user/init.log + aws --region ${AWS::Region} ec2 associate-address --instance-id $INSTANCEID --allocation-id ${EIP.AllocationId} + + echo "Executing launch configuration" >> /home/ec2-user/int.log + echo "/opt/aws/bin/cfn-init -v --region ${AWS::Region} --stack ${AWS::StackName} --resource LaunchConfiguration" >> /home/ec2-user/cfn.sh + echo "Sending finish signals" >> /home/ec2-user/init.log + echo "/opt/aws/bin/cfn-signal -e $? --region ${AWS::Region} --stack ${AWS::StackName} --resource AutoScalingGroup" >> /home/ec2-user/cfn.sh + echo "echo 'Signals should be sent' >> /home/ec2-user/init.log" >> /home/ec2-user/cfn.sh + sudo sh /home/ec2-user/cfn.sh >> /home/ec2-user/init.log +# + #!/bin/bash -ex +# trap '/opt/aws/bin/cfn-signal -e 1 --stack ${AWS::StackName} --resource AutoScalingGroup --region ${AWS::Region}' ERR +# +# /opt/aws/bin/cfn-init -v --stack ${AWS::StackName} --resource LaunchConfiguration --region ${AWS::Region} +# /opt/aws/bin/cfn-signal -e 0 --stack ${AWS::StackName} --resource AutoScalingGroup --region ${AWS::Region} + + AutoScalingGroup: + Type: 'AWS::AutoScaling::AutoScalingGroup' + Properties: + DesiredCapacity: '1' + LaunchConfigurationName: !Ref LaunchConfiguration + MaxSize: '1' + MinSize: '1' + Tags: + - Key: Cluster + Value: !Ref Tag + PropagateAtLaunch: true + - Key: Name + Value: !Sub + - 'SSH bastion host/instance ${CidrBlock}' + - CidrBlock: {'Fn::ImportValue': !Sub '${ParentVPCStack}-CidrBlock'} + PropagateAtLaunch: true + NotificationConfigurations: !If + - HasAlertTopic + - - NotificationTypes: + - 'autoscaling:EC2_INSTANCE_LAUNCH_ERROR' + - 'autoscaling:EC2_INSTANCE_TERMINATE_ERROR' + TopicARN: {'Fn::ImportValue': !Sub '${ParentAlertStack}-TopicARN'} + - [] + VPCZoneIdentifier: !Split [',', {'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetsPublic'}] + CreationPolicy: + ResourceSignal: + Count: 1 + Timeout: PT180M + UpdatePolicy: + AutoScalingRollingUpdate: + PauseTime: PT10M + SuspendProcesses: + - HealthCheck + - 
ReplaceUnhealthy + - AZRebalance + - AlarmNotification + - ScheduledActions + WaitOnResourceSignals: true + CPUTooHighAlarm: + Condition: HasAlertTopic + Type: 'AWS::CloudWatch::Alarm' + Properties: + AlarmDescription: 'Average CPU utilization over last 10 minutes higher than 80%' + Namespace: 'AWS/EC2' + MetricName: CPUUtilization + Statistic: Average + Period: 600 + EvaluationPeriods: 1 + ComparisonOperator: GreaterThanThreshold + Threshold: 80 + AlarmActions: + - {'Fn::ImportValue': !Sub '${ParentAlertStack}-TopicARN'} + Dimensions: + - Name: AutoScalingGroupName + Value: !Ref AutoScalingGroup + Tags: + - Key: Cluster + Value: !Ref Tag +Outputs: + TemplateID: + Description: 'cloudonaut.io template id.' + Value: 'vpc/vpc-ssh-bastion' + TemplateVersion: + Description: 'cloudonaut.io template version.' + Value: 'latest' + StackName: + Description: 'Stack name.' + Value: !Sub '${AWS::StackName}' + IPAddress: + Description: 'The public IP address of the SSH bastion host/instance.' + Value: !Ref EIP + Export: + Name: !Sub '${AWS::StackName}-IPAddress' \ No newline at end of file diff --git a/platform-controller/src/main/resources/AWS/swarm-mode/kms.yaml b/platform-controller/src/main/resources/AWS/swarm-mode/kms.yaml new file mode 100644 index 00000000..4e040e71 --- /dev/null +++ b/platform-controller/src/main/resources/AWS/swarm-mode/kms.yaml @@ -0,0 +1,76 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: 'Docker Swarm - KMS' + + +Resources: + + SwarmTokenKey: + Type: "AWS::KMS::Key" + Properties: + Description: "KMS key to encrypt swarm join tokens" + KeyPolicy: + Id: key-docker-swarm + Version: '2012-10-17' + Statement: + - Sid: Enable IAM User Permissions + Effect: Allow + Principal: + AWS: + - !Sub arn:aws:iam::${AWS::AccountId}:root + Action: kms:* + Resource: "*" + # - Sid: Allow access for Key Administrators + # Effect: Allow + # Principal: + # AWS: + # - !Sub arn:aws:iam::${AWS::AccountId}:root + # Action: + # - kms:Create* + # - kms:Describe* + # - 
kms:Enable* + # - kms:List* + # - kms:Put* + # - kms:Update* + # - kms:Revoke* + # - kms:Disable* + # - kms:Get* + # - kms:Delete* + # - kms:TagResource + # - kms:UntagResource + # - kms:ScheduleKeyDeletion + # - kms:CancelKeyDeletion + # Resource: "*" + # - Sid: Allow use of the key + # Effect: Allow + # Principal: + # AWS: + # - !Sub arn:aws:iam::${AWS::AccountId}:root + # Action: + # - kms:Encrypt + # - kms:Decrypt + # - kms:ReEncrypt* + # - kms:GenerateDataKey* + # - kms:DescribeKey + # Resource: "*" + # - Sid: Allow attachment of persistent resources + # Effect: Allow + # Principal: + # AWS: + # - !Sub arn:aws:iam::${AWS::AccountId}:root + # Action: + # - kms:CreateGrant + # - kms:ListGrants + # - kms:RevokeGrant + # Resource: "*" + # Condition: + # Bool: + # kms:GrantIsForAWSResource: true +Outputs: + + SwarmTokenKey: + Value: !Ref SwarmTokenKey + + SwarmTokenKeyArn: + Value: !GetAtt SwarmTokenKey.Arn + Export: + Name: !Sub '${AWS::StackName}-SwarmTokenKeyArn' diff --git a/platform-controller/src/main/resources/AWS/swarm-mode/manager.yaml b/platform-controller/src/main/resources/AWS/swarm-mode/manager.yaml new file mode 100644 index 00000000..c8852b2b --- /dev/null +++ b/platform-controller/src/main/resources/AWS/swarm-mode/manager.yaml @@ -0,0 +1,654 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: 'Docker Swarm - Manager' + + +Parameters: + + ParentVPCStack: + Description: 'Stack name of parent VPC stack based on vpc/vpc-*azs.yaml template.' + Type: String + + KeyName: + Description: 'Optional key pair of the ec2-user to establish a SSH connection to the EC2 instance.' + Type: String + Default: '' + + IAMUserSSHAccess: + Description: 'Synchronize public keys of IAM users to enable personalized SSH access (Doc: https://cloudonaut.io/manage-aws-ec2-ssh-access-with-iam/).' + Type: String + Default: false + AllowedValues: + - true + - false + + InstanceType: + Description: 'The instance type for the EC2 instance.' 
+ Type: String + Default: 't2.micro' + + DesiredCapacity: + Description: 'The number of manager nodes' + Type: Number + Default: 1 + AllowedValues: [1,3,5,7] + + ManagerSubnetsReach: + Description: 'Should the managers have direct access to the Internet or do you prefer private subnets with NAT?' + Type: String + Default: Private + AllowedValues: + - Public + - Private + +# SwarmManagerAutoScalingGroup: +# Description: AutoScaling Group of Swarm managers +# Type: String + +# SecurityGroups: +# Description: Security group for which are allowed to talk to ASG +# Type: CommaDelimitedList + + ParentSecurityGroupsStack: + Description: 'ParentSecurityGroupsStack' + Type: String + + TargetGroups: + Description: Security group for which are allowed to talk to ASG + Type: CommaDelimitedList + Default: '' + + DockerVersion: + Description: 'Specifies the version of the Docker engine' + Type: String + Default: "17.12.1" + + DockerRepository: + Description: 'Specifies if stable or edge repository should be used' + Type: String + Default: stable + AllowedValues: + - stable + - edge + + JoinToken: + Description: 'The token to join the swarm cluster as a manager node' + Type: String + Default: '' + NoEcho: true + +# JoinTokenKmsKey: +# Description: 'KMS key to decrypt swarm join tokens' +# Type: String + + ParentKeysManagementStack: + Description: 'ParentKeysManagementStack' + Type: String + + BucketName: + Description: 'Bucket name for placing join tokens' + Type: String + Default: '' + NoEcho: true + Tag: + Type: String + Default: 'Hobbit' + +# ParentVPCClusterId: +# Description: 'ID of parent VPC cluster based on vpc/vpc-*azs.yaml template.' 
+# Type: String + +# NatInstanceIP: +# Description: 'Public IP address of VPC NAT to access the S3 bucket' +# Type: String + +Conditions: + + HasKeyName: !Not [!Equals [!Ref KeyName, '']] + HasIAMUserSSHAccess: !Equals [!Ref IAMUserSSHAccess, 'true'] + HasSwarmJoinToken: !Not [!Equals [!Ref JoinToken, '']] + + +Resources: + + InstanceProfile: + Type: 'AWS::IAM::InstanceProfile' + Properties: + Path: '/' + Roles: + - !Ref IAMRole + + IAMRole: + Type: 'AWS::IAM::Role' + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - 'ec2.amazonaws.com' + Action: + - 'sts:AssumeRole' + Path: '/' + Policies: + - PolicyName: logs + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'logs:CreateLogGroup' + - 'logs:CreateLogStream' + - 'logs:PutLogEvents' + - 'logs:DescribeLogStreams' + Resource: + - 'arn:aws:logs:*:*:*' + - PolicyName: asg + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'autoscaling:DescribeAutoScalingGroups' + - 'autoscaling:DescribeAutoScalingInstances' + - 'ec2:DescribeInstances' + Resource: + - '*' + - PolicyName: kms + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'kms:Decrypt' + - 'kms:DescribeKey' + Resource: + 'Fn::ImportValue': !Sub '${ParentKeysManagementStack}-SwarmTokenKeyArn' + #- !Ref JoinTokenKmsKey + + IAMPolicySSHAccess: + Type: 'AWS::IAM::Policy' + Condition: HasIAMUserSSHAccess + Properties: + Roles: + - !Ref IAMRole + PolicyName: iam + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'iam:ListUsers' + Resource: + - '*' + - Effect: Allow + Action: + - 'iam:ListSSHPublicKeys' + - 'iam:GetSSHPublicKey' + Resource: + - !Sub 'arn:aws:iam::${AWS::AccountId}:user/*' + + S3Endpoint: + Type: 'AWS::EC2::VPCEndpoint' + Properties: + VpcId: {'Fn::ImportValue': !Sub '${ParentVPCStack}-VPC'} + PolicyDocument: + Version: 2012-10-17 + Statement: + 
- Action: + - 's3:PutObject' + - 's3:GetObject' + Resource: + - !Sub 'arn:aws:s3:::${BucketName}' + - !Sub 'arn:aws:s3:::${BucketName}/*' + Effect: Allow + Principal: '*' + RouteTableIds: + - {'Fn::ImportValue': !Sub '${ParentVPCStack}-RouteTableAPrivate'} + - {'Fn::ImportValue': !Sub '${ParentVPCStack}-RouteTableAPublic' } + ServiceName: !Sub 'com.amazonaws.${AWS::Region}.s3' + + S3BucketPolicy: + Type: 'AWS::S3::BucketPolicy' + Properties: + Bucket: !Sub '${BucketName}' + PolicyDocument: + Statement: + - Sid: Access-to-specific-VPCE-only + Effect: Allow + Principal: "*" + Action: + - 's3:PutObject' + - 's3:GetObject' + Resource: + - !Sub 'arn:aws:s3:::${BucketName}' + - !Sub 'arn:aws:s3:::${BucketName}/*' + Condition: + StringEquals: + 'aws:sourceVpce': !Ref S3Endpoint + + AutoScalingGroup: + Type: AWS::AutoScaling::AutoScalingGroup + Properties: + #AutoScalingGroupName: !Ref SwarmManagerAutoScalingGroup + # AvailabilityZones: !Ref AvailabilityZones + VPCZoneIdentifier: + - 'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetA${ManagerSubnetsReach}' + #- 'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetB${ManagerSubnetsReach}' + #- 'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetC${ManagerSubnetsReach}' + LaunchConfigurationName: !Ref LaunchConfiguration + MinSize: 0 + MaxSize: !Ref DesiredCapacity + DesiredCapacity: !Ref DesiredCapacity + #TargetGroupARNs: !Ref TargetGroups + MetricsCollection: + - Granularity: 1Minute + Metrics: + - GroupInServiceInstances + Tags: + - Key: Cluster + Value: !Ref Tag + PropagateAtLaunch: true + - Key: Name + Value: !Sub ${AWS::StackName} + PropagateAtLaunch: 'true' + CreationPolicy: + ResourceSignal: + Timeout: PT180M + UpdatePolicy: + AutoScalingRollingUpdate: + MinInstancesInService: !Ref DesiredCapacity + MaxBatchSize: '1' + PauseTime: PT180M + SuspendProcesses: + - AlarmNotification + WaitOnResourceSignals: 'true' + + LaunchConfiguration: + Type: AWS::AutoScaling::LaunchConfiguration + Metadata: + AWS::CloudFormation::Init: + 
configSets: + default: + !If + - HasSwarmJoinToken + - !If [HasIAMUserSSHAccess, [docker-ubuntu, swarm-join], [docker-ubuntu, swarm-join]] + - !If [HasIAMUserSSHAccess, [docker-ubuntu, swarm-init, hobbit], [docker-ubuntu, swarm-init, hobbit]] +# - !If [HasIAMUserSSHAccess, [hobbit], [hobbit]] + + ssh-access: + files: + '/opt/authorized_keys_command.sh': + content: | + #!/bin/bash -e + if [ -z "$1" ]; then + exit 1 + fi + SaveUserName="$1" + SaveUserName=${SaveUserName//"+"/".plus."} + SaveUserName=${SaveUserName//"="/".equal."} + SaveUserName=${SaveUserName//","/".comma."} + SaveUserName=${SaveUserName//"@"/".at."} + aws iam list-ssh-public-keys --user-name "$SaveUserName" --query "SSHPublicKeys[?Status == 'Active'].[SSHPublicKeyId]" --output text | while read KeyId; do + aws iam get-ssh-public-key --user-name "$SaveUserName" --ssh-public-key-id "$KeyId" --encoding SSH --query "SSHPublicKey.SSHPublicKeyBody" --output text + done + mode: '000755' + owner: root + group: root + '/opt/import_users.sh': + content: | + #!/bin/bash + aws iam list-users --query "Users[].[UserName]" --output text | while read User; do + SaveUserName="$User" + SaveUserName=${SaveUserName//"+"/".plus."} + SaveUserName=${SaveUserName//"="/".equal."} + SaveUserName=${SaveUserName//","/".comma."} + SaveUserName=${SaveUserName//"@"/".at."} + if id -u "$SaveUserName" >/dev/null 2>&1; then + echo "$SaveUserName exists" + else + #sudo will read each file in /etc/sudoers.d, skipping file names that end in ‘~’ or contain a ‘.’ character to avoid causing problems with package manager or editor temporary/backup files. + SaveUserFileName=$(echo "$SaveUserName" | tr "." 
" ") + /usr/sbin/adduser "$SaveUserName" + echo "$SaveUserName ALL=(ALL) NOPASSWD:ALL" > "/etc/sudoers.d/$SaveUserFileName" + fi + done + mode: '000755' + owner: root + group: root + '/etc/cron.d/import_users': + content: | + */10 * * * * root /opt/import_users.sh + mode: '000644' + owner: root + group: root + commands: + 'a_configure_sshd_command': + command: 'sed -i "s:#AuthorizedKeysCommand none:AuthorizedKeysCommand /opt/authorized_keys_command.sh:g" /etc/ssh/sshd_config' + 'b_configure_sshd_commanduser': + command: 'sed -i "s:#AuthorizedKeysCommandUser nobody:AuthorizedKeysCommandUser nobody:g" /etc/ssh/sshd_config' + 'c_import_users': + command: './import_users.sh' + cwd: '/opt' + services: + sysvinit: + sshd: + enabled: true + ensureRunning: true + commands: + - 'a_configure_sshd_command' + - 'b_configure_sshd_commanduser' + + docker-ubuntu: + commands: + 'a_start_installation': + command: 'echo "docker-ubuntu started" >> /home/ubuntu/docker.log' + 'b_get_certificates': + command: 'sudo apt-get install apt-transport-https ca-certificates curl software-properties-common htop socat -y' + 'c_set_gpg_key': + command: 'curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -' + 'd_add_fingerprint': + command: 'sudo apt-key fingerprint 0EBFCD88' + 'e_add_docker_repo': + command: !Sub 'sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) ${DockerRepository}"' + 'f_update_aptget': + command: 'sudo apt-get update' + 'g_install_docker': + command: !Sub 'sudo apt-get install -y docker-ce=${DockerVersion}~ce-0~ubuntu' +# 'g_start_service': +# command: 'sudo service docker start' + 'h_add_ubuntu_user_to_docker_group': + command: 'sudo usermod -aG docker ubuntu' + 'i_verify_installation': + command: 'sudo docker run hello-world' + 'k_verify_installation': + command: 'docker run hello-world >> /home/ubuntu/docker.log' + 'm_report_installation': + command: 'echo "docker-ubuntu finished correctly" >> 
/home/ubuntu/docker.log' + swarm-init: + commands: + 'a_init_swarm': + command: !Sub | + echo "init_swarm" >> /home/ubuntu/init.log + docker swarm init >> /home/ubuntu/swarm.log + docker swarm join-token worker | grep token | awk '{ print $5 }' > /home/ubuntu/token + + docker swarm join-token worker | awk '{ if(NR==3) print }' >> /home/ubuntu/worker_join.sh + echo "Uploading token to S3" >> /home/ubuntu/swarm.log + echo "sudo aws s3 cp /home/ubuntu/token s3://${BucketName}/token --region ${AWS::Region}" >> /home/ubuntu/swarm.log + aws s3 cp /home/ubuntu/worker_join.sh s3://${BucketName}/worker_join.sh --region ${AWS::Region} > /home/ubuntu/swarm.log + echo "Upload to S3 should be finished" >> /home/ubuntu/swarm.log + + NODE_ID=$(docker info | grep NodeID | awk '{print $2}') + echo "Adding labels (master/data) to $NODE_ID" >> /home/ubuntu/swarm.log + + docker node update $NODE_ID --label-add org.hobbit.type=master + docker node update $NODE_ID --label-add org.hobbit.workergroup=master + docker node update $NODE_ID --label-add org.hobbit.name=master + #aws s3 mb s3://${BucketName} --region ${AWS::Region} +# #aws s3 cp /home/ubuntu/token s3://${BucketName}/token --region ${AWS::Region} > /home/ubuntu/swarm.log +# docker node update $NODE_ID --label-add org.hobbit.type=data +# docker node update $NODE_ID --label-add org.hobbit.workergroup=data +# docker node update $NODE_ID --label-add org.hobbit.name=data + + 'b_swarm_healthcheck': + command: 'docker node ls >> /home/ubuntu/swarm.log' + + swarm-join: + commands: + 'a_join_swarm': + command: !Sub | + echo "swarm-join -> a_join_swarm" >> /home/ubuntu/init.log + # Decrypt join token via KMS + echo -n "${JoinToken}" | base64 --decode > ciphertextblob + JOIN_TOKEN=$(aws kms decrypt --region ${AWS::Region} --ciphertext-blob fileb://ciphertextblob --query Plaintext --output text | base64 --decode) + + INSTANCE_ID="`wget -q -O - http://instance-data/latest/meta-data/instance-id`" + ASG_NAME=$(aws autoscaling 
describe-auto-scaling-instances --instance-ids $INSTANCE_ID --region ${AWS::Region} --query AutoScalingInstances[].AutoScalingGroupName --output text) + + for ID in $(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names $ASG_NAME --region ${AWS::Region} --query AutoScalingGroups[].Instances[].InstanceId --output text); + do + # Ignore "myself" + if [ "$ID" == "$INSTANCE_ID" ] ; then + continue; + fi + + IP=$(aws ec2 describe-instances --instance-ids $ID --region ${AWS::Region} --query Reservations[].Instances[].PrivateIpAddress --output text) + if [ ! -z "$IP" ] ; then + echo "Try to join swarm with IP $IP" + + # Join the swarm; if it fails try the next one + docker swarm join --token $JOIN_TOKEN $IP:2377 && break || continue + fi + done + + 'b_swarm_healthcheck': + command: 'docker node ls >> /home/ubuntu/swarm.log' + + hobbit: + commands: + 'a_install_prereqs': + command: !Sub | + echo "installing prereqs" >> /home/ubuntu/init.log + sudo apt-get install make maven supervisor socat -y + echo "installing docker compose" >> /home/ubuntu/init.log + sudo curl -L https://github.com/docker/compose/releases/download/1.22.0/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose + sudo chmod +x /usr/local/bin/docker-compose >> /home/ubuntu/docker.log + sudo ln /usr/local/bin/docker-compose /usr/bin/docker-compose >> /home/ubuntu/docker.log + echo "prereqs installed" >> /home/ubuntu/init.log + + 'b_start_socat_daemon': + command: !Sub | + cat > /opt/getNodeIps.sh << 'EOL' + NODES=$(docker node ls --format "{{.Hostname}} {{.Status}}" | grep Ready | awk '{print $1}') + LINE="" + for NODE in $NODES; + do + CID=$(echo $NODE | cut -c4-15) + CID2=$(echo $CID | tr - .) 
+ LINE="$LINE '$CID2:$2'," + done + echo $LINE + EOL + + cat > /opt/updatePrometheus.sh << 'EOL' + cp /opt/hobbit-platform-2.0.5/config/prometheus/prometheus.conf.template /opt/hobbit-platform-2.0.5/config/prometheus/prometheus.conf + LINE=$(sh /opt/getNodeIps.sh container-exporter 9104) + sed -i "s~'container-exporter:9104'~$LINE~g" /opt/hobbit-platform-2.0.5/config/prometheus/prometheus.conf + LINE=$(sh /opt/getNodeIps.sh cAdvisor 8081) + sed -i "s~'cAdvisor:8081'~$LINE~g" /opt/hobbit-platform-2.0.5/config/prometheus/prometheus.conf + LINE=$(sh /opt/getNodeIps.sh node-exporter 9100) + sed -i "s~'node-exporter:9100'~$LINE~g" /opt/hobbit-platform-2.0.5/config/prometheus/prometheus.conf + sed -i "s~',]~']~g" /opt/hobbit-platform-2.0.5/config/prometheus/prometheus.conf + echo "stopping working prometheus" >> /var/log/prometheusLoop.log + sudo docker stop $(sudo docker ps --filter "name=prometheus" --format "{{.ID}}") + cAdvisorID=$(sudo docker ps --filter "name=cAdvisor" --format "{{.ID}}") + exporterID=$(sudo docker ps --filter "name=node-exporter" --format "{{.ID}}") + echo "starting prometheus" >> /var/log/prometheusLoop.log + echo "docker run --name prometheus -d --net hobbit-core -p 9090:9090 --link $cAdvisorID:cAdvisor --link $exporterID:node-exporter --rm -v /opt/hobbit-platform-2.0.5/config/prometheus:/config prom/prometheus --config.file=/config/prometheus.conf" >> /home/ubuntu/hobbit.log + docker run --name prometheus -d --net hobbit-core -p 9090:9090 --link $cAdvisorID:cAdvisor --link $exporterID:node-exporter --rm -v /opt/hobbit-platform-2.0.5/config/prometheus:/config prom/prometheus --config.file=/config/prometheus.conf + EOL + + sudo install -m 777 /dev/null /var/log/socat.log + echo "creating /opt/getmsg.sh" >> /home/ubuntu/init.log + cat > /opt/getmsg.sh << 'EOL' + read MESSAGE + COMMAND="sudo docker node update $MESSAGE" + echo $COMMAND >> /var/log/socat.log + exec $COMMAND + EOL + sudo chmod +x /opt/getmsg.sh + + sudo install -m 777 /dev/null 
/var/log/prometheusLoop.log + cat > /home/ubuntu/prometheusLoop.sh << 'EOL' + while : + do + LINE=$(sh /opt/getNodeIps.sh container-exporter 9104) + if [ "$LINE" != "$PREVLINE" ] + then + date >> /var/log/prometheusLoop.log + sudo sh /opt/updatePrometheus.sh >> /var/log/prometheusLoop.log + fi + PREVLINE=$LINE + sleep 15 + done + EOL + + echo "Configuring /etc/supervisor/supervisord.conf" >> /home/ubuntu/init.log + cat > /etc/supervisor/supervisord.conf << 'EOL' + [supervisord] + [program:hobbit-socat] + command=/usr/bin/socat -u tcp-l:4444,fork system:/opt/getmsg.sh + autostart=true + autorestart=true + [program:update-prometheus] + command=/bin/bash /home/ubuntu/prometheusLoop.sh + autostart=true + autorestart=true + EOL + + echo "restarting supervisor service" >> /home/ubuntu/init.log + sudo service supervisor restart & + echo "socat_daemon should be started" >> /home/ubuntu/init.log + 'c_install_hobbit': + command: !Sub | + echo "modifying docker service" >> /home/ubuntu/hobbit.log + sudo sed -i "s~-H fd://~-H tcp://0.0.0.0:2376 -H unix:///var/run/docker.sock~g" /lib/systemd/system/docker.service + echo "reloading service daemon" >> /home/ubuntu/hobbit.log + sudo systemctl daemon-reload + echo "restaring service" >> /home/ubuntu/hobbit.log + sudo service docker restart + echo "checking 2376 port with netstat" >> /home/ubuntu/hobbit.log + netstat -atn | grep 2376 >> /home/ubuntu/hobbit.log + + echo "cloning hobbit_platform" >> /home/ubuntu/init.log + sudo git clone https://github.com/hobbit-project/platform.git /opt/hobbit-platform-2.0.5 >> /home/ubuntu/hobbit.log + cd /opt/hobbit-platform-2.0.5/ + echo "switching branch to 2.0.5" >> /home/ubuntu/hobbit.log + sudo git checkout tags/v2.0.5 >> /home/ubuntu/hobbit.log + + echo "creating networks" >> /home/ubuntu/hobbit.log + sudo make create-networks + + echo "creating prometheus.conf.template" >> /home/ubuntu/hobbit.log + sudo mkdir /opt/hobbit-platform-2.0.5/config/prometheus + cat > 
/opt/hobbit-platform-2.0.5/config/prometheus/prometheus.conf.template << 'EOL' + global: + scrape_interval: 15s + evaluation_interval: 15s + scrape_configs: + - job_name: container-metrics + static_configs: + - targets: ['container-exporter:9104'] + - job_name: cadvisor-metrics + static_configs: + - targets: ['cAdvisor:8081'] + - job_name: node-metrics + static_configs: + - targets: ['node-exporter:9100'] + EOL + + echo "starting rabbit container" >> /home/ubuntu/hobbit.log + sudo sed -i "s~8081:15672~8082:15672~g" /opt/hobbit-platform-2.0.5/docker-compose.yml + sudo docker-compose up -d rabbit + echo "Rabbit should container started. Checking port 5672" >> /home/ubuntu/hobbit.log + netstat -atn | grep 5672 >> /home/ubuntu/hobbit.log + + echo "starting node-exporter" >> /home/ubuntu/hobbit.log + docker run -d --name node-exporter -p 9100:9100 --rm prom/node-exporter + + echo "starting container-exporter" >> /home/ubuntu/hobbit.log + docker run -d --name container-exporter -p 9104:9104 --rm -v /var/run/docker.sock:/var/run/docker.sock:ro -v /sys/fs/cgroup:/cgroup:rw prom/container-exporter + + echo "starting cAdvisor" >> /home/ubuntu/hobbit.log + docker run -d --name cAdvisor -p 8081:8080 --rm -v /:/rootfs:ro -v /var/run:/var/run:rw -v /sys:/sys:ro -v /var/lib/docker/:/var/lib/docker:ro -v /dev/disk:/dev/disk:ro google/cadvisor + + echo "install_hobbit finished. 
exiting" >> /home/ubuntu/init.log +#docker service create -d --name cAdvisor -p 8081:8080 --rm -v /:/rootfs:ro -v /var/run:/var/run:rw -v /sys:/sys:ro -v /var/lib/docker/:/var/lib/docker:ro -v /dev/disk:/dev/disk:ro google/cadvisor +# echo "starting keycloak & gui" >> /home/ubuntu/hobbit.log +# sudo docker-compose up -d keycloak gui +# sudo sed -i "s~-H fd://~-H tcp://0.0.0.0:2376 -H unix:///var/run/docker.sock~g" /etc/systemd/system/docker.service +# echo "export GITLAB_USER=${GitlabUser} >> /etc/environment" +# echo "export GITLAB_EMAIL=${GitlabEmail} >> /etc/environment" +# echo "export GITLAB_TOKEN=${GitlabToken} >> /etc/environment" +# +# source /etc/environment +# +# echo "creating networks" >> /home/ubuntu/hobbit.log +# make create-networks >> /home/ubuntu/hobbit.log +# +# echo "pulling images" >> /home/ubuntu/hobbit.log +# sudo docker-compose pull >> /home/ubuntu/hobbit.log +# +# echo "pulling elk images" >> /home/ubuntu/hobbit.log +# sudo docker-compose -f /opt/hobbit-platform-2.0.5/docker-compose-elk.yml pull >> /home/ubuntu/hobbit.log +# +# echo "configuring virtuoso" >> /home/ubuntu/hobbit.log +# make setup-virtuoso >> /home/ubuntu/hobbit.log +# +# sudo chmod 777 /etc/sysctl.conf +# sudo echo "vm.max_map_count=262144" >> /etc/sysctl.conf +# sudo sysctl -p +# +# sudo sed -i "s/-Xms8g/-Xms2g/g" /opt/hobbit-platform-2.0.5/config/elk/jvm.options +# sudo sed -i "s/-Xmx8g/-Xmx2g/g" /opt/hobbit-platform-2.0.5/config/elk/jvm.options +# echo "starting elk" >> /home/ubuntu/hobbit.log +# sudo docker stack deploy --compose-file /opt/hobbit-platform-2.0.5/docker-compose-elk.yml elk >> /home/ubuntu/hobbit.log +# +# echo "starting platform" >> /home/ubuntu/hobbit.log +# sudo docker stack deploy --compose-file /opt/hobbit-platform-2.0.5/docker-compose.yml platform >> /home/ubuntu/hobbit.log +# +# echo "everything should be started" >> /home/ubuntu/hobbit.log +# echo "killing all docker daemons" >> /home/ubuntu/hobbit.log +# sudo kill -9 $(sudo ps -aux | grep docker | 
awk '{print $2}') + Properties: + ImageId: ami-de8fb135 # Ubuntu 16.04 + InstanceType: !Ref InstanceType + #SecurityGroups: !Ref SecurityGroups + SecurityGroups: + - 'Fn::ImportValue': !Sub '${ParentSecurityGroupsStack}-SecurityGroup' +# - 'Fn::ImportValue': !Sub '${ParentNATStack}-SecurityGroup' + + IamInstanceProfile: !Ref InstanceProfile + KeyName: !If [HasKeyName, !Ref KeyName, !Ref 'AWS::NoValue'] + BlockDeviceMappings: + - DeviceName: "/dev/sda1" + Ebs: + VolumeSize: '30' +# - DeviceName: "/dev/xvdcz" +# Ebs: +# VolumeSize: '22' + UserData: + "Fn::Base64": !Sub | + #!/bin/bash -xe + echo "Executing user data" >> /home/ubuntu/init.log + sudo apt-get update + sudo apt-get -y upgrade + + # Install AWSCli + sudo apt install -y awscli + + # Install cfn-init for Ubuntu + apt-get -y install python-setuptools + easy_install https://s3.amazonaws.com/cloudformation-examples/aws-cfn-bootstrap-latest.tar.gz + ln -s /root/aws-cfn-bootstrap-latest/init/ubuntu/cfn-hup /etc/init.d/cfn-hup + + echo "Creating cfn script" >> /home/ubuntu/init.log + echo "echo 'Starting launch configuration' >> /home/ubuntu/init.log" >> /home/ubuntu/cfn.sh + echo "cfn-init -v --region ${AWS::Region} --stack ${AWS::StackName} --resource LaunchConfiguration" >> /home/ubuntu/cfn.sh + echo "cfn-signal -e $? --region ${AWS::Region} --stack ${AWS::StackName} --resource AutoScalingGroup" >> /home/ubuntu/cfn.sh + echo "echo 'Signals should be sent' >> /home/ubuntu/init.log" >> /home/ubuntu/cfn.sh + echo "Executing cfn script" >> /home/ubuntu/init.log + sudo sh /home/ubuntu/cfn.sh >> /home/ubuntu/init.log + + +# cfn-init -v --region eu-central-1 --stack swarm-manager --resource LaunchConfiguration +# cfn-signal -s true --region eu-central-1 --stack swarm-manager --resource AutoScalingGroup + +#Outputs: +# AutoScalingGroup: +# Description: 'Use this AutoScaling Group to identify Swarm Managers.' 
+# Value: !Ref AutoScalingGroup +# Export: +# Name: !Sub '${AWS::StackName}-AutoScalingGroup' + diff --git a/platform-controller/src/main/resources/AWS/swarm-mode/securitygroups.yaml b/platform-controller/src/main/resources/AWS/swarm-mode/securitygroups.yaml new file mode 100644 index 00000000..0d08c22e --- /dev/null +++ b/platform-controller/src/main/resources/AWS/swarm-mode/securitygroups.yaml @@ -0,0 +1,115 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: 'Docker Swarm - SecurityGroups' + + +Parameters: + + ParentVPCStack: + Description: 'Stack name of parent VPC stack based on vpc/vpc-*azs.yaml template.' + Type: String + + ParentSSHBastionStack: + Description: 'Optional Stack name of parent SSH bastion host/instance stack based on vpc/vpc-ssh-bastion.yaml template.' + Type: String + Default: '' + + +Conditions: + HasSSHBastionSecurityGroup: !Not [!Equals [!Ref ParentSSHBastionStack, '']] + HasNotSSHBastionSecurityGroup: !Equals [!Ref ParentSSHBastionStack, ''] + +Resources: + + SwarmSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: !Sub ${AWS::StackName} + VpcId: + 'Fn::ImportValue': !Sub '${ParentVPCStack}-VPC' + + SecurityGroupInSSHBastion: + Type: 'AWS::EC2::SecurityGroupIngress' + Condition: HasSSHBastionSecurityGroup + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: tcp + FromPort: 22 + ToPort: 22 + SourceSecurityGroupId: + 'Fn::ImportValue': !Sub '${ParentSSHBastionStack}-SecurityGroup' + SecurityGroupInSSHBastion2: + Type: 'AWS::EC2::SecurityGroupIngress' + Condition: HasNotSSHBastionSecurityGroup + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: tcp + FromPort: 22 + ToPort: 22 + SourceSecurityGroupId: + 'Fn::ImportValue': !Sub '${ParentVPCStack}-BastionSecurityGroup' + + SecurityGroupPing: + Type: 'AWS::EC2::SecurityGroupIngress' + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: icmp + FromPort: 8 + ToPort: 0 + CidrIp: {'Fn::ImportValue': !Sub '${ParentVPCStack}-CidrBlock'} + 
Description: "Echo requests from other machines of the VPC cluster" + + SecurityGroupInClusterDocker1: + Type: 'AWS::EC2::SecurityGroupIngress' + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: tcp + FromPort: 2376 + ToPort: 2376 + SourceSecurityGroupId: + 'Fn::ImportValue': !Sub '${ParentVPCStack}-BastionSecurityGroup' + + SecurityGroupInClusterRabbit: + Type: 'AWS::EC2::SecurityGroupIngress' + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: tcp + FromPort: 5672 + ToPort: 5672 + SourceSecurityGroupId: + 'Fn::ImportValue': !Sub '${ParentVPCStack}-BastionSecurityGroup' + + SecurityGroupInClusterDocker2: + Type: 'AWS::EC2::SecurityGroupIngress' + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: tcp + FromPort: 0 + ToPort: 65535 + SourceSecurityGroupId: !Ref SwarmSecurityGroup + + SecurityGroupInClusterDocker7946Udp: + Type: 'AWS::EC2::SecurityGroupIngress' + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: udp + FromPort: 7946 + ToPort: 7946 + SourceSecurityGroupId: !Ref SwarmSecurityGroup + + SecurityGroupInSwarmOverlayNetwork: + Type: 'AWS::EC2::SecurityGroupIngress' + Properties: + GroupId: !Ref SwarmSecurityGroup + IpProtocol: udp + FromPort: 4789 + ToPort: 4789 + SourceSecurityGroupId: !Ref SwarmSecurityGroup + + +Outputs: + + SecurityGroup: + Description: Security group for internal swarm communication + Value: !Ref SwarmSecurityGroup + Export: + Name: !Sub '${AWS::StackName}-SecurityGroup' \ No newline at end of file diff --git a/platform-controller/src/main/resources/AWS/swarm-mode/worker.yaml b/platform-controller/src/main/resources/AWS/swarm-mode/worker.yaml new file mode 100644 index 00000000..d14376cd --- /dev/null +++ b/platform-controller/src/main/resources/AWS/swarm-mode/worker.yaml @@ -0,0 +1,457 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: 'Docker Swarm - Worker' + + +Parameters: + + ParentVPCStack: + Description: 'Stack name of parent VPC stack based on vpc/vpc-*azs.yaml template.' 
+ Type: String + + KeyName: + Description: 'Optional key pair of the ec2-user to establish a SSH connection to the EC2 instance.' + Type: String + Default: '' + + IAMUserSSHAccess: + Description: 'Synchronize public keys of IAM users to enable personalized SSH access (Doc: https://cloudonaut.io/manage-aws-ec2-ssh-access-with-iam/).' + Type: String + Default: false + AllowedValues: + - true + - false + + InstanceType: + Description: 'The instance type for the EC2 instance.' + Type: String + Default: 't2.micro' + + DesiredCapacity: + Description: 'The number of worker nodes' + Type: Number + Default: 1 + + WorkerSubnetsReach: + Description: 'Should the managers have direct access to the Internet or do you prefer private subnets with NAT?' + Type: String + Default: Private + AllowedValues: + - Public + - Private + + WorkerType: + Description: 'WorkerType to be specified in HOBBIT labels' + Type: String + Default: '' + NoEcho: true + +# SecurityGroups: +# Description: Security group for which are allowed to talk to ASG +# Type: CommaDelimitedList + + ParentSecurityGroupsStack: + Description: 'ParentSecurityGroupsStack' + Type: String + + TargetGroups: + Description: Security group for which are allowed to talk to ASG + Type: CommaDelimitedList + Default: '' + + DockerVersion: + Description: 'Specifies the version of the Docker engine' + Type: String + Default: "17.03.0" + + DockerRepository: + Description: 'Specifies if stable or edge repository should be used' + Type: String + Default: stable + AllowedValues: + - stable + - edge + + ParentKeysManagementStack: + Description: 'ParentKeysManagementStack' + Type: String + + BucketName: + Description: 'Bucket name for placing join tokens' + Type: String + Default: '' + Tag: + Type: String + Default: 'Hobbit' +Conditions: + + HasKeyName: !Not [!Equals [!Ref KeyName, '']] + HasIAMUserSSHAccess: !Equals [!Ref IAMUserSSHAccess, 'true'] + +Resources: + InstanceProfile: + Type: 'AWS::IAM::InstanceProfile' + Properties: + Path: 
'/' + Roles: + - !Ref IAMRole + + IAMRole: + Type: 'AWS::IAM::Role' + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - 'ec2.amazonaws.com' + Action: + - 'sts:AssumeRole' + Path: '/' + Policies: + - PolicyName: logs + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'logs:CreateLogGroup' + - 'logs:CreateLogStream' + - 'logs:PutLogEvents' + - 'logs:DescribeLogStreams' + Resource: + - 'arn:aws:logs:*:*:*' + - PolicyName: asg + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'autoscaling:DescribeAutoScalingGroups' + - 'autoscaling:DescribeAutoScalingInstances' + - 'ec2:DescribeInstances' + Resource: + - '*' + - PolicyName: kms + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'kms:Decrypt' + - 'kms:DescribeKey' + Resource: + 'Fn::ImportValue': !Sub '${ParentKeysManagementStack}-SwarmTokenKeyArn' + #- !Ref JoinTokenKmsKey + + IAMPolicySSHAccess: + Type: 'AWS::IAM::Policy' + Condition: HasIAMUserSSHAccess + Properties: + Roles: + - !Ref IAMRole + PolicyName: iam + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 'iam:ListUsers' + Resource: + - '*' + - Effect: Allow + Action: + - 'iam:ListSSHPublicKeys' + - 'iam:GetSSHPublicKey' + Resource: + - !Sub 'arn:aws:iam::${AWS::AccountId}:user/*' + + AutoScalingGroup: + Type: AWS::AutoScaling::AutoScalingGroup + Properties: + # AvailabilityZones: !Ref AvailabilityZones + VPCZoneIdentifier: + - 'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetA${WorkerSubnetsReach}' + #- 'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetB${WorkerSubnetsReach}' + #- 'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetC${WorkerSubnetsReach}' + LaunchConfigurationName: !Ref LaunchConfiguration + MinSize: 0 + MaxSize: !Ref DesiredCapacity + DesiredCapacity: !Ref DesiredCapacity + #TargetGroupARNs: !Ref TargetGroups + MetricsCollection: + - 
Granularity: 1Minute + Metrics: + - GroupInServiceInstances + Tags: + - Key: Cluster + Value: !Ref Tag + PropagateAtLaunch: true + - Key: Name + Value: !Sub ${AWS::StackName} + PropagateAtLaunch: 'true' + CreationPolicy: + ResourceSignal: + Timeout: PT10M + UpdatePolicy: + AutoScalingRollingUpdate: + MinInstancesInService: !Ref DesiredCapacity + MaxBatchSize: '1' + PauseTime: PT10M + SuspendProcesses: + - AlarmNotification + WaitOnResourceSignals: 'true' + + LaunchConfiguration: + Type: AWS::AutoScaling::LaunchConfiguration + Metadata: + AWS::CloudFormation::Init: + configSets: + default: + #!If [HasIAMUserSSHAccess, [docker-ubuntu, swarm], [docker-ubuntu, swarm]] + !If [HasIAMUserSSHAccess, [docker-ubuntu, swarm], [docker-ubuntu, swarm]] + #!If [HasIAMUserSSHAccess, [ssh-access, docker-ubuntu, swarm], [docker-ubuntu, swarm]] +# ssh-access: +# files: +# '/opt/authorized_keys_command.sh': +# content: | +# #!/bin/bash -e +# if [ -z "$1" ]; then +# exit 1 +# fi +# SaveUserName="$1" +# SaveUserName=${SaveUserName//"+"/".plus."} +# SaveUserName=${SaveUserName//"="/".equal."} +# SaveUserName=${SaveUserName//","/".comma."} +# SaveUserName=${SaveUserName//"@"/".at."} +# aws iam list-ssh-public-keys --user-name "$SaveUserName" --query "SSHPublicKeys[?Status == 'Active'].[SSHPublicKeyId]" --output text | while read KeyId; do +# aws iam get-ssh-public-key --user-name "$SaveUserName" --ssh-public-key-id "$KeyId" --encoding SSH --query "SSHPublicKey.SSHPublicKeyBody" --output text +# done +# mode: '000755' +# owner: root +# group: root +# '/opt/import_users.sh': +# content: | +# #!/bin/bash +# aws iam list-users --query "Users[].[UserName]" --output text | while read User; do +# SaveUserName="$User" +# SaveUserName=${SaveUserName//"+"/".plus."} +# SaveUserName=${SaveUserName//"="/".equal."} +# SaveUserName=${SaveUserName//","/".comma."} +# SaveUserName=${SaveUserName//"@"/".at."} +# if id -u "$SaveUserName" >/dev/null 2>&1; then +# echo "$SaveUserName exists" +# else +# #sudo 
will read each file in /etc/sudoers.d, skipping file names that end in ‘~’ or contain a ‘.’ character to avoid causing problems with package manager or editor temporary/backup files. +# SaveUserFileName=$(echo "$SaveUserName" | tr "." " ") +# /usr/sbin/adduser "$SaveUserName" +# echo "$SaveUserName ALL=(ALL) NOPASSWD:ALL" > "/etc/sudoers.d/$SaveUserFileName" +# fi +# done +# mode: '000755' +# owner: root +# group: root +# '/etc/cron.d/import_users': +# content: | +# */10 * * * * root /opt/import_users.sh +# mode: '000644' +# owner: root +# group: root +# commands: +# 'a_configure_sshd_command': +# command: 'sed -i "s:#AuthorizedKeysCommand none:AuthorizedKeysCommand /opt/authorized_keys_command.sh:g" /etc/ssh/sshd_config' +# 'b_configure_sshd_commanduser': +# command: 'sed -i "s:#AuthorizedKeysCommandUser nobody:AuthorizedKeysCommandUser nobody:g" /etc/ssh/sshd_config' +# 'c_import_users': +# command: './import_users.sh' +# cwd: '/opt' +# services: +# sysvinit: +# sshd: +# enabled: true +# ensureRunning: true +# commands: +# - 'a_configure_sshd_command' +# - 'b_configure_sshd_commanduser' + + docker-ubuntu: + commands: + 'a_install_prereqs': + command: 'sudo apt-get install apt-transport-https ca-certificates curl software-properties-common htop supervisor -y' + 'b_set_gpg_key': + command: 'curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -' + 'c_add_fingerprint': + command: 'sudo apt-key fingerprint 0EBFCD88' + 'd_add_docker_repo': + command: !Sub 'sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) ${DockerRepository}"' + 'e_update_aptget': + command: 'sudo apt-get update' + 'f_install_docker': + command: !Sub 'sudo apt-get install -y docker-ce=${DockerVersion}~ce-0~ubuntu' + 'g_create_service': + command: 'sudo service docker start' + 'h_add_ubuntu_user_to_docker_group': + command: 'sudo usermod -aG docker ubuntu' + 'i_verify_installation': + command: 'sudo docker run hello-world' + 
'k_verify_installation': + command: 'docker run hello-world' + swarm: + commands: + 'a_join_swarm': + command: !Sub | + echo "swarm -> a_join_swarm" >> /home/ubuntu/swarm.log + + cat > /home/ubuntu/joinSwarmIfNeeded.sh << 'EOL' + date + FORMERMANAGER=$(cat /home/ubuntu/worker_join.sh | awk '{print $6}' | tr ":" "\n" | awk '{if(NR==1)print}') + echo "Former manager: $FORMERMANAGER" + echo "Downloading worker_join.sh from the s3" + aws s3api get-object --bucket ${BucketName} --key worker_join.sh /home/ubuntu/worker_join.sh --region=${AWS::Region} + NEWMANAGER=$(cat /home/ubuntu/worker_join.sh | awk '{print $6}' | tr ":" "\n" | awk '{if(NR==1)print}') + echo "New manager: $NEWMANAGER" + if [ "$NEWMANAGER" != "$FORMERMANAGER" ] + then + echo "Leaving former swarm at $FORMERMANAGER" + docker swarm leave + echo "Killing all running containers" + sudo docker rm $(sudo docker stop $(sudo docker ps -a | awk '{print $1}')) + echo "Joining new swarm at $NEWMANAGER" + sh /home/ubuntu/worker_join.sh + echo "sudo sh /home/ubuntu/sendLabels.sh ${WorkerType} $NEWMANAGER" + sudo chmod 777 /etc/hosts + sudo sed -i "s~$FORMERMANAGER~#$FORMERMANAGER~g" /etc/hosts + sudo echo "$NEWMANAGER rabbit" >> /etc/hosts + fi + sudo sh /home/ubuntu/sendLabels.sh ${WorkerType} $NEWMANAGER + EOL + + cat > /home/ubuntu/sendLabels.sh << 'EOL' + echo "Sending labels to master $2" + NODE_ID=$(docker info | grep NodeID | awk '{print $2}') + IP=$(docker info | grep Name | awk '{print $2}' | cut -c4-15) + + echo 'echo "$NODE_ID --label-add org.hobbit.type=worker" | netcat $2 4444' + echo 'echo "$NODE_ID --label-add org.hobbit.workergroup=$1" | netcat $2 4444' + echo 'echo "$NODE_ID --label-add org.hobbit.name=$1_worker_$IP" | netcat $2 4444' + + echo "$NODE_ID --label-add org.hobbit.type=worker" | netcat $2 4444 + echo "$NODE_ID --label-add org.hobbit.workergroup=$1" | netcat $2 4444 + echo "$NODE_ID --label-add org.hobbit.name=$1_worker_$IP" | netcat $2 4444 + echo "Labels should be sent" + EOL + + cat > 
/home/ubuntu/joinLoop.sh << 'EOL' + sudo install -m 777 /dev/null /var/log/joinLoop.log + while : + do + sudo sh /home/ubuntu/joinSwarmIfNeeded.sh > /var/log/joinLoop.log + sleep 15 + done + EOL + + echo "adding joinLoop to /etc/supervisor/supervisord.conf" >> /home/ubuntu/init.log + cat > /etc/supervisor/supervisord.conf << 'EOL' + [supervisord] + [program:joinLoop] + command=/bin/bash /home/ubuntu/joinLoop.sh + autostart=true + autorestart=true + EOL + + echo "restarting supervisor service" >> /home/ubuntu/init.log + sudo service supervisor restart & + echo "join_loop should be started" >> /home/ubuntu/init.log + + echo "starting node-exporter" >> /home/ubuntu/hobbit.log + docker run -d --name node-exporter -p 9100:9100 --rm prom/node-exporter + + echo "starting container-exporter" >> /home/ubuntu/hobbit.log + docker run -d --name container-exporter -p 9104:9104 --rm -v /var/run/docker.sock:/var/run/docker.sock:ro -v /sys/fs/cgroup:/cgroup:rw prom/container-exporter + + echo "starting cAdvisor" >> /home/ubuntu/hobbit.log + docker run -d --name cAdvisor -p 8081:8080 --rm -v /:/rootfs:ro -v /var/run:/var/run:rw -v /sys:/sys:ro -v /var/lib/docker/:/var/lib/docker:ro -v /dev/disk:/dev/disk:ro google/cadvisor + + # for ID in $(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names ${SwarmManagerAutoScalingGroup} --region ${AWS::Region} --query AutoScalingGroups[].Instances[].InstanceId --output text); +# do +# IP=$(aws ec2 describe-instances --instance-ids $ID --region ${AWS::Region} --query Reservations[].Instances[].PrivateIpAddress --output text) +# if [ ! 
-z "$IP" ] ; then +# #echo "Try to join swarm with IP $IP" >> /home/ubuntu/init.log +# # Join the swarm; if it fails try the next one +# +# #echo "docker swarm join --token $JOIN_TOKEN $IP:2377" >> /home/ubuntu/swarm.log +# #docker swarm join --token $JOIN_TOKEN $IP:2377 && break || continue +# +# #NODE_ADDRESS=$(docker info | grep "Node Address" | awk '{print $3}') +# echo "Sending labels to master (NODE_ID is $NODE_ID)" >> /home/ubuntu/swarm.log +# +# cat > /home/ubuntu/labels.sh << 'EOL' +# NODE_ID=$(docker info | grep NodeID | awk '{print $2}') +# echo 'echo "$NODE_ID --label-add org.hobbit.type=worker" | netcat $2 4444' +# echo "$NODE_ID --label-add org.hobbit.type=worker" | netcat $2 4444 +# echo 'echo "$NODE_ID --label-add org.hobbit.workergroup=$1" | netcat $2 4444' +# echo "$NODE_ID --label-add org.hobbit.workergroup=$1" | netcat $2 4444 +# echo 'echo "$NODE_ID --label-add org.hobbit.name=worker" | netcat $2 4444' +# echo "$NODE_ID --label-add org.hobbit.name=worker" | netcat $2 4444 +# EOL +# +# echo "sudo /home/ubuntu/labels.sh ${WorkerType} $IP" >> /home/ubuntu/swarm.log +# sudo sh /home/ubuntu/labels.sh ${WorkerType} $IP >> /home/ubuntu/swarm.log +# +# #echo "abc --label-add org.hobbit.type=data" | netcat localhost 4444 +# #echo "$NODE_ID --label-add org.hobbit.type=worker" | netcat $IP 4444 +# #echo "$NODE_ID --label-add org.hobbit.workergroup=${WorkerType}" | netcat $IP 4444 +# #echo "$NODE_ID --label-add org.hobbit.name=worker" | netcat $IP 4444 +# +# echo "Labels should be sent to master" >> /home/ubuntu/swarm.log +# +# echo "$IP rabbit" >> /etc/hosts +# fi +# done + + 'b_swarm_healthcheck': + command: + echo "swarm -> b_swarm_healthcheck" >> /home/ubuntu/init.log + docker info --format "{{.Swarm.NodeID}}" >> /home/ubuntu/init.log + + Properties: + ImageId: ami-de8fb135 # Ubuntu 16.04 + InstanceType: !Ref InstanceType + SecurityGroups: + - 'Fn::ImportValue': !Sub '${ParentSecurityGroupsStack}-SecurityGroup' +# - !Ref SwarmSecurityGroup + + 
IamInstanceProfile: !Ref InstanceProfile + KeyName: !If [HasKeyName, !Ref KeyName, !Ref 'AWS::NoValue'] + BlockDeviceMappings: + - DeviceName: "/dev/sda1" + Ebs: + VolumeSize: '30' + UserData: + "Fn::Base64": !Sub | + #!/bin/bash -xe + + + echo "Executing user data" >> /home/ubuntu/init.log + + sudo apt-get update + sudo apt-get -y upgrade + + # Install AWSCli + sudo apt install -y awscli + + # Install cfn-init for Ubuntu + apt-get -y install python-setuptools + easy_install https://s3.amazonaws.com/cloudformation-examples/aws-cfn-bootstrap-latest.tar.gz + ln -s /root/aws-cfn-bootstrap-latest/init/ubuntu/cfn-hup /etc/init.d/cfn-hup + + echo "Creating cfn script" >> /home/ubuntu/init.log + echo "cfn-init -v --region ${AWS::Region} --stack ${AWS::StackName} --resource LaunchConfiguration" >> /home/ubuntu/cfn.sh + echo "cfn-signal -e $? --region ${AWS::Region} --stack ${AWS::StackName} --resource AutoScalingGroup" >> /home/ubuntu/cfn.sh + echo "echo 'Signals should be sent' >> /home/ubuntu/init.log" >> /home/ubuntu/cfn.sh + + sh /home/ubuntu/cfn.sh >> /home/ubuntu/init.log + +#Outputs: +# SwarmManagerAutoScalingGroup: +# Value: !Sub 'arn:aws:s3:::${BucketName}/*' diff --git a/platform-controller/src/main/resources/AWS/vpc-1azs.yaml b/platform-controller/src/main/resources/AWS/vpc-1azs.yaml new file mode 100644 index 00000000..b1916da6 --- /dev/null +++ b/platform-controller/src/main/resources/AWS/vpc-1azs.yaml @@ -0,0 +1,285 @@ +--- +# Copyright 2018 widdix GmbH +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +AWSTemplateFormatVersion: '2010-09-09' +Description: 'VPC: public and private subnets in one availability zone, a cloudonaut.io template' +Metadata: + 'AWS::CloudFormation::Interface': + ParameterGroups: + - Label: + default: 'VPC Parameters' + Parameters: + - ClassB +Parameters: + ClassB: + Description: 'Class B of VPC (10.XXX.0.0/16)' + Type: Number + Default: 0 + ConstraintDescription: 'Must be in the range [0-255]' + MinValue: 0 + MaxValue: 255 + Tag: + Type: String + Default: 'Hobbit' +Resources: + VPC: + Type: 'AWS::EC2::VPC' + Properties: + CidrBlock: !Sub '10.${ClassB}.0.0/16' + EnableDnsSupport: true + EnableDnsHostnames: true + InstanceTenancy: default + Tags: + - Key: Cluster + Value: !Ref Tag + InternetGateway: + Type: 'AWS::EC2::InternetGateway' + Properties: + Tags: + - Key: Name + Value: !Sub '10.${ClassB}.0.0/16' + - Key: Cluster + Value: !Ref Tag + VPCGatewayAttachment: + Type: 'AWS::EC2::VPCGatewayAttachment' + Properties: + VpcId: !Ref VPC + InternetGatewayId: !Ref InternetGateway + RouteTablePublicInternetRoute: # should be RouteTablePublicAInternetRoute, but logical id was not changed for backward compatibility + Type: 'AWS::EC2::Route' + DependsOn: VPCGatewayAttachment + Properties: + RouteTableId: !Ref RouteTablePublic + DestinationCidrBlock: '0.0.0.0/0' + GatewayId: !Ref InternetGateway + SubnetAPublic: + Type: 'AWS::EC2::Subnet' + Properties: + AvailabilityZone: !Select [0, !GetAZs ''] + CidrBlock: !Sub '10.${ClassB}.0.0/20' + MapPublicIpOnLaunch: true + VpcId: !Ref VPC + Tags: + - Key: Name + Value: 'A public' + - Key: Reach + Value: public + - Key: Cluster + Value: !Ref Tag + SubnetAPrivate: + Type: 'AWS::EC2::Subnet' + Properties: + AvailabilityZone: !Select [0, !GetAZs ''] + CidrBlock: !Sub '10.${ClassB}.16.0/20' + VpcId: !Ref VPC + Tags: + - Key: Name + Value: 'A private' + - Key: Reach + Value: private + - Key: Cluster + Value: 
!Ref Tag + RouteTablePublic: # should be RouteTableAPublic, but logical id was not changed for backward compatibility + Type: 'AWS::EC2::RouteTable' + Properties: + VpcId: !Ref VPC + Tags: + - Key: Name + Value: 'A Public' + - Key: Cluster + Value: !Ref Tag + RouteTablePrivate: # should be RouteTableAPrivate, but logical id was not changed for backward compatibility + Type: 'AWS::EC2::RouteTable' + Properties: + VpcId: !Ref VPC + Tags: + - Key: Name + Value: 'A Private' + - Key: Cluster + Value: !Ref Tag + RouteTableAssociationAPublic: + Type: 'AWS::EC2::SubnetRouteTableAssociation' + Properties: + SubnetId: !Ref SubnetAPublic + RouteTableId: !Ref RouteTablePublic + RouteTableAssociationAPrivate: + Type: 'AWS::EC2::SubnetRouteTableAssociation' + Properties: + SubnetId: !Ref SubnetAPrivate + RouteTableId: !Ref RouteTablePrivate + NetworkAclPublic: + Type: 'AWS::EC2::NetworkAcl' + Properties: + VpcId: !Ref VPC + Tags: + - Key: Name + Value: Public + - Key: Cluster + Value: !Ref Tag + NetworkAclPrivate: + Type: 'AWS::EC2::NetworkAcl' + Properties: + VpcId: !Ref VPC + Tags: + - Key: Name + Value: Private + - Key: Cluster + Value: !Ref Tag + SubnetNetworkAclAssociationAPublic: + Type: 'AWS::EC2::SubnetNetworkAclAssociation' + Properties: + SubnetId: !Ref SubnetAPublic + NetworkAclId: !Ref NetworkAclPublic + SubnetNetworkAclAssociationAPrivate: + Type: 'AWS::EC2::SubnetNetworkAclAssociation' + Properties: + SubnetId: !Ref SubnetAPrivate + NetworkAclId: !Ref NetworkAclPrivate + NetworkAclEntryInPublicAllowAll: + Type: 'AWS::EC2::NetworkAclEntry' + Properties: + NetworkAclId: !Ref NetworkAclPublic + RuleNumber: 99 + Protocol: -1 + RuleAction: allow + Egress: false + CidrBlock: '0.0.0.0/0' + NetworkAclEntryOutPublicAllowAll: + Type: 'AWS::EC2::NetworkAclEntry' + Properties: + NetworkAclId: !Ref NetworkAclPublic + RuleNumber: 99 + Protocol: -1 + RuleAction: allow + Egress: true + CidrBlock: '0.0.0.0/0' + NetworkAclEntryInPrivateAllowVPC: + Type: 'AWS::EC2::NetworkAclEntry' + 
Properties: + NetworkAclId: !Ref NetworkAclPrivate + RuleNumber: 99 + Protocol: -1 + RuleAction: allow + Egress: false + CidrBlock: '0.0.0.0/0' + NetworkAclEntryOutPrivateAllowVPC: + Type: 'AWS::EC2::NetworkAclEntry' + Properties: + NetworkAclId: !Ref NetworkAclPrivate + RuleNumber: 99 + Protocol: -1 + RuleAction: allow + Egress: true + CidrBlock: '0.0.0.0/0' + BastionSecurityGroup: + Type: 'AWS::EC2::SecurityGroup' + Properties: + GroupDescription: !Sub '${AWS::StackName}-bastion-security-group' + SecurityGroupIngress: + - IpProtocol: tcp + FromPort: 22 + ToPort: 22 + CidrIp: '0.0.0.0/0' +# - IpProtocol: tcp +# FromPort: 2376 +# ToPort: 2376 +# CidrIp: '0.0.0.0/0' + VpcId: !Ref VPC + Tags: + - Key: Cluster + Value: !Ref Tag + +Outputs: + TemplateID: + Description: 'cloudonaut.io template id.' + Value: 'vpc/vpc-1azs' + TemplateVersion: + Description: 'cloudonaut.io template version.' + Value: 'latest' + StackName: + Description: 'Stack name.' + Value: !Sub '${AWS::StackName}' + AZs: + Description: 'AZs' + Value: 1 + Export: + Name: !Sub '${AWS::StackName}-AZs' + AZA: + Description: 'AZ of A' + Value: !Select [0, !GetAZs ''] + Export: + Name: !Sub '${AWS::StackName}-AZA' + ClassB: + Description: 'Deprecated in v7, will be removed in v8, use CidrBlock instead! Class B.' + Value: !Ref ClassB + Export: + Name: !Sub '${AWS::StackName}-ClassB' + CidrBlock: + Description: 'The set of IP addresses for the VPC.' + Value: !GetAtt 'VPC.CidrBlock' + Export: + Name: !Sub '${AWS::StackName}-CidrBlock' + VPC: + Description: 'VPC.' + Value: !Ref VPC + Export: + Name: !Sub '${AWS::StackName}-VPC' + SubnetsPublic: + Description: 'Subnets public.' + Value: !Join [',', [!Ref SubnetAPublic]] + Export: + Name: !Sub '${AWS::StackName}-SubnetsPublic' + SubnetsPrivate: + Description: 'Subnets private.' + Value: !Join [',', [!Ref SubnetAPrivate]] + Export: + Name: !Sub '${AWS::StackName}-SubnetsPrivate' + + RouteTablesPrivate: + Description: 'Route tables private.' 
+ Value: !Join [',', [!Ref RouteTablePrivate]] + Export: + Name: !Sub '${AWS::StackName}-RouteTablesPrivate' + RouteTablesPublic: + Description: 'Route tables public.' + Value: !Join [',', [!Ref RouteTablePublic]] + Export: + Name: !Sub '${AWS::StackName}-RouteTablesPublic' + + SubnetAPublic: + Description: 'Subnet A public.' + Value: !Ref SubnetAPublic + Export: + Name: !Sub '${AWS::StackName}-SubnetAPublic' + RouteTableAPublic: + Description: 'Route table A public.' + Value: !Ref RouteTablePublic + Export: + Name: !Sub '${AWS::StackName}-RouteTableAPublic' + + SubnetAPrivate: + Description: 'Subnet A private.' + Value: !Ref SubnetAPrivate + Export: + Name: !Sub '${AWS::StackName}-SubnetAPrivate' + RouteTableAPrivate: + Description: 'Route table A private.' + Value: !Ref RouteTablePrivate + Export: + Name: !Sub '${AWS::StackName}-RouteTableAPrivate' + SecurityGroup: + Description: 'Use this Security Group to reference incoming traffic from the SSH bastion host/instance.' + Value: !Ref BastionSecurityGroup + Export: + Name: !Sub '${AWS::StackName}-BastionSecurityGroup' \ No newline at end of file diff --git a/platform-controller/src/main/resources/log4j.properties b/platform-controller/src/main/resources/log4j.properties index 30eaea9e..b0e48b3a 100644 --- a/platform-controller/src/main/resources/log4j.properties +++ b/platform-controller/src/main/resources/log4j.properties @@ -6,3 +6,5 @@ log4j.appender.stdout.layout=org.apache.log4j.PatternLayout log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - <%m>%n log4j.category.org.apache.jena.riot=OFF +log4j.logger.org.hobbit.controller.cloud=DEBUG +log4j.logger.service=DEBUG diff --git a/platform-controller/src/test/java/org/hobbit/PlatformControllerTest.java b/platform-controller/src/test/java/org/hobbit/PlatformControllerTest.java new file mode 100644 index 00000000..e726720d --- /dev/null +++ b/platform-controller/src/test/java/org/hobbit/PlatformControllerTest.java @@ -0,0 +1,175 @@ +/** + * This file is 
part of platform-controller. + * + * platform-controller is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * platform-controller is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with platform-controller. If not, see . + */ +package org.hobbit; + +import static org.hobbit.controller.ExperimentManager.MAX_EXECUTION_TIME_KEY; +import static org.hobbit.controller.PlatformController.*; +import static org.hobbit.controller.cloud.ClusterManagerProvider.CLOUD_VPC_CLUSTER_NAME_KEY; +import static org.hobbit.controller.docker.CloudClusterManager.CLOUD_EXPIRE_TIMEOUT_MIN_KEY; +import static org.hobbit.controller.docker.CloudClusterManager.CLOUD_SSH_KEY_FILE_PATH_KEY; +import static org.hobbit.controller.docker.CloudClusterManager.CLOUD_SSH_KEY_NAME_KEY; +import static org.hobbit.controller.docker.ContainerManagerImpl.ENABLE_VOLUMES_FOR_SYSTEM_CONTAINERS_KEY; +import static org.hobbit.controller.docker.FileBasedImageManager.FILE_BASED_IMAGE_MANAGER_FOLDER_KEY; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.io.IOUtils; +import org.hobbit.controller.PlatformController; +import org.hobbit.controller.docker.ContainerManagerBasedTest; +import org.hobbit.controller.docker.ContainerManagerImpl; +import org.hobbit.core.Commands; +import org.hobbit.core.Constants; +import 
org.hobbit.utils.docker.DockerHelper; +import org.junit.*; +import org.junit.contrib.java.lang.system.EnvironmentVariables; + +import com.spotify.docker.client.messages.swarm.Service; +import com.spotify.docker.client.messages.swarm.Task; + +/** + * Created by Timofey Ermilov on 02/09/16. + */ +public class PlatformControllerTest extends ContainerManagerBasedTest { + + private static final String RABBIT_HOST_NAME = DockerHelper.getHost(); + + @Rule + public static final EnvironmentVariables environmentVariables = new EnvironmentVariables(); + + private PlatformController controller; + + private void assertDockerImageEquals(String message, String expected, String got) throws Exception { + final Matcher matcher = Pattern.compile("^(.*?)(?:@.*)?$").matcher(got); + assertTrue("Image name matches pattern", matcher.find()); + assertEquals(message, expected, matcher.group(1)); + } + + @BeforeClass + public static void setEnvVars(){ + + // Cloud extension parameters + environmentVariables.set(USE_CLOUD_KEY, "true"); + environmentVariables.set(CLOUD_VPC_CLUSTER_NAME_KEY, "hobbit"); + environmentVariables.set(CLOUD_EXPIRE_TIMEOUT_MIN_KEY, "-1"); + environmentVariables.set(CLOUD_SSH_KEY_NAME_KEY, "hobbit_2"); + environmentVariables.set(CLOUD_SSH_KEY_FILE_PATH_KEY, "ssh/hobbit_2.pem"); + + + //environmentVariables.set("DOCKER_HOST", "tcp://localhost:2376"); - might be needed for testing + + // Enabling file-based image manager for local platforms + environmentVariables.set(FILE_BASED_IMAGE_MANAGER_KEY, "true"); + environmentVariables.set(FILE_BASED_IMAGE_MANAGER_FOLDER_KEY, "/mnt/share/platform-controller/metadata"); + + // Enabling file-based image manager for local platforms + environmentVariables.set(ENABLE_VOLUMES_FOR_SYSTEM_CONTAINERS_KEY, "1"); + environmentVariables.set(ALLOW_ASYNC_CONTAINER_COMMANDS_KEY, "1"); + + // Enabling containet logs output to console (does not require ELK) + environmentVariables.set(SERVICE_LOGS_READER_KEY, "1"); + 
environmentVariables.set(MAX_EXECUTION_TIME_KEY, "3600000"); + + environmentVariables.set("HOBBIT_RABBIT_HOST", "rabbit"); + environmentVariables.set("DEPLOY_ENV", "testing"); + //environmentVariables.set("LOGGING_GELF_ADDRESS", "udp://localhost:12201"); + + environmentVariables.set("GITLAB_USER", System.getenv("GITLAB_USER")); + environmentVariables.set("GITLAB_EMAIL", System.getenv("GITLAB_EMAIL")); + environmentVariables.set("GITLAB_TOKEN", System.getenv("GITLAB_TOKEN")); + + environmentVariables.set("HOBBIT_REDIS_HOST", "redis"); + environmentVariables.set("CONTAINER_PARENT_CHECK", "0"); + + environmentVariables.set(Constants.GENERATOR_ID_KEY, "0"); + environmentVariables.set(Constants.GENERATOR_COUNT_KEY, "1"); + environmentVariables.set(Constants.HOBBIT_SESSION_ID_KEY, "0"); + } + + @Before + public void init() throws Exception { + + + controller = new PlatformController(); + try { + controller.init(); + } catch (Exception e) { + throw e; + } + } + + public void close() throws Exception { + IOUtils.closeQuietly(controller); + super.close(); + } + + //#mvn -Dtest=PlatformControllerTest#checkRuntimeWork test + //java -cp platform-controller.jar org.hobbit.core.run.ComponentStarter org.hobbit.controller.PlatformController + @Test + public void checkRuntimeWork() throws Exception{ + controller.run(); + } + + @Ignore + @Test + public void receiveCommand() throws Exception { + byte command = Commands.DOCKER_CONTAINER_START; + + // create and execute parent container + final String parentId = manager.startContainer( + "busybox", + Constants.CONTAINER_TYPE_SYSTEM, + null, + new String[] { "sh", "-c", "while :; do sleep 1; done" }); + final String parentName = manager.getContainerName(parentId); + tasks.add(parentId); + + // create and execute test container + final String image = "busybox:latest"; + final String type = Constants.CONTAINER_TYPE_SYSTEM; + byte[] data = ("{\"image\": \"" + image + "\", \"type\": \"" + type + "\", \"parent\": \"" + parentName + "\"}") + 
.getBytes(StandardCharsets.UTF_8); + controller.receiveCommand(command, data, "1", ""); + + // get running containers + Service serviceInfo = null; + Task taskInfo = null; + String taskId = null; + String containerId = null; + final List taskList = dockerClient.listTasks(Task.Criteria.builder() + .label(ContainerManagerImpl.LABEL_PARENT + "=" + parentId) + .build()); + + if (!taskList.isEmpty()) { + taskInfo = taskList.get(0); + serviceInfo = dockerClient.inspectService(taskInfo.serviceId()); + taskId = taskInfo.id(); + } + + // check that container exists + assertNotNull(taskId); + assertEquals("Amount of child containers of the test parent container", 1, taskList.size()); + assertEquals("Type of created container", + Constants.CONTAINER_TYPE_SYSTEM, serviceInfo.spec().labels().get(ContainerManagerImpl.LABEL_TYPE)); + assertDockerImageEquals("Image of created container", image, taskInfo.spec().containerSpec().image()); + } +} diff --git a/platform-controller/src/test/java/org/hobbit/QueueClient.java b/platform-controller/src/test/java/org/hobbit/QueueClient.java new file mode 100644 index 00000000..6b9b6531 --- /dev/null +++ b/platform-controller/src/test/java/org/hobbit/QueueClient.java @@ -0,0 +1,69 @@ +package org.hobbit; + +import com.google.gson.JsonObject; +import org.hobbit.controller.data.ExperimentConfiguration; +import org.hobbit.controller.queue.ExperimentQueueImpl; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; +import org.junit.contrib.java.lang.system.EnvironmentVariables; + +import java.util.Calendar; +import java.util.Date; + +/** + * @author Pavel Smirnov. 
(psmirnov@agtinternational.com / smirnp@gmail.com) + */ +public class QueueClient { + public final EnvironmentVariables environmentVariables = new EnvironmentVariables(); + + String id; + ExperimentQueueImpl queue; + + @Before + public void init(){ + + //environmentVariables.set("HOBBIT_REDIS_HOST", "10.67.43.25:6379"); + + queue = new ExperimentQueueImpl(); + id = "http://w3id.org/hobbit/experiments#"+String.valueOf(String.valueOf(new Date().getTime())); + + } + + @Test + @Ignore + public void flushQueue(){ + int deleted=0; + for(ExperimentConfiguration configuration : queue.listAll()){ + queue.remove(configuration); + deleted++; + } + System.out.println(String.valueOf(deleted)+" experiments deleted"); + } + + @Test + @Ignore + public void submitToQueue(){ + //submitToQueue(BENCHMARK_URI, SYSTEM_URI); + } + + private void submitToQueue(String benchmarkUri, String systemUri){ + + ExperimentConfiguration cfg = new ExperimentConfiguration(); + cfg.id = id; + Calendar cal = Calendar.getInstance(); + cal.set(Calendar.YEAR, 2016); + cal.set(Calendar.MONTH, Calendar.SEPTEMBER); + cal.set(Calendar.DAY_OF_MONTH, 5); + cfg.executionDate = cal; + cfg.benchmarkUri = benchmarkUri; + cfg.systemUri = systemUri; + cfg.userName = ""; + + cfg.serializedBenchParams = ""; + + queue.add(cfg); + + System.out.println("Experiment submitted"); + } +} diff --git a/platform-controller/src/test/java/org/hobbit/cloud/DockerClientTest.java b/platform-controller/src/test/java/org/hobbit/cloud/DockerClientTest.java new file mode 100644 index 00000000..7cfbfb59 --- /dev/null +++ b/platform-controller/src/test/java/org/hobbit/cloud/DockerClientTest.java @@ -0,0 +1,19 @@ +package org.hobbit.cloud; + +import com.spotify.docker.client.DockerClient; +import org.hobbit.controller.cloud.DockerClientProvider; +import org.hobbit.controller.cloud.aws.swarm.SwarmClusterManager; +import org.junit.Before; + +/** + * @author Pavel Smirnov. 
(psmirnov@agtinternational.com / smirnp@gmail.com) + */ +public class DockerClientTest { + + DockerClient dockerClient; + + @Before + public void init() throws Exception { + dockerClient = DockerClientProvider.getDockerClient(); + } +} diff --git a/platform-controller/src/test/java/org/hobbit/cloud/SwarmClusterManagerTest.java b/platform-controller/src/test/java/org/hobbit/cloud/SwarmClusterManagerTest.java new file mode 100644 index 00000000..057c0c58 --- /dev/null +++ b/platform-controller/src/test/java/org/hobbit/cloud/SwarmClusterManagerTest.java @@ -0,0 +1,75 @@ +package org.hobbit.cloud; + +import com.google.gson.JsonObject; +import org.hobbit.controller.cloud.aws.swarm.SwarmClusterManager; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; + + +public class SwarmClusterManagerTest { + + SwarmClusterManager manager; + String clusterName = "hobbit"; + + + @Before + public void init() throws Exception { + manager = new SwarmClusterManager(clusterName); + } + + @Test + @Ignore + public void createManagersTest() throws Exception { + //manager.createManagers(null); + Assert.assertTrue(true); + } + + @Test + @Ignore + public void createClusterTest() throws Exception { + + JsonObject config = new JsonObject(); + + JsonObject nodes = new JsonObject(); +// nat.addProperty("InstanceType", "m5.large"); +// //nat.addProperty("instanceType", "c5.large"); +// //nat.addProperty("InstanceType", "t2.medium"); +// config.add(clusterName+"-nat", nat); + +// JsonObject nodes = new JsonObject(); +// nodes.addProperty("DesiredCapacity", "1"); +// //managerNodes.addProperty("InstanceType", "t2.small"); +// nodes.addProperty("InstanceType", "t2.medium"); +// //managerNodes.addProperty("InstanceType", "c4.large"); +// config.add(clusterName+"-swarm-manager", nodes); +// +// nodes = new JsonObject(); + nodes.addProperty("DesiredCapacity", "1"); + nodes.addProperty("InstanceType", "t2.medium"); + //nat.addProperty("InstanceType", 
"t2.small"); + config.add(clusterName+"-swarm-benchmark-worker", nodes); +// + nodes = new JsonObject(); + nodes.addProperty("DesiredCapacity", "1"); + nodes.addProperty("InstanceType", "t2.micro"); + //nodes.addProperty("InstanceType", "c4.large"); + //nodes.addProperty("InstanceType", "t2.medium"); + config.add(clusterName+"-swarm-system-worker", nodes); + + manager.createCluster(config.toString()); + //manager.createCluster("{hobbit-nat:{InstanceType:t2.small},hobbit-swarm-manager:{DesiredCapacity:1,InstanceType:t2.small},hobbit-swarm-benchmark-worker:{DesiredCapacity:1},hobbit-swarm-system-worker:{DesiredCapacity:1,InstanceType:t2.micro}}"); + Assert.assertTrue(true); + } + + @Test + @Ignore + public void deleteClusterTest() throws Exception { + //manager.reactOnQueue(); + manager.deleteCluster(); + //manager.deleteSwarmCluster(); + Assert.assertTrue(true); + } +} + diff --git a/platform-controller/src/test/java/org/hobbit/controller/DockerBasedTest.java b/platform-controller/src/test/java/org/hobbit/controller/DockerBasedTest.java index 9799e8a5..2755c012 100644 --- a/platform-controller/src/test/java/org/hobbit/controller/DockerBasedTest.java +++ b/platform-controller/src/test/java/org/hobbit/controller/DockerBasedTest.java @@ -18,13 +18,19 @@ import java.util.List; +import com.spotify.docker.client.exceptions.DockerException; import org.apache.commons.compress.utils.IOUtils; +import org.hobbit.core.Constants; +import org.hobbit.utils.docker.DockerHelper; import org.junit.After; import org.junit.Before; import com.spotify.docker.client.DefaultDockerClient; import com.spotify.docker.client.DockerClient; import com.spotify.docker.client.messages.Image; +import org.junit.Rule; +import org.junit.Test; +import org.junit.contrib.java.lang.system.EnvironmentVariables; /** * Created by Timofey Ermilov on 02/09/16. 
@@ -32,8 +38,13 @@ public class DockerBasedTest { protected DockerClient dockerClient; protected static final String busyboxImageName = "busybox:latest"; + protected static final String benchmarkImageName = "git.project-hobbit.eu:4567/smirnp/sml-benchmark-v2/benchmark-controller"; + protected static final String[] sleepCommand = { "sleep", "60s" }; + @Rule + public final EnvironmentVariables environmentVariables = new EnvironmentVariables(); + protected boolean findImageWithTag(final String id, final List images) { if (images != null) { for (Image image : images) { @@ -51,14 +62,25 @@ protected boolean findImageWithTag(final String id, final List images) { @Before public void initClient() throws Exception { + + + dockerClient = DefaultDockerClient.fromEnv().build(); + + } + + @Test + public void pullImageTest() throws DockerException, InterruptedException { // check if busybox is present List images = dockerClient.listImages(DockerClient.ListImagesParam.allImages()); - if (!findImageWithTag(busyboxImageName, images)) { - dockerClient.pull(busyboxImageName); + String imageName = benchmarkImageName; + if (!findImageWithTag(imageName, images)) { + dockerClient.pull(imageName); + } } + @After public void close() throws Exception { IOUtils.closeQuietly(dockerClient); diff --git a/platform-controller/src/test/java/org/hobbit/controller/ExperimentTimeoutTest.java b/platform-controller/src/test/java/org/hobbit/controller/ExperimentTimeoutTest.java index 0cf3cb8f..66dfb69c 100644 --- a/platform-controller/src/test/java/org/hobbit/controller/ExperimentTimeoutTest.java +++ b/platform-controller/src/test/java/org/hobbit/controller/ExperimentTimeoutTest.java @@ -6,6 +6,9 @@ import java.util.List; import java.util.concurrent.Semaphore; +import com.spotify.docker.client.DockerClient; +import com.spotify.docker.client.LogStream; +import com.spotify.docker.client.messages.swarm.Service; import org.apache.commons.compress.utils.IOUtils; import org.apache.jena.rdf.model.Model; import 
org.apache.jena.rdf.model.ModelFactory; @@ -104,7 +107,7 @@ public DummyPlatformController(Semaphore benchmarkControllerTerminated) { @Override public void init() throws Exception { - // do not init the super class + // do not setAsEnvVariables the super class } @Override @@ -199,17 +202,26 @@ public String startContainer(String imageName, String containerType, String pare } @Override - public String startContainer(String imageName, String containerType, String parentId, String[] env, - String[] command) { - return imageName; + public String startContainer(String imageName, String containerType, String parentId, String[] env, String[] command, String[] volumePaths) { + return null; } +// @Override +// public String startContainer(String imageName, String containerType, String parentId, String[] env, String[] command) { +// return imageName; +// } + @Override public String startContainer(String imageName, String containerType, String parentId, String[] env, String[] command, String experimentId) { return imageName; } + @Override + public String startContainer(String imageName, String containerType, String parentId, String[] env, String[] command, String experimentId, String[] volumePaths) { + return null; + } + @Override public void stopContainer(String containerId) { // Check whether the benchmark controller has been terminated @@ -275,6 +287,27 @@ public ContainerStats getStats(String containerId) { return null; } + @Override + public List listServices() { + return null; + } + + @Override + public Task inspectTask(String taskId) { + return null; + } + + @Override + public LogStream serviceLogs(String var1, DockerClient.LogsParam... 
var2) { + return null; + } + + @Override + public boolean execAsyncCommand(String containerId, String[] command) { + return true; + } + + } private static class DummyStorageServiceClient extends StorageServiceClient { diff --git a/platform-controller/src/test/java/org/hobbit/controller/docker/ContainerManagerBasedTest.java b/platform-controller/src/test/java/org/hobbit/controller/docker/ContainerManagerBasedTest.java index 0fd139d4..59ab9eed 100644 --- a/platform-controller/src/test/java/org/hobbit/controller/docker/ContainerManagerBasedTest.java +++ b/platform-controller/src/test/java/org/hobbit/controller/docker/ContainerManagerBasedTest.java @@ -22,6 +22,8 @@ import org.hobbit.controller.DockerBasedTest; import org.junit.After; import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,9 +37,15 @@ public class ContainerManagerBasedTest extends DockerBasedTest { protected List containers = new ArrayList(); protected List tasks = new ArrayList(); + @Before public void initManager() throws Exception { - manager = new ContainerManagerImpl(); + //manager = new ContainerManagerImpl(); + } + + @Test + public void pullImage(){ + manager.pullImage("git.project-hobbit.eu:4567/smirnp/sml-benchmark-v2/benchmark-controller"); } @After diff --git a/platform-controller/src/test/java/org/hobbit/controller/queue/ExperimentQueueImplTest.java b/platform-controller/src/test/java/org/hobbit/controller/queue/ExperimentQueueImplTest.java index 9122b1d2..9a5aeab5 100644 --- a/platform-controller/src/test/java/org/hobbit/controller/queue/ExperimentQueueImplTest.java +++ b/platform-controller/src/test/java/org/hobbit/controller/queue/ExperimentQueueImplTest.java @@ -28,6 +28,7 @@ import org.hobbit.controller.data.ExperimentConfiguration; import org.junit.After; import org.junit.Before; +import org.junit.Ignore; import org.junit.Test; import com.google.gson.Gson; @@ -133,7 +134,7 @@ public void test() 
throws Exception { } @Test - public void addThenNextTest() { + public void addThenNextTest(){ // create test config ExperimentConfiguration cfg = new ExperimentConfiguration(); cfg.id = "1"; @@ -157,6 +158,7 @@ public void addThenNextTest() { assertNull(next); } + @After public void close() { // cleanup diff --git a/platform-controller/src/test/java/org/hobbit/controller/queue/RedisBasedTest.java b/platform-controller/src/test/java/org/hobbit/controller/queue/RedisBasedTest.java index dfc4e901..3d794472 100644 --- a/platform-controller/src/test/java/org/hobbit/controller/queue/RedisBasedTest.java +++ b/platform-controller/src/test/java/org/hobbit/controller/queue/RedisBasedTest.java @@ -33,7 +33,7 @@ public class RedisBasedTest { @Before public void initRedis() { - // init redis redisConnection + // setAsEnvVariables redis redisConnection redisClient = RedisClient.create("redis://localhost"); redisConnection = redisClient.connect(); redisSyncCommands = redisConnection.sync(); diff --git a/platform-controller/src/test/resources/log4j.properties b/platform-controller/src/test/resources/log4j.properties index 30eaea9e..b0e48b3a 100644 --- a/platform-controller/src/test/resources/log4j.properties +++ b/platform-controller/src/test/resources/log4j.properties @@ -6,3 +6,5 @@ log4j.appender.stdout.layout=org.apache.log4j.PatternLayout log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - <%m>%n log4j.category.org.apache.jena.riot=OFF +log4j.logger.org.hobbit.controller.cloud=DEBUG +log4j.logger.service=DEBUG diff --git a/platform-controller/ssh/.keep b/platform-controller/ssh/.keep new file mode 100644 index 00000000..e69de29b diff --git a/platform-controller/stop-services.sh b/platform-controller/stop-services.sh new file mode 100644 index 00000000..8f9661e1 --- /dev/null +++ b/platform-controller/stop-services.sh @@ -0,0 +1,2 @@ +sudo docker stop $(sudo docker ps -a | grep -v "hobbit_" | awk '{print $1}') +sudo docker service rm $(sudo docker service ls | grep -v 
"hobbit_") \ No newline at end of file From 581f8cac0623e74a00acc6fb43a85b811400c911 Mon Sep 17 00:00:00 2001 From: smirnp Date: Mon, 3 Dec 2018 14:11:54 +0100 Subject: [PATCH 3/7] Some comments added --- .../java/org/hobbit/PlatformControllerTest.java | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/platform-controller/src/test/java/org/hobbit/PlatformControllerTest.java b/platform-controller/src/test/java/org/hobbit/PlatformControllerTest.java index e726720d..08164500 100644 --- a/platform-controller/src/test/java/org/hobbit/PlatformControllerTest.java +++ b/platform-controller/src/test/java/org/hobbit/PlatformControllerTest.java @@ -70,10 +70,16 @@ public static void setEnvVars(){ // Cloud extension parameters environmentVariables.set(USE_CLOUD_KEY, "true"); environmentVariables.set(CLOUD_VPC_CLUSTER_NAME_KEY, "hobbit"); - environmentVariables.set(CLOUD_EXPIRE_TIMEOUT_MIN_KEY, "-1"); + environmentVariables.set(CLOUD_EXPIRE_TIMEOUT_MIN_KEY, "-1"); //-1 unlimited environmentVariables.set(CLOUD_SSH_KEY_NAME_KEY, "hobbit_2"); environmentVariables.set(CLOUD_SSH_KEY_FILE_PATH_KEY, "ssh/hobbit_2.pem"); + //Setup this vars for your environment or define them here for the test +// environmentVariables.set("AWS_ACCESS_KEY_ID", System.getenv("AWS_ACCESS_KEY_ID")); +// environmentVariables.set("AWS_SECRET_KEY", System.getenv("AWS_SECRET_KEY")); +// environmentVariables.set("AWS_ROLE_ARN", System.getenv("AWS_ROLE_ARN")); +// environmentVariables.set("AWS_REGION", System.getenv("AWS_REGION")); + //environmentVariables.set("DOCKER_HOST", "tcp://localhost:2376"); - might be needed for testing @@ -93,9 +99,10 @@ public static void setEnvVars(){ environmentVariables.set("DEPLOY_ENV", "testing"); //environmentVariables.set("LOGGING_GELF_ADDRESS", "udp://localhost:12201"); - environmentVariables.set("GITLAB_USER", System.getenv("GITLAB_USER")); - environmentVariables.set("GITLAB_EMAIL", System.getenv("GITLAB_EMAIL")); - 
environmentVariables.set("GITLAB_TOKEN", System.getenv("GITLAB_TOKEN")); + //Setup this vars for your environment or define them here for the test +// environmentVariables.set("GITLAB_USER", System.getenv("GITLAB_USER")); +// environmentVariables.set("GITLAB_EMAIL", System.getenv("GITLAB_EMAIL")); +// environmentVariables.set("GITLAB_TOKEN", System.getenv("GITLAB_TOKEN")); environmentVariables.set("HOBBIT_REDIS_HOST", "redis"); environmentVariables.set("CONTAINER_PARENT_CHECK", "0"); From fb1044ab43643f3d750eba1d25ca93c6624382e9 Mon Sep 17 00:00:00 2001 From: Pavel Smirnov Date: Tue, 18 Dec 2018 19:48:52 +0100 Subject: [PATCH 4/7] Forwarded parameters are filled with pure values --- platform-controller/pom.xml | 4 ++-- .../main/java/org/hobbit/controller/ExperimentManager.java | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/platform-controller/pom.xml b/platform-controller/pom.xml index 966b18bf..cdd2f32f 100644 --- a/platform-controller/pom.xml +++ b/platform-controller/pom.xml @@ -22,7 +22,7 @@ org.hobbit parent - 2.0.6-CLOUD + 2.0.6 ../parent-pom platform-controller @@ -53,7 +53,7 @@ org.hobbit aws-controller - 1.0.0-SNAPSHOT + 1.0.1 diff --git a/platform-controller/src/main/java/org/hobbit/controller/ExperimentManager.java b/platform-controller/src/main/java/org/hobbit/controller/ExperimentManager.java index 3ad973f4..8c2c3b52 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/ExperimentManager.java +++ b/platform-controller/src/main/java/org/hobbit/controller/ExperimentManager.java @@ -356,7 +356,9 @@ protected static String getSerializedSystemParams(ExperimentConfiguration config objIterator = benchParams.listObjectsOfProperty(experiment, parameter); // If there is a value, add it to the system model while (objIterator.hasNext()) { - systemModel.add(systemResource, parameter, objIterator.next()); + Literal valueLiteral = (Literal)objIterator.next(); + systemModel.add(systemResource, parameter, 
valueLiteral.getString()); + //systemModel.add(systemResource, parameter, objIterator.next()); } } } From faedf1195509589d4f9eeda28d96d53d8625af5d Mon Sep 17 00:00:00 2001 From: Pavel Smirnov Date: Wed, 19 Dec 2018 12:48:52 +0100 Subject: [PATCH 5/7] Fixed bug with update configurations --- platform-controller/AWS/bastion.yaml | 10 ++++++++++ platform-controller/pom.xml | 2 +- .../java/org/hobbit/controller/ExperimentManager.java | 2 +- .../cloud/aws/swarm/SwarmClusterManager.java | 2 -- .../test/java/org/hobbit/PlatformControllerTest.java | 2 +- platform-controller/stop-services.sh | 4 ++-- 6 files changed, 15 insertions(+), 7 deletions(-) diff --git a/platform-controller/AWS/bastion.yaml b/platform-controller/AWS/bastion.yaml index f74e763f..9f6e7d9d 100644 --- a/platform-controller/AWS/bastion.yaml +++ b/platform-controller/AWS/bastion.yaml @@ -53,6 +53,16 @@ Parameters: - B - C - D + DesiredCapacity: + Description: 'The number of manager nodes' + Type: Number + Default: 1 + AllowedValues: [1,3,5,7] + + MaxSize: + Description: 'MaxSize of manager nodes' + Type: Number + Default: 1 KeyName: Description: 'Optional key pair of the ec2-user to establish a SSH connection to the SSH bastion host/instance.' 
Type: String diff --git a/platform-controller/pom.xml b/platform-controller/pom.xml index cdd2f32f..e1053751 100644 --- a/platform-controller/pom.xml +++ b/platform-controller/pom.xml @@ -53,7 +53,7 @@ org.hobbit aws-controller - 1.0.1 + 1.0.2-SNAPSHOT diff --git a/platform-controller/src/main/java/org/hobbit/controller/ExperimentManager.java b/platform-controller/src/main/java/org/hobbit/controller/ExperimentManager.java index 8c2c3b52..7ed7985d 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/ExperimentManager.java +++ b/platform-controller/src/main/java/org/hobbit/controller/ExperimentManager.java @@ -393,7 +393,7 @@ public static String getClusterConfiguration(ExperimentConfiguration config) { Property parameter; //NodeIterator objIterator; //Resource systemResource = systemModel.getResource(config.systemUri); - Resource experiment = benchParams.getResource(Constants.NEW_EXPERIMENT_URI); + Resource experiment = benchParams.listResourcesWithProperty(RDF.type, HOBBIT.Experiment).next(); NodeIterator objIterator = benchParams.listObjectsOfProperty(experiment, new PropertyImpl(config.benchmarkUri+"#clusterConfig")); while (objIterator.hasNext()) { String ret = objIterator.next().asLiteral().getString(); diff --git a/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/SwarmClusterManager.java b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/SwarmClusterManager.java index 3f16da19..eb9aaf37 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/SwarmClusterManager.java +++ b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/SwarmClusterManager.java @@ -37,10 +37,8 @@ public class SwarmClusterManager extends VpcClusterManager { protected AbstractStackHandler swarmSystemWorkersStackHandler; //protected SwarmClusterStackHandler.Builder swarmStacksBuilder; - protected String clusterName; protected String swarmClusterName; protected String bucketName; - 
protected String sshKeyName; boolean clusterReady = false; diff --git a/platform-controller/src/test/java/org/hobbit/PlatformControllerTest.java b/platform-controller/src/test/java/org/hobbit/PlatformControllerTest.java index 08164500..8d6bbaba 100644 --- a/platform-controller/src/test/java/org/hobbit/PlatformControllerTest.java +++ b/platform-controller/src/test/java/org/hobbit/PlatformControllerTest.java @@ -102,7 +102,7 @@ public static void setEnvVars(){ //Setup this vars for your environment or define them here for the test // environmentVariables.set("GITLAB_USER", System.getenv("GITLAB_USER")); // environmentVariables.set("GITLAB_EMAIL", System.getenv("GITLAB_EMAIL")); -// environmentVariables.set("GITLAB_TOKEN", System.getenv("GITLAB_TOKEN")); +// environmentVariables.s", System.getenv("GITLAB_TOKEN")); environmentVariables.set("HOBBIT_REDIS_HOST", "redis"); environmentVariables.set("CONTAINER_PARENT_CHECK", "0"); diff --git a/platform-controller/stop-services.sh b/platform-controller/stop-services.sh index 8f9661e1..892cedaa 100644 --- a/platform-controller/stop-services.sh +++ b/platform-controller/stop-services.sh @@ -1,2 +1,2 @@ -sudo docker stop $(sudo docker ps -a | grep -v "hobbit_" | awk '{print $1}') -sudo docker service rm $(sudo docker service ls | grep -v "hobbit_") \ No newline at end of file +#sudo docker stop $(docker ps -a | grep -v "hobbit_" | awk '{print $1}') +docker service rm $(docker service ls | grep -v "hobbit_") \ No newline at end of file From beec4a0f0ca97ce2484f4880671a84b8bb2a0ab5 Mon Sep 17 00:00:00 2001 From: Pavel Smirnov Date: Thu, 20 Dec 2018 19:01:07 +0100 Subject: [PATCH 6/7] MaxSize value increased --- .../controller/cloud/aws/swarm/SwarmClusterManager.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/SwarmClusterManager.java b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/SwarmClusterManager.java 
index eb9aaf37..f18c009d 100644 --- a/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/SwarmClusterManager.java +++ b/platform-controller/src/main/java/org/hobbit/controller/cloud/aws/swarm/SwarmClusterManager.java @@ -132,8 +132,8 @@ protected Map getStackConfig(JsonObject configuration, String st ret.put(entry.getKey(), entry.getValue().getAsString()); if(entry.getKey().equals("DesiredCapacity")) - ret.put("MaxSize", entry.getValue().getAsString()); - //ret.put("MaxSize", String.valueOf(Integer.parseInt(entry.getValue().toString())+1)); + //ret.put("MaxSize", "10"); + ret.put("MaxSize", String.valueOf(Integer.parseInt(entry.getValue().toString())+1)); } return ret; } From 100c09e6060c4c822c0512411f9fccf8288d7f02 Mon Sep 17 00:00:00 2001 From: Pavel Smirnov Date: Wed, 30 Jan 2019 17:52:25 +0100 Subject: [PATCH 7/7] TaskId replaced with ServiceId --- platform-controller/pom.xml | 1 + .../hobbit/controller/ExperimentManager.java | 16 +- .../hobbit/controller/PlatformController.java | 103 +++++----- .../docker/CloudContainerManager.java | 13 +- .../controller/docker/ContainerManager.java | 80 ++++---- .../docker/ContainerManagerImpl.java | 186 ++++++++++------- .../docker/ContainerStateObserverImpl.java | 40 +--- .../docker/ResourceInformationCollector.java | 20 +- .../controller/utils/ServiceLogsReader.java | 169 ++++++++-------- .../controller/utils/ServiceLogsReader0.java | 190 ++++++++++++++++++ .../org/hobbit/PlatformControllerTest.java | 6 +- .../controller/ExperimentTimeoutTest.java | 24 ++- .../docker/ContainerManagerImplTest.java | 4 +- 13 files changed, 531 insertions(+), 321 deletions(-) create mode 100644 platform-controller/src/main/java/org/hobbit/controller/utils/ServiceLogsReader0.java diff --git a/platform-controller/pom.xml b/platform-controller/pom.xml index e1053751..643c7ee3 100644 --- a/platform-controller/pom.xml +++ b/platform-controller/pom.xml @@ -42,6 +42,7 @@ org.hobbit core + 1.0.12-ASYNC-COMMANDS