From 58fa7a56642920b384d1f3de792a67af44baa6b8 Mon Sep 17 00:00:00 2001 From: David McCormick Date: Thu, 14 Jun 2018 11:45:20 +0100 Subject: [PATCH 1/7] Remove the etcd-environment metadata section on the nodepools if Kubernetes.Networking.SelfHosting is Enabled. This is to break the dependency that exists on the nodepool stacks on etcd stack resources. --- .../config/templates/stack-template.json | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/core/nodepool/config/templates/stack-template.json b/core/nodepool/config/templates/stack-template.json index ff7fe167a..51c96d27d 100644 --- a/core/nodepool/config/templates/stack-template.json +++ b/core/nodepool/config/templates/stack-template.json @@ -1,11 +1,11 @@ -{{define "Metadata"}} +{{define "Metadata" -}} { "AWS::CloudFormation::Init" : { "configSets" : { - "etcd-client": [ "etcd-client-env" ]{{ if .AwsEnvironment.Enabled }}, - "aws-environment": [ "aws-environment-env" ]{{end}} + {{ if not .Kubernetes.Networking.SelfHosting.Enabled }}"etcd-client": [ "etcd-client-env" ]{{if .AwsEnvironment.Enabled}},{{end}}{{end}} + {{ if .AwsEnvironment.Enabled }}"aws-environment": [ "aws-environment-env" ]{{end}} }, - {{ if .AwsEnvironment.Enabled }} + {{ if .AwsEnvironment.Enabled -}} "aws-environment-env" : { "commands": { "write-environment": { @@ -18,8 +18,9 @@ } } } - }, - {{end}} + }{{ if not .Kubernetes.Networking.SelfHosting.Enabled }},{{end}} + {{end -}} + {{ if not .Kubernetes.Networking.SelfHosting.Enabled -}} "etcd-client-env": { "files" : { "/var/run/coreos/etcd-environment": { @@ -35,6 +36,7 @@ } } } + {{end}} } } {{end}} @@ -117,8 +119,9 @@ {{end}} ] } - }, + }{{ if or (not .Kubernetes.Networking.SelfHosting.Enabled) .AwsEnvironment.Enabled }}, "Metadata": {{template "Metadata" .}} + {{- end }} }, {{end}} {{define "AutoScaling"}} @@ -216,8 +219,9 @@ "PauseTime": "PT2M" {{end}} } - }, + }{{ if or (not .Kubernetes.Networking.SelfHosting.Enabled) .AwsEnvironment.Enabled }}, "Metadata": {{template "Metadata" .}} + {{- end }} }, {{if .NodeDrainer.Enabled }} "{{.LogicalName}}NodeDrainerLH" : { From e16f916bc8ca03e85a2e63b2f73b6b927197c1b9 Mon Sep 17 00:00:00 2001 From: David McCormick Date: Wed, 20 Jun 2018 14:28:22 +0100 Subject: [PATCH 2/7] disable fail fast --- core/root/cluster.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/core/root/cluster.go b/core/root/cluster.go index 586c66e00..ff5023e20 100644 --- a/core/root/cluster.go +++ b/core/root/cluster.go @@ -511,10 +511,11 @@ func (c clusterImpl) update(cfSvc *cloudformation.CloudFormation, targets Operat } // fail fast if it looks like we are trying to update a legacy cluster. if !exists { - logger.Errorf("the %s stack must exist in order to be able to update your cluster.", naming.FromStackToCfnResource(c.etcd.Etcd.LogicalName())) - logger.Error("we're sorry, but kube-aws can not presently upgrade your cluster to the new release with a separate ETCD Cloudformation stack.") - logger.Error("please consider backing up, destroying and recreating to upgrade.") - return "", fmt.Errorf("update not supported for clusters without a separate etcd cloudformation stack") + // logger.Errorf("the %s stack must exist in order to be able to update your cluster.", naming.FromStackToCfnResource(c.etcd.Etcd.LogicalName())) + // logger.Error("we're sorry, but kube-aws can not presently upgrade your cluster to the new release with a separate ETCD Cloudformation stack.") + // logger.Error("please consider backing up, destroying and recreating to upgrade.") + // return "", fmt.Errorf("update not supported for clusters without a separate etcd cloudformation stack") + logger.Warnf("WARNING!!! you are updating the cluster with legacy etcd stack... be careful, tread lightly...") } assets, err := c.generateAssets(c.operationTargetsFromUserInput([]OperationTargets{targets})) From 215d240e800ca25f122f3d2d7f88a8b83d3226ff Mon Sep 17 00:00:00 2001 From: Dave McCormick Date: Mon, 18 Jun 2018 01:35:50 +0100 Subject: [PATCH 3/7] Remove the etcd-environment metadata section on the nodepools if Kubernetes.Networking.SelfHosting is Enabled. (#1367) This is to break the dependency that exists on the nodepool stacks on etcd stack resources. Ref #1370 --- build | 5 +- .../config/templates/cloud-config-controller | 8 ++- .../config/templates/stack-template.json | 57 ++++++++++++++++++- .../config/templates/cloud-config-worker | 8 ++- 4 files changed, 70 insertions(+), 8 deletions(-) diff --git a/build b/build index 67dc02690..225779039 100755 --- a/build +++ b/build @@ -3,10 +3,13 @@ set -euo pipefail COMMIT=$(git rev-parse HEAD) TAG=$(git describe --exact-match --abbrev=0 --tags "${COMMIT}" 2> /dev/null || true) +BRANCH=$(git branch | grep \* | cut -d ' ' -f2 || true) OUTPUT_PATH=${OUTPUT_PATH:-"bin/kube-aws"} +VERSION="" if [ -z "$TAG" ]; then - VERSION=$COMMIT + [[ -n "$BRANCH" ]] && VERSION="${BRANCH}/" + VERSION="${VERSION}${COMMIT:0:8}" else VERSION=$TAG fi diff --git a/core/controlplane/config/templates/cloud-config-controller b/core/controlplane/config/templates/cloud-config-controller index 4822a97b9..4ea975e16 100644 --- a/core/controlplane/config/templates/cloud-config-controller +++ b/core/controlplane/config/templates/cloud-config-controller @@ -354,6 +354,7 @@ coreos: {{ if not .Kubernetes.Networking.SelfHosting.Enabled -}} Wants=flanneld.service {{ end -}} + Wants=rpc-statd.service Wants=decrypt-assets.service After=decrypt-assets.service @@ -444,6 +445,10 @@ coreos: [Install] WantedBy=multi-user.target + - name: rpc-statd.service + command: start + enable: true + {{ if eq .ContainerRuntime "rkt" }} - name: rkt-api.service enable: true @@ -508,9 +513,6 @@ coreos: ExecStart=/opt/bin/retry 3 /opt/bin/apply-kube-aws-plugins {{ if $.ElasticFileSystemID }} - - name: rpc-statd.service - command: start - enable: true - name: efs.service command: start content: | diff --git a/core/controlplane/config/templates/stack-template.json b/core/controlplane/config/templates/stack-template.json index fc236893e..3ebbf8b47 100644 --- a/core/controlplane/config/templates/stack-template.json +++ b/core/controlplane/config/templates/stack-template.json @@ -158,6 +158,57 @@ } } }, + "SecurityGroupWorker": { + "Properties": { + "GroupDescription": { + "Ref": "AWS::StackName" + }, + "SecurityGroupEgress": [ + { + "CidrIp": "0.0.0.0/0", + "FromPort": -1, + "IpProtocol": "icmp", + "ToPort": -1 + }, + { + "CidrIp": "0.0.0.0/0", + "FromPort": 0, + "IpProtocol": "tcp", + "ToPort": 65535 + }, + { + "CidrIp": "0.0.0.0/0", + "FromPort": 0, + "IpProtocol": "udp", + "ToPort": 65535 + } + ], + "SecurityGroupIngress": [ + {{ range $_, $r := $.SSHAccessAllowedSourceCIDRs -}} + { + "CidrIp": "{{$r}}", + "FromPort": 22, + "IpProtocol": "tcp", + "ToPort": 22 + }, + {{end -}} + { + "CidrIp": "0.0.0.0/0", + "FromPort": -1, + "IpProtocol": "icmp", + "ToPort": -1 + } + ], + "Tags": [ + { + "Key": "Name", + "Value": "{{$.ClusterName}}-sg-worker" + } + ], + "VpcId": {{.VPCRef}} + }, + "Type": "AWS::EC2::SecurityGroup" + }, {{ if not .Controller.IAMConfig.InstanceProfile.Arn }} "IAMInstanceProfileController": { "Properties": { @@ -617,7 +668,6 @@ {{quote $n}}: {{toJSON $r}} {{end}} }, - "Outputs": { {{ if not .Controller.IAMConfig.InstanceProfile.Arn }} "ControllerIAMRoleArn": { @@ -626,6 +676,11 @@ "Export": { "Name": { "Fn::Sub": "${AWS::StackName}-ControllerIAMRoleArn" } } }, {{end}} + "WorkerSecurityGroup" : { + "Description" : "The security group assigned to worker nodes", + "Value" : { "Ref" : "SecurityGroupWorker" }, + "Export" : { "Name" : {"Fn::Sub": "${AWS::StackName}-WorkerSecurityGroup" }} + }, "StackName": { "Description": "The name of this stack which is used by node pool stacks to import outputs from this stack", "Value": { "Ref": "AWS::StackName" } diff --git a/core/nodepool/config/templates/cloud-config-worker b/core/nodepool/config/templates/cloud-config-worker index 1bb497402..bbeb6a07a 100644 --- a/core/nodepool/config/templates/cloud-config-worker +++ b/core/nodepool/config/templates/cloud-config-worker @@ -407,6 +407,7 @@ coreos: Wants=flanneld.service cfn-etcd-environment.service After=cfn-etcd-environment.service {{ end -}} + Wants=rpc-statd.service Wants=decrypt-assets.service After=decrypt-assets.service {{- if .Gpu.Nvidia.IsEnabledOn .InstanceType }} @@ -517,6 +518,10 @@ coreos: [Install] WantedBy=multi-user.target + - name: rpc-statd.service + command: start + enable: true + {{ if eq .ContainerRuntime "rkt" }} - name: rkt-api.service enable: true @@ -614,9 +619,6 @@ coreos: {{end}} {{ if $.ElasticFileSystemID }} - - name: rpc-statd.service - command: start - enable: true - name: efs.service command: start content: | From 0687f89a87bdb9a6758950be5bcf893247124f2c Mon Sep 17 00:00:00 2001 From: David McCormick Date: Fri, 22 Jun 2018 11:34:42 +0100 Subject: [PATCH 4/7] Modified etcdadm with special export and import commands that will copy state from existing etcd over to the new ones during a migration. --- core/controlplane/config/config.go | 4 +- core/root/cluster.go | 62 +++++++++++++++++++++++++++--- etcdadm/etcdadm | 38 +++++++++++++++++- 3 files changed, 96 insertions(+), 8 deletions(-) diff --git a/core/controlplane/config/config.go b/core/controlplane/config/config.go index 653ab20e4..f0ea91836 100644 --- a/core/controlplane/config/config.go +++ b/core/controlplane/config/config.go @@ -571,7 +571,9 @@ type ControllerSettings struct { // Part of configuration which is specific to etcd nodes type EtcdSettings struct { - model.Etcd `yaml:"etcd,omitempty"` + model.Etcd `yaml:"etcd,omitempty"` + EtcdMigrationEnabled bool + EtcdMigrationExistingEndpoints string } // Cluster is the container of all the configurable parameters of a kube-aws cluster, customizable via cluster.yaml diff --git a/core/root/cluster.go b/core/root/cluster.go index ff5023e20..0e8ce1440 100644 --- a/core/root/cluster.go +++ b/core/root/cluster.go @@ -13,6 +13,7 @@ import ( "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/cloudformation" "github.com/aws/aws-sdk-go/service/cloudwatchlogs" + "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/s3" "github.com/kubernetes-incubator/kube-aws/awsconn" "github.com/kubernetes-incubator/kube-aws/cfnstack" @@ -504,18 +505,23 @@ func (c clusterImpl) LegacyUpdate(targets OperationTargets) (string, error) { func (c clusterImpl) update(cfSvc *cloudformation.CloudFormation, targets OperationTargets) (string, error) { + // Look at existing state of cloud formation and stacks to determine if we need to take special measures in migrating our etcd + // clusters from the control plane stack to their own Etcd stack. exists, err := cfnstack.NestedStackExists(cfSvc, c.controlPlane.ClusterName, naming.FromStackToCfnResource(c.etcd.Etcd.LogicalName())) if err != nil { logger.Errorf("please check your AWS credentials/permissions") return "", fmt.Errorf("can't lookup AWS CloudFormation stacks: %s", err) } - // fail fast if it looks like we are trying to update a legacy cluster. if !exists { - // logger.Errorf("the %s stack must exist in order to be able to update your cluster.", naming.FromStackToCfnResource(c.etcd.Etcd.LogicalName())) - // logger.Error("we're sorry, but kube-aws can not presently upgrade your cluster to the new release with a separate ETCD Cloudformation stack.") - // logger.Error("please consider backing up, destroying and recreating to upgrade.") - // return "", fmt.Errorf("update not supported for clusters without a separate etcd cloudformation stack") - logger.Warnf("WARNING!!! you are updating the cluster with legacy etcd stack... be careful, tread lightly...") + if !c.controlPlane.Kubernetes.Networking.SelfHosting.Enabled { + return "", fmt.Errorf("sorry, you can only update an existing legacy cluster with Kubernetes.Networking.SelfHosting enabled") + } + logger.Warnf("your cluster does not have a separate etcd stack, this update will spin up a new etcd cluster and attempt to import your existing state.") + c.etcd.EtcdMigrationEnabled = true + // look up existing etcd servers + if c.etcd.EtcdMigrationExistingEndpoints, err = c.lookupExistingEtcdEndpoints(); err != nil { + return "", fmt.Errorf("can't lookup existing etcd endpoints: %v", err) + } } assets, err := c.generateAssets(c.operationTargetsFromUserInput([]OperationTargets{targets})) @@ -547,6 +553,50 @@ func (c clusterImpl) update(cfSvc *cloudformation.CloudFormation, targets Operat return c.stackProvisioner().UpdateStackAtURLAndWait(cfSvc, templateUrl) } +// lookupExistingEtcdEndpoints supports the migration from embedded etcd servers to their own stack +// by looking up the existing etcd servers for a specific cluster and constructing and etcd endpoints +// list as used by tools such as etcdctl and the etcdadm script. +func (c clusterImpl) lookupExistingEtcdEndpoints() (string, error) { + ec2svc := ec2.New(c.session) + clusterTag := fmt.Sprintf("tag:kubernetes.io/cluster/%s", c.controlPlane.ClusterName) + params := &ec2.DescribeInstancesInput{ + Filters: []*ec2.Filter{ + { + Name: aws.String("tag:kube-aws:role"), + Values: []*string{aws.String("etcd")}, + }, + { + Name: aws.String(clusterTag), + Values: []*string{aws.String("owned")}, + }, + { + Name: aws.String("instance-state-name"), + Values: []*string{aws.String("running"), aws.String("pending")}, + }, + }, + } + logger.Debugf("Calling AWS EC2 DescribeInstances ->") + resp, err := ec2svc.DescribeInstances(params) + if err != nil { + return "", fmt.Errorf("can't lookup ec2 instances: %v", err) + } + + logger.Debugf("<- received %d instances from AWS", len(resp.Reservations)) + if len(resp.Reservations) == 0 { + return "", nil + } + // construct comma separated endpoints string + endpoints := []string{} + for _, res := range resp.Reservations { + for _, inst := range res.Instances { + endpoints = append(endpoints, fmt.Sprintf("https://%s:2379", *inst.PrivateDnsName)) + } + } + result := strings.Join(endpoints, ",") + logger.Debugf("Existing etcd endpoints found: %s", result) + return result, nil +} + func (c clusterImpl) ValidateTemplates() error { _, err := c.renderTemplateAsString() if err != nil { diff --git a/etcdadm/etcdadm b/etcdadm/etcdadm index a1f863730..c52a095ae 100755 --- a/etcdadm/etcdadm +++ b/etcdadm/etcdadm @@ -775,6 +775,10 @@ member_data_dir() { } member_etcdctl() { + raw_etcdctl etcdctl --endpoints "$(member_client_url)" ${*} +} + +raw_etcdctl() { local uuid_file local docker_opts=(--rm) @@ -796,7 +800,7 @@ member_etcdctl() { --volume="$(member_data_dir)":/var/lib/etcd \ --volume "$(member_snapshots_dir_name)":"$(member_host_snapshots_dir_path)" \ quay.io/coreos/etcd:v$etcd_version \ - etcdctl --endpoints "$(member_client_url)" ${*} + ${*} } member_is_healthy() { @@ -858,6 +862,29 @@ member_validate() { fi } +export_kubernetes_registry() { + local file_name=$1 + echo "Exporting kubernetes objects to $(member_host_snapshots_dir_path)/$file_name" + if cluster_is_healthy; then + (member_etcdctl get '/registry' --prefix --write-out="json") | jq -r '.kvs[] | "\(.key):\(.value)"' >"$(member_host_snapshots_dir_path)/$file_name" + else + _panic 'cluster is not healthy, can not export keys from an unhealthy cluster' + fi +} + +import_kubernetes_registry() { + local file_name=$1 + echo "Importing kubernetes objects to $(member_host_snapshots_dir_path)/$file_name" + if [[ ! -f "$(member_host_snapshots_dir_path)/$file_name" ]]; then + _panic "Can't import objects, file not found: $(member_host_snapshots_dir_path)/$file_name" + fi + if cluster_is_healthy; then + raw_etcdctl /bin/sh -c 'while read -u 10 l; do k=$(echo "${l%%:*}" | base64 -d); v=${l##*:}; echo "saving $k"; echo -e "$v" | base64 -d | etcdctl --endpoints='$(member_client_url)' put "$k"; done 10<'$(member_snapshots_dir_name)/$file_name + else + _panic 'cluster is not healthy, can not import keys to an unhealthy cluster' + fi +} + etcdadm_main() { local cmd=$1 @@ -865,6 +892,15 @@ etcdadm_main() { "save" ) member_save_snapshot ;; + "migration-export-kube-state" ) + export_kubernetes_registry migration-exported-objects.json + ;; + "migration-import-kube-state" ) + import_kubernetes_registry migration-exported-objects.json + ;; + "member-is-leader" ) + exit member_is_leader + ;; "replace" ) member_replace_failed ;; From 42f60290eec5a5728d1e3508015aea8b3c8d5c15 Mon Sep 17 00:00:00 2001 From: David McCormick Date: Fri, 22 Jun 2018 17:30:18 +0100 Subject: [PATCH 5/7] creating systemd migration units --- cfnstack/ec2.go | 9 ++ core/controlplane/config/config.go | 31 +++--- core/etcd/cluster/cluster.go | 105 ++++++++++++++++-- core/etcd/config/templates/cloud-config-etcd | 56 ++++++++++ .../etcd/config/templates/stack-template.json | 56 +++++++++- core/root/cluster.go | 50 --------- core/root/config/config.go | 3 +- etcdadm/etcdadm | 11 +- test/helper/cfn.go | 9 ++ test/integration/maincluster_test.go | 2 +- test/integration/plugin_test.go | 2 +- 11 files changed, 252 insertions(+), 82 deletions(-) create mode 100644 cfnstack/ec2.go diff --git a/cfnstack/ec2.go b/cfnstack/ec2.go new file mode 100644 index 000000000..802064439 --- /dev/null +++ b/cfnstack/ec2.go @@ -0,0 +1,9 @@ +package cfnstack + +import ( + "github.com/aws/aws-sdk-go/service/ec2" +) + +type EC2Interrogator interface { + DescribeInstances(input *ec2.DescribeInstancesInput) (*ec2.DescribeInstancesOutput, error) +} diff --git a/core/controlplane/config/config.go b/core/controlplane/config/config.go index f0ea91836..8eb6d8954 100644 --- a/core/controlplane/config/config.go +++ b/core/controlplane/config/config.go @@ -571,26 +571,25 @@ type ControllerSettings struct { // Part of configuration which is specific to etcd nodes type EtcdSettings struct { - model.Etcd `yaml:"etcd,omitempty"` - EtcdMigrationEnabled bool - EtcdMigrationExistingEndpoints string + model.Etcd `yaml:"etcd,omitempty"` } // Cluster is the container of all the configurable parameters of a kube-aws cluster, customizable via cluster.yaml type Cluster struct { - KubeClusterSettings `yaml:",inline"` - DeploymentSettings `yaml:",inline"` - DefaultWorkerSettings `yaml:",inline"` - ControllerSettings `yaml:",inline"` - EtcdSettings `yaml:",inline"` - AdminAPIEndpointName string `yaml:"adminAPIEndpointName,omitempty"` - RecordSetTTL int `yaml:"recordSetTTL,omitempty"` - TLSCADurationDays int `yaml:"tlsCADurationDays,omitempty"` - TLSCertDurationDays int `yaml:"tlsCertDurationDays,omitempty"` - HostedZoneID string `yaml:"hostedZoneId,omitempty"` - PluginConfigs model.PluginConfigs `yaml:"kubeAwsPlugins,omitempty"` - ProvidedEncryptService EncryptService - ProvidedCFInterrogator cfnstack.CFInterrogator + KubeClusterSettings `yaml:",inline"` + DeploymentSettings `yaml:",inline"` + DefaultWorkerSettings `yaml:",inline"` + ControllerSettings `yaml:",inline"` + EtcdSettings `yaml:",inline"` + AdminAPIEndpointName string `yaml:"adminAPIEndpointName,omitempty"` + RecordSetTTL int `yaml:"recordSetTTL,omitempty"` + TLSCADurationDays int `yaml:"tlsCADurationDays,omitempty"` + TLSCertDurationDays int `yaml:"tlsCertDurationDays,omitempty"` + HostedZoneID string `yaml:"hostedZoneId,omitempty"` + PluginConfigs model.PluginConfigs `yaml:"kubeAwsPlugins,omitempty"` + ProvidedEncryptService EncryptService + ProvidedCFInterrogator cfnstack.CFInterrogator + ProvidedEC2Interrogator cfnstack.EC2Interrogator // SSHAccessAllowedSourceCIDRs is network ranges of sources you'd like SSH accesses to be allowed from, in CIDR notation SSHAccessAllowedSourceCIDRs model.CIDRRanges `yaml:"sshAccessAllowedSourceCIDRs,omitempty"` CustomSettings map[string]interface{} `yaml:"customSettings,omitempty"` diff --git a/core/etcd/cluster/cluster.go b/core/etcd/cluster/cluster.go index 2755fb304..08ef146cd 100644 --- a/core/etcd/cluster/cluster.go +++ b/core/etcd/cluster/cluster.go @@ -2,6 +2,7 @@ package cluster import ( "fmt" + "strings" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" @@ -41,6 +42,20 @@ type Cluster struct { assets cfnstack.Assets } +// ExistingState describes the existing state of the etcd cluster +type ExistingState struct { + StackExists bool + EtcdMigrationEnabled bool + EtcdMigrationExistingEndpoints string +} + +// An EtcdConfigurationContext contains configuration settings/options mixed with existing state in a way that can be +// consumed by stack and cloud-config templates. +type EtcdConfigurationContext struct { + *config.Config + ExistingState +} + type ec2Service interface { CreateVolume(*ec2.CreateVolumeInput) (*ec2.Volume, error) DescribeVpcs(*ec2.DescribeVpcsInput) (*ec2.DescribeVpcsOutput, error) @@ -117,9 +132,6 @@ func (c *ClusterRef) validateExistingVPCState(ec2Svc ec2Service) error { func NewCluster(cfgRef *config.Cluster, opts config.StackTemplateOptions, plugins []*pluginmodel.Plugin, session *session.Session) (*Cluster, error) { cfg := &config.Cluster{} *cfg = *cfgRef - if cfg.ProvidedCFInterrogator == nil { - cfg.ProvidedCFInterrogator = cloudformation.New(session) - } // Import all the managed subnets from the network stack var err error @@ -164,16 +176,48 @@ func NewCluster(cfgRef *config.Cluster, opts config.StackTemplateOptions, plugin c.StackConfig.Etcd.CustomSystemdUnits = append(c.StackConfig.Etcd.CustomSystemdUnits, extraEtcd.SystemdUnits...) c.StackConfig.Etcd.CustomFiles = append(c.StackConfig.Etcd.CustomFiles, extraEtcd.Files...) c.StackConfig.Etcd.IAMConfig.Policy.Statements = append(c.StackConfig.Etcd.IAMConfig.Policy.Statements, extraEtcd.IAMPolicyStatements...) - c.StackConfig.Etcd.StackExists, err = cfnstack.NestedStackExists(cfg.ProvidedCFInterrogator, c.ClusterName, naming.FromStackToCfnResource(c.Etcd.LogicalName())) + + // create the context that will be used to build the assets (combination of config + existing state) + state, err := c.inspectExistingState() if err != nil { - return nil, fmt.Errorf("failed to check for existence of etcd cloud-formation stack: %v", err) + return nil, fmt.Errorf("Could not inspect existing etcd state: %v", err) } - - c.assets, err = c.buildAssets() + ctx := EtcdConfigurationContext{ + Config: c.StackConfig.Config, + ExistingState: state, + } + c.assets, err = c.buildAssets(ctx) return c, err } +func (c *Cluster) inspectExistingState() (ExistingState, error) { + var err error + if c.ProvidedCFInterrogator == nil { + c.ProvidedCFInterrogator = cloudformation.New(c.session) + } + if c.ProvidedEC2Interrogator == nil { + c.ProvidedEC2Interrogator = ec2.New(c.session) + } + + state := ExistingState{} + state.StackExists, err = cfnstack.NestedStackExists(c.ProvidedCFInterrogator, c.ClusterName, naming.FromStackToCfnResource(c.Etcd.LogicalName())) + if err != nil { + return state, fmt.Errorf("failed to check for existence of etcd cloud-formation stack: %v", err) + } + // when the Etcd stack does not exist but we have existing etcd instances then we need to enable the + // etcd migration units. + if !state.StackExists { + if state.EtcdMigrationExistingEndpoints, err = c.lookupExistingEtcdEndpoints(); err != nil { + return state, fmt.Errorf("failed to lookup existing etcd endpoints: %v", err) + } + if state.EtcdMigrationExistingEndpoints != "" { + state.EtcdMigrationEnabled = true + } + } + return state, nil +} + func (c *Cluster) Assets() cfnstack.Assets { return c.assets } @@ -185,11 +229,11 @@ func (c Cluster) NestedStackName() string { return naming.FromStackToCfnResource(c.StackName) } -func (c *Cluster) buildAssets() (cfnstack.Assets, error) { +func (c *Cluster) buildAssets(ctx EtcdConfigurationContext) (cfnstack.Assets, error) { var err error assets := cfnstack.NewAssetsBuilder(c.StackName, c.StackConfig.ClusterExportedStacksS3URI(), c.StackConfig.Region) - if c.StackConfig.UserDataEtcd, err = model.NewUserData(c.StackTemplateOptions.EtcdTmplFile, c.StackConfig.Config); err != nil { + if c.StackConfig.UserDataEtcd, err = model.NewUserData(c.StackTemplateOptions.EtcdTmplFile, ctx); err != nil { return nil, fmt.Errorf("failed to render etcd cloud config: %v", err) } @@ -265,3 +309,46 @@ func (c *Cluster) String() string { func (c *ClusterRef) Destroy() error { return cfnstack.NewDestroyer("etcd", c.session, c.CloudFormation.RoleARN).Destroy() } + +// lookupExistingEtcdEndpoints supports the migration from embedded etcd servers to their own stack +// by looking up the existing etcd servers for a specific cluster and constructing and etcd endpoints +// list as used by tools such as etcdctl and the etcdadm script. +func (c Cluster) lookupExistingEtcdEndpoints() (string, error) { + clusterTag := fmt.Sprintf("tag:kubernetes.io/cluster/%s", c.ClusterName) + params := &ec2.DescribeInstancesInput{ + Filters: []*ec2.Filter{ + { + Name: aws.String("tag:kube-aws:role"), + Values: []*string{aws.String("etcd")}, + }, + { + Name: aws.String(clusterTag), + Values: []*string{aws.String("owned")}, + }, + { + Name: aws.String("instance-state-name"), + Values: []*string{aws.String("running"), aws.String("pending")}, + }, + }, + } + logger.Debugf("Calling AWS EC2 DescribeInstances ->") + resp, err := c.ProvidedEC2Interrogator.DescribeInstances(params) + if err != nil { + return "", fmt.Errorf("can't lookup ec2 instances: %v", err) + } + + logger.Debugf("<- received %d instances from AWS", len(resp.Reservations)) + if len(resp.Reservations) == 0 { + return "", nil + } + // construct comma separated endpoints string + endpoints := []string{} + for _, res := range resp.Reservations { + for _, inst := range res.Instances { + endpoints = append(endpoints, fmt.Sprintf("https://%s:2379", *inst.PrivateDnsName)) + } + } + result := strings.Join(endpoints, ",") + logger.Debugf("Existing etcd endpoints found: %s", result) + return result, nil +} diff --git a/core/etcd/config/templates/cloud-config-etcd b/core/etcd/config/templates/cloud-config-etcd index 83449c455..d9ef3a066 100644 --- a/core/etcd/config/templates/cloud-config-etcd +++ b/core/etcd/config/templates/cloud-config-etcd @@ -207,6 +207,56 @@ coreos: [Install] WantedBy=multi-user.target {{end}} + {{ if .EtcdMigrationEnabled -}} + - name: export-existing-etcd-state.service + enable: true + content: | + [Unit] + Description=Exports Kubernetes Values from a remote Etcd cluster + Wants=cfn-etcd-environment.service + After=etcd-member.service + + [Service] + Type=simple + Restart=Restart=on-failure + RestartSec=5 + EnvironmentFile=-/etc/etcd-environment + EnvironmentFile=/var/run/coreos/etcdadm-environment-migration + ExecStartPre=/opt/bin/etcdadm cluster-is-healthy + ExecStartPre=/bin/bash -c "(/opt/bin/etcdadm member-is-leader && /opt/bin/etcdadm migration-export-kube-state existing-state-file.json && mv /var/run/coreos/etcdadm/snapshots/existing-state-file.json /var/run/coreos/etcdadm/snapshots/exported-state-file.json) || touch /var/run/coreos/etcdadm/snapshots/exported-state-file.json)" + ExecStart=/bin/sleep 3600 + TimeoutStartSec=900 + - name: import-existing-etcd-state.path + enable: true + command: start + content: | + [Path] + PathExists=/var/run/coreos/etcdadm/snapshots/exported-state-file.json + + [Install] + WantedBy=default.target + - name: import-existing-etcd-state.service + enable: false + command: stop + content: | + [Unit] + Description=Imports Kubernetes Values from export file into this cluster + Wants=cfn-etcd-environment.service + After=etcd-member.service + After=export-existing-etcd-state.service + + [Service] + Type=simple + Restart=Restart=on-failure + RestartSec=5 + EnvironmentFile=-/etc/etcd-environment + EnvironmentFile=/var/run/coreos/etcdadm-environment + ExecStartPre=/usr/bin/systemctl is-active export-existing-etcd-state.service + ExecStartPre=/opt/bin/etcdadm cluster-is-healthy + ExecStartPre=/bin/bash -c "(/opt/bin/etcdadm member-is-leader && /opt/bin/etcdadm migration-import-kube-state /var/run/coreos/etcdadm/snapshots/exported-state-file.json) || /bin/true)" + ExecStart=/bin/sleep 3600 + TimeoutStartSec=900 + {{ end -}} {{if .Etcd.DisasterRecovery.SupportsEtcdVersion .Etcd.Version -}} - name: etcdadm-reconfigure.service enable: true @@ -433,6 +483,9 @@ coreos: [Unit] Wants={{.Etcd.SystemdUnitName}} After={{.Etcd.SystemdUnitName}} + {{ if .EtcdMigrationEnabled -}} + After=import-existing-etcd-state.service + {{ end -}} [Service] Type=simple @@ -441,6 +494,9 @@ coreos: EnvironmentFile={{.EtcdNodeEnvFileName}} ExecStartPre=/usr/bin/systemctl is-active {{.Etcd.SystemdUnitName}} + {{ if .EtcdMigrationEnabled -}} + ExecStartPre=/usr/bin/systemctl is-active import-existing-etcd-state.service + {{ end -}} ExecStartPre=/usr/bin/rkt fetch {{.AWSCliImage.Options}}{{.AWSCliImage.RktRepo}} ExecStart=-/opt/bin/cfn-signal {{end}} diff --git a/core/etcd/config/templates/stack-template.json b/core/etcd/config/templates/stack-template.json index 8a9fbc513..2f3142534 100644 --- a/core/etcd/config/templates/stack-template.json +++ b/core/etcd/config/templates/stack-template.json @@ -388,6 +388,60 @@ "'\n" ]]} }, + {{ if .EtcdMigrationEnabled -}} + "/var/run/coreos/etcdadm-environment-migration": { + "content": { "Fn::Join" : [ "", [ + "ETCD_ENDPOINTS='", + "{{ .EtcdMigrationExistingEndpoints }}", + "'\n", + "AWS_DEFAULT_REGION='", + "{{$.Region}}", + "'\n", + "KUBERNETES_CLUSTER='", + "{{$.ClusterName}}", + "'\n", + "ETCDCTL_CACERT='", + "/etc/ssl/certs/etcd-trusted-ca.pem", + "'\n", + "ETCDCTL_CERT='", + "/etc/ssl/certs/etcd-client.pem", + "'\n", + "ETCDCTL_KEY='", + "/etc/ssl/certs/etcd-client-key.pem", + "'\n", + "ETCDCTL_CA_FILE='", + "/etc/ssl/certs/etcd-trusted-ca.pem", + "'\n", + "ETCDCTL_CERT_FILE='", + "/etc/ssl/certs/etcd-client.pem", + "'\n", + "ETCDCTL_KEY_FILE='", + "/etc/ssl/certs/etcd-client-key.pem", + "'\n", + "ETCDADM_MEMBER_SYSTEMD_SERVICE_NAME='", + "etcd-member", + "'\n", + "ETCDADM_CLUSTER_SNAPSHOTS_S3_URI='", + { "Fn::Join" : [ "", ["s3://", {{$.EtcdSnapshotsS3PathRef}} ]] }, + "'\n", + "ETCDADM_STATE_FILES_DIR='", + "/var/run/coreos/etcdadm", + "'\n", + "ETCDADM_MEMBER_ENV_FILE='", + "/var/run/coreos/etcdadm/etcd-member.env", + "'\n", + "ETCDADM_MEMBER_COUNT='", + "{{$.Etcd.Count}}", + "'\n", + "ETCDADM_MEMBER_INDEX='", + "{{$etcdIndex}}", + "'\n", + "ETCD_VERSION='", + "{{$.Etcd.Version}}", + "'\n" + ]]} + }, + {{ end -}} "/var/run/coreos/etcdadm-environment": { "content": { "Fn::Join" : [ "", [ "ETCD_ENDPOINTS='", @@ -451,7 +505,7 @@ }, "DependsOn": [ {{if $etcdInstance.DependencyExists}}{{$etcdInstance.DependencyRef}},{{end}} - {{if $.Etcd.StackExists}}{{if $etcdIndex}}"{{$.Etcd.LogicalName}}{{sub $etcdIndex 1}}",{{end}}{{end}} + {{if $.StackExists}}{{if $etcdIndex}}"{{$.Etcd.LogicalName}}{{sub $etcdIndex 1}}",{{end}}{{end}} {{if $etcdInstance.EIPManaged}} "{{$etcdInstance.EIPLogicalName}}", {{end}} diff --git a/core/root/cluster.go b/core/root/cluster.go index 0e8ce1440..d203c3cb4 100644 --- a/core/root/cluster.go +++ b/core/root/cluster.go @@ -13,7 +13,6 @@ import ( "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/cloudformation" "github.com/aws/aws-sdk-go/service/cloudwatchlogs" - "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/s3" "github.com/kubernetes-incubator/kube-aws/awsconn" "github.com/kubernetes-incubator/kube-aws/cfnstack" @@ -517,11 +516,6 @@ func (c clusterImpl) update(cfSvc *cloudformation.CloudFormation, targets Operat return "", fmt.Errorf("sorry, you can only update an existing legacy cluster with Kubernetes.Networking.SelfHosting enabled") } logger.Warnf("your cluster does not have a separate etcd stack, this update will spin up a new etcd cluster and attempt to import your existing state.") - c.etcd.EtcdMigrationEnabled = true - // look up existing etcd servers - if c.etcd.EtcdMigrationExistingEndpoints, err = c.lookupExistingEtcdEndpoints(); err != nil { - return "", fmt.Errorf("can't lookup existing etcd endpoints: %v", err) - } } assets, err := c.generateAssets(c.operationTargetsFromUserInput([]OperationTargets{targets})) @@ -553,50 +547,6 @@ func (c clusterImpl) update(cfSvc *cloudformation.CloudFormation, targets Operat return c.stackProvisioner().UpdateStackAtURLAndWait(cfSvc, templateUrl) } -// lookupExistingEtcdEndpoints supports the migration from embedded etcd servers to their own stack -// by looking up the existing etcd servers for a specific cluster and constructing and etcd endpoints -// list as used by tools such as etcdctl and the etcdadm script. -func (c clusterImpl) lookupExistingEtcdEndpoints() (string, error) { - ec2svc := ec2.New(c.session) - clusterTag := fmt.Sprintf("tag:kubernetes.io/cluster/%s", c.controlPlane.ClusterName) - params := &ec2.DescribeInstancesInput{ - Filters: []*ec2.Filter{ - { - Name: aws.String("tag:kube-aws:role"), - Values: []*string{aws.String("etcd")}, - }, - { - Name: aws.String(clusterTag), - Values: []*string{aws.String("owned")}, - }, - { - Name: aws.String("instance-state-name"), - Values: []*string{aws.String("running"), aws.String("pending")}, - }, - }, - } - logger.Debugf("Calling AWS EC2 DescribeInstances ->") - resp, err := ec2svc.DescribeInstances(params) - if err != nil { - return "", fmt.Errorf("can't lookup ec2 instances: %v", err) - } - - logger.Debugf("<- received %d instances from AWS", len(resp.Reservations)) - if len(resp.Reservations) == 0 { - return "", nil - } - // construct comma separated endpoints string - endpoints := []string{} - for _, res := range resp.Reservations { - for _, inst := range res.Instances { - endpoints = append(endpoints, fmt.Sprintf("https://%s:2379", *inst.PrivateDnsName)) - } - } - result := strings.Join(endpoints, ",") - logger.Debugf("Existing etcd endpoints found: %s", result) - return result, nil -} - func (c clusterImpl) ValidateTemplates() error { _, err := c.renderTemplateAsString() if err != nil { diff --git a/core/root/config/config.go b/core/root/config/config.go index f4122f449..a329bd11f 100644 --- a/core/root/config/config.go +++ b/core/root/config/config.go @@ -186,13 +186,14 @@ func failFastWhenUnknownKeysFound(vs []unknownKeyValidation) error { return nil } -func ConfigFromBytesWithStubs(data []byte, plugins []*pluginmodel.Plugin, encryptService controlplane.EncryptService, cf cfnstack.CFInterrogator) (*Config, error) { +func ConfigFromBytesWithStubs(data []byte, plugins []*pluginmodel.Plugin, encryptService controlplane.EncryptService, cf cfnstack.CFInterrogator, ec cfnstack.EC2Interrogator) (*Config, error) { c, err := ConfigFromBytes(data, plugins) if err != nil { return nil, err } c.ProvidedEncryptService = encryptService c.ProvidedCFInterrogator = cf + c.ProvidedEC2Interrogator = ec // Uses the same encrypt service for node pools for consistency for _, p := range c.NodePools { diff --git a/etcdadm/etcdadm b/etcdadm/etcdadm index c52a095ae..97d201e72 100755 --- a/etcdadm/etcdadm +++ b/etcdadm/etcdadm @@ -892,14 +892,19 @@ etcdadm_main() { "save" ) member_save_snapshot ;; + "cluster-is-healthy" ) + cluster_is_healthy + ;; "migration-export-kube-state" ) - export_kubernetes_registry migration-exported-objects.json + export_kubernetes_registry $2 + shift ;; "migration-import-kube-state" ) - import_kubernetes_registry migration-exported-objects.json + import_kubernetes_registry $2 + shift ;; "member-is-leader" ) - exit member_is_leader + member_is_leader ;; "replace" ) member_replace_failed diff --git a/test/helper/cfn.go b/test/helper/cfn.go index 32ba663c1..e9be5c6c8 100644 --- a/test/helper/cfn.go +++ b/test/helper/cfn.go @@ -4,6 +4,7 @@ import ( "fmt" "github.com/aws/aws-sdk-go/service/cloudformation" + "github.com/aws/aws-sdk-go/service/ec2" ) type DummyCloudformationService struct { @@ -26,6 +27,14 @@ func (cf DummyCFInterrogator) ListStackResources(input *cloudformation.ListStack return cf.ListStacksResourcesResult, nil } +type DummyEC2Interrogator struct { + DescribeInstancesOutput *ec2.DescribeInstancesOutput +} + +func (ec DummyEC2Interrogator) DescribeInstances(input *ec2.DescribeInstancesInput) (*ec2.DescribeInstancesOutput, error) { + return ec.DescribeInstancesOutput, nil +} + func (cfSvc *DummyCloudformationService) CreateStack(req *cloudformation.CreateStackInput) (*cloudformation.CreateStackOutput, error) { if len(cfSvc.ExpectedTags) != len(req.Tags) { diff --git a/test/integration/maincluster_test.go b/test/integration/maincluster_test.go index 4e8a784f0..058414035 100644 --- a/test/integration/maincluster_test.go +++ b/test/integration/maincluster_test.go @@ -3489,7 +3489,7 @@ worker: configBytes := validCase.configYaml // TODO Allow including plugins in test data? plugins := []*pluginmodel.Plugin{} - providedConfig, err := config.ConfigFromBytesWithStubs([]byte(configBytes), plugins, helper.DummyEncryptService{}, helper.DummyCFInterrogator{}) + providedConfig, err := config.ConfigFromBytesWithStubs([]byte(configBytes), plugins, helper.DummyEncryptService{}, helper.DummyCFInterrogator{}, helper.DummyEC2Interrogator{}) if err != nil { t.Errorf("failed to parse config %s: %v", configBytes, err) t.FailNow() diff --git a/test/integration/plugin_test.go b/test/integration/plugin_test.go index c37770405..e8059429b 100644 --- a/test/integration/plugin_test.go +++ b/test/integration/plugin_test.go @@ -495,7 +495,7 @@ spec: } configBytes := validCase.clusterYaml - providedConfig, err := config.ConfigFromBytesWithStubs([]byte(configBytes), plugins, helper.DummyEncryptService{}, helper.DummyCFInterrogator{}) + providedConfig, err := config.ConfigFromBytesWithStubs([]byte(configBytes), plugins, helper.DummyEncryptService{}, helper.DummyCFInterrogator{}, helper.DummyEC2Interrogator{}) if err != nil { t.Errorf("failed to parse config %s: %v", configBytes, err) t.FailNow() From d8bc8acbf572238674a92a7a88a543837b13710b Mon Sep 17 00:00:00 2001 From: David McCormick Date: Thu, 28 Jun 2018 15:17:06 +0100 Subject: [PATCH 6/7] save existing state into etcd's stack config and pass to cloud-config render routine. --- core/controlplane/config/stack_config.go | 2 + core/etcd/cluster/cluster.go | 44 +++++----- core/etcd/config/stack_config.go | 83 +++++++++++++++++++ core/etcd/config/templates/cloud-config-etcd | 17 ++-- .../etcd/config/templates/stack-template.json | 4 +- model/existing_etcd.go | 8 ++ 6 files changed, 130 insertions(+), 28 deletions(-) create mode 100644 core/etcd/config/stack_config.go create mode 100644 model/existing_etcd.go diff --git a/core/controlplane/config/stack_config.go b/core/controlplane/config/stack_config.go index 1bc208197..4aef8d283 100644 --- a/core/controlplane/config/stack_config.go +++ b/core/controlplane/config/stack_config.go @@ -6,6 +6,7 @@ import ( "strings" "github.com/kubernetes-incubator/kube-aws/filereader/jsontemplate" + "github.com/kubernetes-incubator/kube-aws/logger" "github.com/kubernetes-incubator/kube-aws/model" ) @@ -63,6 +64,7 @@ func (c *StackConfig) RenderStackTemplateAsBytes() ([]byte, error) { } func (c *StackConfig) RenderStackTemplateAsString() (string, error) { + logger.Debugf("Called RenderStackTemplateAsString on %s", c.StackName) bytes, err := c.RenderStackTemplateAsBytes() return string(bytes), err } diff --git a/core/etcd/cluster/cluster.go b/core/etcd/cluster/cluster.go index 08ef146cd..87c70fe6d 100644 --- a/core/etcd/cluster/cluster.go +++ b/core/etcd/cluster/cluster.go @@ -11,7 +11,8 @@ import ( "github.com/kubernetes-incubator/kube-aws/cfnstack" controlplanecluster "github.com/kubernetes-incubator/kube-aws/core/controlplane/cluster" - "github.com/kubernetes-incubator/kube-aws/core/controlplane/config" + controlplaneconfig "github.com/kubernetes-incubator/kube-aws/core/controlplane/config" + "github.com/kubernetes-incubator/kube-aws/core/etcd/config" "github.com/kubernetes-incubator/kube-aws/logger" "github.com/kubernetes-incubator/kube-aws/model" "github.com/kubernetes-incubator/kube-aws/naming" @@ -24,7 +25,7 @@ var VERSION = "UNKNOWN" const STACK_TEMPLATE_FILENAME = "stack.json" -func newClusterRef(cfg *config.Cluster, session *session.Session) *ClusterRef { +func newClusterRef(cfg *controlplaneconfig.Cluster, session *session.Session) *ClusterRef { return &ClusterRef{ Cluster: cfg, session: session, @@ -32,7 +33,7 @@ func newClusterRef(cfg *config.Cluster, session *session.Session) *ClusterRef { } type ClusterRef struct { - *config.Cluster + *controlplaneconfig.Cluster session *session.Session } @@ -42,18 +43,11 @@ type Cluster struct { assets cfnstack.Assets } -// ExistingState describes the existing state of the etcd cluster -type ExistingState struct { - StackExists bool - EtcdMigrationEnabled bool - EtcdMigrationExistingEndpoints string -} - // An EtcdConfigurationContext contains configuration settings/options mixed with existing state in a way that can be // consumed by stack and cloud-config templates. type EtcdConfigurationContext struct { - *config.Config - ExistingState + *controlplaneconfig.Config + model.EtcdExistingState } type ec2Service interface { @@ -129,8 +123,9 @@ func (c *ClusterRef) validateExistingVPCState(ec2Svc ec2Service) error { return nil } -func NewCluster(cfgRef *config.Cluster, opts config.StackTemplateOptions, plugins []*pluginmodel.Plugin, session *session.Session) (*Cluster, error) { - cfg := &config.Cluster{} +func NewCluster(cfgRef *controlplaneconfig.Cluster, opts controlplaneconfig.StackTemplateOptions, plugins []*pluginmodel.Plugin, session *session.Session) (*Cluster, error) { + logger.Debugf("Called etcd.NewCluster") + cfg := &controlplaneconfig.Cluster{} *cfg = *cfgRef // Import all the managed subnets from the network stack @@ -148,14 +143,16 @@ func NewCluster(cfgRef *config.Cluster, opts config.StackTemplateOptions, plugin clusterRef.KubeAWSVersion = controlplanecluster.VERSION clusterRef.HostOS = cfgRef.HostOS - stackConfig, err := clusterRef.StackConfig("etcd", opts, session, plugins) + cpStackConfig, err := clusterRef.StackConfig("etcd", opts, session, plugins) if err != nil { return nil, err } + // hack - mutate our controlplane generated stack config into our own specific etcd version + etcdStackConfig := config.NewEtcdStackConfig(cpStackConfig) c := &Cluster{ ClusterRef: clusterRef, - StackConfig: stackConfig, + StackConfig: etcdStackConfig, } // Notes: @@ -178,20 +175,21 @@ func NewCluster(cfgRef *config.Cluster, opts config.StackTemplateOptions, plugin c.StackConfig.Etcd.IAMConfig.Policy.Statements = append(c.StackConfig.Etcd.IAMConfig.Policy.Statements, extraEtcd.IAMPolicyStatements...) // create the context that will be used to build the assets (combination of config + existing state) - state, err := c.inspectExistingState() + c.StackConfig.EtcdExistingState, err = c.inspectExistingState() if err != nil { return nil, fmt.Errorf("Could not inspect existing etcd state: %v", err) } ctx := EtcdConfigurationContext{ - Config: c.StackConfig.Config, - ExistingState: state, + Config: c.StackConfig.Config, + EtcdExistingState: c.StackConfig.EtcdExistingState, } + c.assets, err = c.buildAssets(ctx) return c, err } -func (c *Cluster) inspectExistingState() (ExistingState, error) { +func (c *Cluster) inspectExistingState() (model.EtcdExistingState, error) { var err error if c.ProvidedCFInterrogator == nil { c.ProvidedCFInterrogator = cloudformation.New(c.session) @@ -200,7 +198,7 @@ func (c *Cluster) inspectExistingState() (ExistingState, error) { c.ProvidedEC2Interrogator = ec2.New(c.session) } - state := ExistingState{} + state := model.EtcdExistingState{} state.StackExists, err = cfnstack.NestedStackExists(c.ProvidedCFInterrogator, c.ClusterName, naming.FromStackToCfnResource(c.Etcd.LogicalName())) if err != nil { return state, fmt.Errorf("failed to check for existence of etcd cloud-formation stack: %v", err) @@ -230,6 +228,8 @@ func (c Cluster) NestedStackName() string { } func (c *Cluster) buildAssets(ctx EtcdConfigurationContext) (cfnstack.Assets, error) { + logger.Debugf("Called etcd.buildAssets") + logger.Debugf("Context is: %+v", ctx) var err error assets := cfnstack.NewAssetsBuilder(c.StackName, c.StackConfig.ClusterExportedStacksS3URI(), c.StackConfig.Region) @@ -246,8 +246,10 @@ func (c *Cluster) buildAssets(ctx EtcdConfigurationContext) (cfnstack.Assets, er return nil, fmt.Errorf("Error while rendering template: %v", err) } + logger.Debugf("Calling assets.Add on %s", STACK_TEMPLATE_FILENAME) assets.Add(STACK_TEMPLATE_FILENAME, stackTemplate) + logger.Debugf("Calling assets.Build for etcd...") return assets.Build(), nil } diff --git a/core/etcd/config/stack_config.go b/core/etcd/config/stack_config.go new file mode 100644 index 000000000..514715d60 --- /dev/null +++ b/core/etcd/config/stack_config.go @@ -0,0 +1,83 @@ +package config + +import ( + "fmt" + "net/url" + "strings" + + controlplaneconfig "github.com/kubernetes-incubator/kube-aws/core/controlplane/config" + "github.com/kubernetes-incubator/kube-aws/filereader/jsontemplate" + "github.com/kubernetes-incubator/kube-aws/logger" + "github.com/kubernetes-incubator/kube-aws/model" +) + +// StackConfig contains configuration parameters available when rendering CFN stack template from golang text templates +type StackConfig struct { + *controlplaneconfig.Config + StackName string + controlplaneconfig.StackTemplateOptions + UserDataEtcd model.UserData + ExtraCfnResources map[string]interface{} + model.EtcdExistingState +} + +func (c *StackConfig) s3Folders() model.S3Folders { + return model.NewS3Folders(c.S3URI, c.ClusterName) +} + +func (c *StackConfig) ClusterS3URI() string { + return c.s3Folders().Cluster().URI() +} + +func (c *StackConfig) ClusterExportedStacksS3URI() string { + return c.s3Folders().ClusterExportedStacks().URI() +} + +// EtcdSnapshotsS3Path is a pair of a S3 bucket and a key of an S3 object containing an etcd cluster snapshot +func (c StackConfig) EtcdSnapshotsS3PathRef() (string, error) { + s3uri, err := url.Parse(c.ClusterS3URI()) + if err != nil { + return "", fmt.Errorf("Error in EtcdSnapshotsS3PathRef : %v", err) + } + return fmt.Sprintf(`{ "Fn::Join" : [ "", [ "%s%s/instances/", { "Fn::Select" : [ "2", { "Fn::Split": [ "/", { "Ref": "AWS::StackId" }]} ]}, "/etcd-snapshots" ]]}`, s3uri.Host, s3uri.Path), nil +} + +func (c StackConfig) EtcdSnapshotsS3Bucket() (string, error) { + s3uri, err := url.Parse(c.ClusterS3URI()) + if err != nil { + return "", fmt.Errorf("Error in EtcdSnapshotsS3Bucket : %v", err) + } + return s3uri.Host, nil +} + +func (c StackConfig) EtcdSnapshotsS3PrefixRef() (string, error) { + s3uri, err := url.Parse(c.ClusterS3URI()) + if err != nil { + return "", fmt.Errorf("Error in EtcdSnapshotsS3Prefix : %v", err) + } + s3path := fmt.Sprintf(`{ "Fn::Join" : [ "", [ "%s/instances/", { "Fn::Select" : [ "2", { "Fn::Split": [ "/", { "Ref": "AWS::StackId" }]} ]}, "/etcd-snapshots" ]]}`, strings.TrimLeft(s3uri.Path, "/")) + return s3path, nil +} + +func (c *StackConfig) RenderStackTemplateAsBytes() ([]byte, error) { + logger.Debugf("Template Context:-\n%+v\n", c) + return jsontemplate.GetBytes(c.StackTemplateTmplFile, *c, c.PrettyPrint) +} + +func (c *StackConfig) RenderStackTemplateAsString() (string, error) { + logger.Debugf("Called etcd version of RenderStackTemplateAsString on %s", c.StackName) + bytes, err := c.RenderStackTemplateAsBytes() + return string(bytes), err +} + +// NewEtcdStackConfig: Convert a controlplane StackConfig to an Etcd flavour StackConfig +func NewEtcdStackConfig(cp *controlplaneconfig.StackConfig) *StackConfig { + config := new(StackConfig) + config.Config = cp.Config + config.StackName = cp.StackName + config.StackTemplateOptions = cp.StackTemplateOptions + config.UserDataEtcd = cp.UserDataEtcd + config.ExtraCfnResources = cp.ExtraCfnResources + + return config +} diff --git a/core/etcd/config/templates/cloud-config-etcd b/core/etcd/config/templates/cloud-config-etcd index d9ef3a066..3b5cb1185 100644 --- a/core/etcd/config/templates/cloud-config-etcd +++ b/core/etcd/config/templates/cloud-config-etcd @@ -210,6 +210,7 @@ coreos: {{ if .EtcdMigrationEnabled -}} - name: export-existing-etcd-state.service enable: true + command: start content: | [Unit] Description=Exports Kubernetes Values from a remote Etcd cluster @@ -218,12 +219,18 @@ coreos: [Service] Type=simple - Restart=Restart=on-failure + Restart=on-failure RestartSec=5 EnvironmentFile=-/etc/etcd-environment EnvironmentFile=/var/run/coreos/etcdadm-environment-migration ExecStartPre=/opt/bin/etcdadm cluster-is-healthy - ExecStartPre=/bin/bash -c "(/opt/bin/etcdadm member-is-leader && /opt/bin/etcdadm migration-export-kube-state existing-state-file.json && mv /var/run/coreos/etcdadm/snapshots/existing-state-file.json /var/run/coreos/etcdadm/snapshots/exported-state-file.json) || touch /var/run/coreos/etcdadm/snapshots/exported-state-file.json)" + ExecStartPre=/bin/bash -c "\ + if /opt/bin/etcdadm member-is-leader; then \ + /opt/bin/etcdadm migration-export-kube-state existing-state-file.json && \ + mv /var/run/coreos/etcdadm/snapshots/existing-state-file.json /var/run/coreos/etcdadm/snapshots/exported-state-file.json; \ + else \ + touch /var/run/coreos/etcdadm/snapshots/exported-state-file.json; \ + fi" ExecStart=/bin/sleep 3600 TimeoutStartSec=900 - name: import-existing-etcd-state.path @@ -237,7 +244,6 @@ coreos: WantedBy=default.target - name: import-existing-etcd-state.service enable: false - command: stop content: | [Unit] Description=Imports Kubernetes Values from export file into this cluster @@ -247,13 +253,14 @@ coreos: [Service] Type=simple - Restart=Restart=on-failure + Restart=on-failure RestartSec=5 EnvironmentFile=-/etc/etcd-environment EnvironmentFile=/var/run/coreos/etcdadm-environment ExecStartPre=/usr/bin/systemctl is-active export-existing-etcd-state.service ExecStartPre=/opt/bin/etcdadm cluster-is-healthy - ExecStartPre=/bin/bash -c "(/opt/bin/etcdadm member-is-leader && /opt/bin/etcdadm migration-import-kube-state /var/run/coreos/etcdadm/snapshots/exported-state-file.json) || /bin/true)" + ExecStartPre=/bin/bash -c 'if [[ -s "/var/run/coreos/etcdadm/snapshots/exported-state-file.json" ]]; then \ + /opt/bin/etcdadm migration-import-kube-state exported-state-file.json; fi' ExecStart=/bin/sleep 3600 TimeoutStartSec=900 {{ end -}} diff --git a/core/etcd/config/templates/stack-template.json b/core/etcd/config/templates/stack-template.json index 2f3142534..003040ca2 100644 --- a/core/etcd/config/templates/stack-template.json +++ b/core/etcd/config/templates/stack-template.json @@ -388,11 +388,11 @@ "'\n" ]]} }, - {{ if .EtcdMigrationEnabled -}} + {{ if $.EtcdMigrationEnabled -}} "/var/run/coreos/etcdadm-environment-migration": { "content": { "Fn::Join" : [ "", [ "ETCD_ENDPOINTS='", - "{{ .EtcdMigrationExistingEndpoints }}", + "{{ $.EtcdMigrationExistingEndpoints }}", "'\n", "AWS_DEFAULT_REGION='", "{{$.Region}}", diff --git a/model/existing_etcd.go b/model/existing_etcd.go new file mode 100644 index 000000000..61c5112ff --- /dev/null +++ b/model/existing_etcd.go @@ -0,0 +1,8 @@ +package model + +// ExistingState describes the existing state of the etcd cluster +type EtcdExistingState struct { + StackExists bool + EtcdMigrationEnabled bool + EtcdMigrationExistingEndpoints string +} From 8ae91905f7e3c0c266ca14b9d2efb74887eaa23c Mon Sep 17 00:00:00 2001 From: David McCormick Date: Mon, 2 Jul 2018 15:17:09 +0100 Subject: [PATCH 7/7] Resolve tests, 1) package cycle with control plane tests depending on etcdconfig which depends on controlplane config 2) Allow mocks to return nil response and not crash lookupExistingEtcdEndpoints --- core/controlplane/config/user_data_config_test.go | 5 ----- core/etcd/cluster/cluster.go | 3 +++ 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/core/controlplane/config/user_data_config_test.go b/core/controlplane/config/user_data_config_test.go index 28016a704..aaa80f3d1 100644 --- a/core/controlplane/config/user_data_config_test.go +++ b/core/controlplane/config/user_data_config_test.go @@ -9,7 +9,6 @@ import ( "github.com/aws/aws-sdk-go/service/kms" "github.com/coreos/coreos-cloudinit/config/validate" - etcdconfig "github.com/kubernetes-incubator/kube-aws/core/etcd/config" "github.com/kubernetes-incubator/kube-aws/model" "github.com/kubernetes-incubator/kube-aws/test/helper" "github.com/stretchr/testify/assert" @@ -102,10 +101,6 @@ func TestCloudConfigTemplating(t *testing.T) { Name string Template []byte }{ - { - Name: "CloudConfigEtcd", - Template: etcdconfig.CloudConfigEtcd, - }, { Name: "CloudConfigController", Template: CloudConfigController, diff --git a/core/etcd/cluster/cluster.go b/core/etcd/cluster/cluster.go index 87c70fe6d..14abde3c4 100644 --- a/core/etcd/cluster/cluster.go +++ b/core/etcd/cluster/cluster.go @@ -338,6 +338,9 @@ func (c Cluster) lookupExistingEtcdEndpoints() (string, error) { if err != nil { return "", fmt.Errorf("can't lookup ec2 instances: %v", err) } + if resp == nil { + return "", nil + } logger.Debugf("<- received %d instances from AWS", len(resp.Reservations)) if len(resp.Reservations) == 0 {