diff --git a/cfnstack/ec2.go b/cfnstack/ec2.go new file mode 100644 index 000000000..802064439 --- /dev/null +++ b/cfnstack/ec2.go @@ -0,0 +1,9 @@ +package cfnstack + +import ( + "github.com/aws/aws-sdk-go/service/ec2" +) + +type EC2Interrogator interface { + DescribeInstances(input *ec2.DescribeInstancesInput) (*ec2.DescribeInstancesOutput, error) +} diff --git a/core/controlplane/config/config.go b/core/controlplane/config/config.go index 379d4d44d..69b239d72 100644 --- a/core/controlplane/config/config.go +++ b/core/controlplane/config/config.go @@ -576,19 +576,20 @@ type EtcdSettings struct { // Cluster is the container of all the configurable parameters of a kube-aws cluster, customizable via cluster.yaml type Cluster struct { - KubeClusterSettings `yaml:",inline"` - DeploymentSettings `yaml:",inline"` - DefaultWorkerSettings `yaml:",inline"` - ControllerSettings `yaml:",inline"` - EtcdSettings `yaml:",inline"` - AdminAPIEndpointName string `yaml:"adminAPIEndpointName,omitempty"` - RecordSetTTL int `yaml:"recordSetTTL,omitempty"` - TLSCADurationDays int `yaml:"tlsCADurationDays,omitempty"` - TLSCertDurationDays int `yaml:"tlsCertDurationDays,omitempty"` - HostedZoneID string `yaml:"hostedZoneId,omitempty"` - PluginConfigs model.PluginConfigs `yaml:"kubeAwsPlugins,omitempty"` - ProvidedEncryptService EncryptService - ProvidedCFInterrogator cfnstack.CFInterrogator + KubeClusterSettings `yaml:",inline"` + DeploymentSettings `yaml:",inline"` + DefaultWorkerSettings `yaml:",inline"` + ControllerSettings `yaml:",inline"` + EtcdSettings `yaml:",inline"` + AdminAPIEndpointName string `yaml:"adminAPIEndpointName,omitempty"` + RecordSetTTL int `yaml:"recordSetTTL,omitempty"` + TLSCADurationDays int `yaml:"tlsCADurationDays,omitempty"` + TLSCertDurationDays int `yaml:"tlsCertDurationDays,omitempty"` + HostedZoneID string `yaml:"hostedZoneId,omitempty"` + PluginConfigs model.PluginConfigs `yaml:"kubeAwsPlugins,omitempty"` + ProvidedEncryptService EncryptService + ProvidedCFInterrogator cfnstack.CFInterrogator + ProvidedEC2Interrogator cfnstack.EC2Interrogator // SSHAccessAllowedSourceCIDRs is network ranges of sources you'd like SSH accesses to be allowed from, in CIDR notation SSHAccessAllowedSourceCIDRs model.CIDRRanges `yaml:"sshAccessAllowedSourceCIDRs,omitempty"` CustomSettings map[string]interface{} `yaml:"customSettings,omitempty"` diff --git a/core/controlplane/config/stack_config.go b/core/controlplane/config/stack_config.go index 1bc208197..4aef8d283 100644 --- a/core/controlplane/config/stack_config.go +++ b/core/controlplane/config/stack_config.go @@ -6,6 +6,7 @@ import ( "strings" "github.com/kubernetes-incubator/kube-aws/filereader/jsontemplate" + "github.com/kubernetes-incubator/kube-aws/logger" "github.com/kubernetes-incubator/kube-aws/model" ) @@ -63,6 +64,7 @@ func (c *StackConfig) RenderStackTemplateAsBytes() ([]byte, error) { } func (c *StackConfig) RenderStackTemplateAsString() (string, error) { + logger.Debugf("Called RenderStackTemplateAsString on %s", c.StackName) bytes, err := c.RenderStackTemplateAsBytes() return string(bytes), err } diff --git a/core/controlplane/config/templates/stack-template.json b/core/controlplane/config/templates/stack-template.json index fc236893e..3ebbf8b47 100644 --- a/core/controlplane/config/templates/stack-template.json +++ b/core/controlplane/config/templates/stack-template.json @@ -158,6 +158,57 @@ } } }, + "SecurityGroupWorker": { + "Properties": { + "GroupDescription": { + "Ref": "AWS::StackName" + }, + "SecurityGroupEgress": [ + { + "CidrIp": "0.0.0.0/0", + "FromPort": -1, + "IpProtocol": "icmp", + "ToPort": -1 + }, + { + "CidrIp": "0.0.0.0/0", + "FromPort": 0, + "IpProtocol": "tcp", + "ToPort": 65535 + }, + { + "CidrIp": "0.0.0.0/0", + "FromPort": 0, + "IpProtocol": "udp", + "ToPort": 65535 + } + ], + "SecurityGroupIngress": [ + {{ range $_, $r := $.SSHAccessAllowedSourceCIDRs -}} + { + "CidrIp": "{{$r}}", + "FromPort": 22, + "IpProtocol": "tcp", + "ToPort": 22 + }, + {{end -}} + { + "CidrIp": "0.0.0.0/0", + "FromPort": -1, + "IpProtocol": "icmp", + "ToPort": -1 + } + ], + "Tags": [ + { + "Key": "Name", + "Value": "{{$.ClusterName}}-sg-worker" + } + ], + "VpcId": {{.VPCRef}} + }, + "Type": "AWS::EC2::SecurityGroup" + }, {{ if not .Controller.IAMConfig.InstanceProfile.Arn }} "IAMInstanceProfileController": { "Properties": { @@ -617,7 +668,6 @@ {{quote $n}}: {{toJSON $r}} {{end}} }, - "Outputs": { {{ if not .Controller.IAMConfig.InstanceProfile.Arn }} "ControllerIAMRoleArn": { @@ -626,6 +676,11 @@ "Export": { "Name": { "Fn::Sub": "${AWS::StackName}-ControllerIAMRoleArn" } } }, {{end}} + "WorkerSecurityGroup" : { + "Description" : "The security group assigned to worker nodes", + "Value" : { "Ref" : "SecurityGroupWorker" }, + "Export" : { "Name" : {"Fn::Sub": "${AWS::StackName}-WorkerSecurityGroup" }} + }, "StackName": { "Description": "The name of this stack which is used by node pool stacks to import outputs from this stack", "Value": { "Ref": "AWS::StackName" } diff --git a/core/controlplane/config/user_data_config_test.go b/core/controlplane/config/user_data_config_test.go index 28016a704..aaa80f3d1 100644 --- a/core/controlplane/config/user_data_config_test.go +++ b/core/controlplane/config/user_data_config_test.go @@ -9,7 +9,6 @@ import ( "github.com/aws/aws-sdk-go/service/kms" "github.com/coreos/coreos-cloudinit/config/validate" - etcdconfig "github.com/kubernetes-incubator/kube-aws/core/etcd/config" "github.com/kubernetes-incubator/kube-aws/model" "github.com/kubernetes-incubator/kube-aws/test/helper" "github.com/stretchr/testify/assert" @@ -102,10 +101,6 @@ func TestCloudConfigTemplating(t *testing.T) { Name string Template []byte }{ - { - Name: "CloudConfigEtcd", - Template: etcdconfig.CloudConfigEtcd, - }, { Name: "CloudConfigController", Template: CloudConfigController, diff --git a/core/etcd/cluster/cluster.go b/core/etcd/cluster/cluster.go index 2755fb304..14abde3c4 100644 --- a/core/etcd/cluster/cluster.go +++ b/core/etcd/cluster/cluster.go @@ -2,6 +2,7 @@ package cluster import ( "fmt" + "strings" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" @@ -10,7 +11,8 @@ import ( "github.com/kubernetes-incubator/kube-aws/cfnstack" controlplanecluster "github.com/kubernetes-incubator/kube-aws/core/controlplane/cluster" - "github.com/kubernetes-incubator/kube-aws/core/controlplane/config" + controlplaneconfig "github.com/kubernetes-incubator/kube-aws/core/controlplane/config" + "github.com/kubernetes-incubator/kube-aws/core/etcd/config" "github.com/kubernetes-incubator/kube-aws/logger" "github.com/kubernetes-incubator/kube-aws/model" "github.com/kubernetes-incubator/kube-aws/naming" @@ -23,7 +25,7 @@ var VERSION = "UNKNOWN" const STACK_TEMPLATE_FILENAME = "stack.json" -func newClusterRef(cfg *config.Cluster, session *session.Session) *ClusterRef { +func newClusterRef(cfg *controlplaneconfig.Cluster, session *session.Session) *ClusterRef { return &ClusterRef{ Cluster: cfg, session: session, @@ -31,7 +33,7 @@ func newClusterRef(cfg *config.Cluster, session *session.Session) *ClusterRef { } type ClusterRef struct { - *config.Cluster + *controlplaneconfig.Cluster session *session.Session } @@ -41,6 +43,13 @@ type Cluster struct { assets cfnstack.Assets } +// An EtcdConfigurationContext contains configuration settings/options mixed with existing state in a way that can be +// consumed by stack and cloud-config templates. +type EtcdConfigurationContext struct { + *controlplaneconfig.Config + model.EtcdExistingState +} + type ec2Service interface { CreateVolume(*ec2.CreateVolumeInput) (*ec2.Volume, error) DescribeVpcs(*ec2.DescribeVpcsInput) (*ec2.DescribeVpcsOutput, error) @@ -114,12 +123,10 @@ func (c *ClusterRef) validateExistingVPCState(ec2Svc ec2Service) error { return nil } -func NewCluster(cfgRef *config.Cluster, opts config.StackTemplateOptions, plugins []*pluginmodel.Plugin, session *session.Session) (*Cluster, error) { - cfg := &config.Cluster{} +func NewCluster(cfgRef *controlplaneconfig.Cluster, opts controlplaneconfig.StackTemplateOptions, plugins []*pluginmodel.Plugin, session *session.Session) (*Cluster, error) { + logger.Debugf("Called etcd.NewCluster") + cfg := &controlplaneconfig.Cluster{} *cfg = *cfgRef - if cfg.ProvidedCFInterrogator == nil { - cfg.ProvidedCFInterrogator = cloudformation.New(session) - } // Import all the managed subnets from the network stack var err error @@ -136,14 +143,16 @@ func NewCluster(cfgRef *config.Cluster, opts config.StackTemplateOptions, plugin clusterRef.KubeAWSVersion = controlplanecluster.VERSION clusterRef.HostOS = cfgRef.HostOS - stackConfig, err := clusterRef.StackConfig("etcd", opts, session, plugins) + cpStackConfig, err := clusterRef.StackConfig("etcd", opts, session, plugins) if err != nil { return nil, err } + // hack - mutate our controlplane generated stack config into our own specific etcd version + etcdStackConfig := config.NewEtcdStackConfig(cpStackConfig) c := &Cluster{ ClusterRef: clusterRef, - StackConfig: stackConfig, + StackConfig: etcdStackConfig, } // Notes: @@ -164,16 +173,49 @@ func NewCluster(cfgRef *config.Cluster, opts config.StackTemplateOptions, plugin c.StackConfig.Etcd.CustomSystemdUnits = append(c.StackConfig.Etcd.CustomSystemdUnits, extraEtcd.SystemdUnits...) c.StackConfig.Etcd.CustomFiles = append(c.StackConfig.Etcd.CustomFiles, extraEtcd.Files...) c.StackConfig.Etcd.IAMConfig.Policy.Statements = append(c.StackConfig.Etcd.IAMConfig.Policy.Statements, extraEtcd.IAMPolicyStatements...) - c.StackConfig.Etcd.StackExists, err = cfnstack.NestedStackExists(cfg.ProvidedCFInterrogator, c.ClusterName, naming.FromStackToCfnResource(c.Etcd.LogicalName())) + + // create the context that will be used to build the assets (combination of config + existing state) + c.StackConfig.EtcdExistingState, err = c.inspectExistingState() if err != nil { - return nil, fmt.Errorf("failed to check for existence of etcd cloud-formation stack: %v", err) + return nil, fmt.Errorf("Could not inspect existing etcd state: %v", err) + } + ctx := EtcdConfigurationContext{ + Config: c.StackConfig.Config, + EtcdExistingState: c.StackConfig.EtcdExistingState, } - c.assets, err = c.buildAssets() + c.assets, err = c.buildAssets(ctx) return c, err } +func (c *Cluster) inspectExistingState() (model.EtcdExistingState, error) { + var err error + if c.ProvidedCFInterrogator == nil { + c.ProvidedCFInterrogator = cloudformation.New(c.session) + } + if c.ProvidedEC2Interrogator == nil { + c.ProvidedEC2Interrogator = ec2.New(c.session) + } + + state := model.EtcdExistingState{} + state.StackExists, err = cfnstack.NestedStackExists(c.ProvidedCFInterrogator, c.ClusterName, naming.FromStackToCfnResource(c.Etcd.LogicalName())) + if err != nil { + return state, fmt.Errorf("failed to check for existence of etcd cloud-formation stack: %v", err) + } + // when the Etcd stack does not exist but we have existing etcd instances then we need to enable the + // etcd migration units. + if !state.StackExists { + if state.EtcdMigrationExistingEndpoints, err = c.lookupExistingEtcdEndpoints(); err != nil { + return state, fmt.Errorf("failed to lookup existing etcd endpoints: %v", err) + } + if state.EtcdMigrationExistingEndpoints != "" { + state.EtcdMigrationEnabled = true + } + } + return state, nil +} + func (c *Cluster) Assets() cfnstack.Assets { return c.assets } @@ -185,11 +227,13 @@ func (c Cluster) NestedStackName() string { return naming.FromStackToCfnResource(c.StackName) } -func (c *Cluster) buildAssets() (cfnstack.Assets, error) { +func (c *Cluster) buildAssets(ctx EtcdConfigurationContext) (cfnstack.Assets, error) { + logger.Debugf("Called etcd.buildAssets") + logger.Debugf("Context is: %+v", ctx) var err error assets := cfnstack.NewAssetsBuilder(c.StackName, c.StackConfig.ClusterExportedStacksS3URI(), c.StackConfig.Region) - if c.StackConfig.UserDataEtcd, err = model.NewUserData(c.StackTemplateOptions.EtcdTmplFile, c.StackConfig.Config); err != nil { + if c.StackConfig.UserDataEtcd, err = model.NewUserData(c.StackTemplateOptions.EtcdTmplFile, ctx); err != nil { return nil, fmt.Errorf("failed to render etcd cloud config: %v", err) } @@ -202,8 +246,10 @@ func (c *Cluster) buildAssets() (cfnstack.Assets, error) { return nil, fmt.Errorf("Error while rendering template: %v", err) } + logger.Debugf("Calling assets.Add on %s", STACK_TEMPLATE_FILENAME) assets.Add(STACK_TEMPLATE_FILENAME, stackTemplate) + logger.Debugf("Calling assets.Build for etcd...") return assets.Build(), nil } @@ -265,3 +311,49 @@ func (c *Cluster) String() string { func (c *ClusterRef) Destroy() error { return cfnstack.NewDestroyer("etcd", c.session, c.CloudFormation.RoleARN).Destroy() } + +// lookupExistingEtcdEndpoints supports the migration from embedded etcd servers to their own stack +// by looking up the existing etcd servers for a specific cluster and constructing and etcd endpoints +// list as used by tools such as etcdctl and the etcdadm script. +func (c Cluster) lookupExistingEtcdEndpoints() (string, error) { + clusterTag := fmt.Sprintf("tag:kubernetes.io/cluster/%s", c.ClusterName) + params := &ec2.DescribeInstancesInput{ + Filters: []*ec2.Filter{ + { + Name: aws.String("tag:kube-aws:role"), + Values: []*string{aws.String("etcd")}, + }, + { + Name: aws.String(clusterTag), + Values: []*string{aws.String("owned")}, + }, + { + Name: aws.String("instance-state-name"), + Values: []*string{aws.String("running"), aws.String("pending")}, + }, + }, + } + logger.Debugf("Calling AWS EC2 DescribeInstances ->") + resp, err := c.ProvidedEC2Interrogator.DescribeInstances(params) + if err != nil { + return "", fmt.Errorf("can't lookup ec2 instances: %v", err) + } + if resp == nil { + return "", nil + } + + logger.Debugf("<- received %d instances from AWS", len(resp.Reservations)) + if len(resp.Reservations) == 0 { + return "", nil + } + // construct comma separated endpoints string + endpoints := []string{} + for _, res := range resp.Reservations { + for _, inst := range res.Instances { + endpoints = append(endpoints, fmt.Sprintf("https://%s:2379", *inst.PrivateDnsName)) + } + } + result := strings.Join(endpoints, ",") + logger.Debugf("Existing etcd endpoints found: %s", result) + return result, nil +} diff --git a/core/etcd/config/stack_config.go b/core/etcd/config/stack_config.go new file mode 100644 index 000000000..514715d60 --- /dev/null +++ b/core/etcd/config/stack_config.go @@ -0,0 +1,83 @@ +package config + +import ( + "fmt" + "net/url" + "strings" + + controlplaneconfig "github.com/kubernetes-incubator/kube-aws/core/controlplane/config" + "github.com/kubernetes-incubator/kube-aws/filereader/jsontemplate" + "github.com/kubernetes-incubator/kube-aws/logger" + "github.com/kubernetes-incubator/kube-aws/model" +) + +// StackConfig contains configuration parameters available when rendering CFN stack template from golang text templates +type StackConfig struct { + *controlplaneconfig.Config + StackName string + controlplaneconfig.StackTemplateOptions + UserDataEtcd model.UserData + ExtraCfnResources map[string]interface{} + model.EtcdExistingState +} + +func (c *StackConfig) s3Folders() model.S3Folders { + return model.NewS3Folders(c.S3URI, c.ClusterName) +} + +func (c *StackConfig) ClusterS3URI() string { + return c.s3Folders().Cluster().URI() +} + +func (c *StackConfig) ClusterExportedStacksS3URI() string { + return c.s3Folders().ClusterExportedStacks().URI() +} + +// EtcdSnapshotsS3Path is a pair of a S3 bucket and a key of an S3 object containing an etcd cluster snapshot +func (c StackConfig) EtcdSnapshotsS3PathRef() (string, error) { + s3uri, err := url.Parse(c.ClusterS3URI()) + if err != nil { + return "", fmt.Errorf("Error in EtcdSnapshotsS3PathRef : %v", err) + } + return fmt.Sprintf(`{ "Fn::Join" : [ "", [ "%s%s/instances/", { "Fn::Select" : [ "2", { "Fn::Split": [ "/", { "Ref": "AWS::StackId" }]} ]}, "/etcd-snapshots" ]]}`, s3uri.Host, s3uri.Path), nil +} + +func (c StackConfig) EtcdSnapshotsS3Bucket() (string, error) { + s3uri, err := url.Parse(c.ClusterS3URI()) + if err != nil { + return "", fmt.Errorf("Error in EtcdSnapshotsS3Bucket : %v", err) + } + return s3uri.Host, nil +} + +func (c StackConfig) EtcdSnapshotsS3PrefixRef() (string, error) { + s3uri, err := url.Parse(c.ClusterS3URI()) + if err != nil { + return "", fmt.Errorf("Error in EtcdSnapshotsS3Prefix : %v", err) + } + s3path := fmt.Sprintf(`{ "Fn::Join" : [ "", [ "%s/instances/", { "Fn::Select" : [ "2", { "Fn::Split": [ "/", { "Ref": "AWS::StackId" }]} ]}, "/etcd-snapshots" ]]}`, strings.TrimLeft(s3uri.Path, "/")) + return s3path, nil +} + +func (c *StackConfig) RenderStackTemplateAsBytes() ([]byte, error) { + logger.Debugf("Template Context:-\n%+v\n", c) + return jsontemplate.GetBytes(c.StackTemplateTmplFile, *c, c.PrettyPrint) +} + +func (c *StackConfig) RenderStackTemplateAsString() (string, error) { + logger.Debugf("Called etcd version of RenderStackTemplateAsString on %s", c.StackName) + bytes, err := c.RenderStackTemplateAsBytes() + return string(bytes), err +} + +// NewEtcdStackConfig: Convert a controlplane StackConfig to an Etcd flavour StackConfig +func NewEtcdStackConfig(cp *controlplaneconfig.StackConfig) *StackConfig { + config := new(StackConfig) + config.Config = cp.Config + config.StackName = cp.StackName + config.StackTemplateOptions = cp.StackTemplateOptions + config.UserDataEtcd = cp.UserDataEtcd + config.ExtraCfnResources = cp.ExtraCfnResources + + return config +} diff --git a/core/etcd/config/templates/cloud-config-etcd b/core/etcd/config/templates/cloud-config-etcd index 83449c455..3b5cb1185 100644 --- a/core/etcd/config/templates/cloud-config-etcd +++ b/core/etcd/config/templates/cloud-config-etcd @@ -207,6 +207,63 @@ coreos: [Install] WantedBy=multi-user.target {{end}} + {{ if .EtcdMigrationEnabled -}} + - name: export-existing-etcd-state.service + enable: true + command: start + content: | + [Unit] + Description=Exports Kubernetes Values from a remote Etcd cluster + Wants=cfn-etcd-environment.service + After=etcd-member.service + + [Service] + Type=simple + Restart=on-failure + RestartSec=5 + EnvironmentFile=-/etc/etcd-environment + EnvironmentFile=/var/run/coreos/etcdadm-environment-migration + ExecStartPre=/opt/bin/etcdadm cluster-is-healthy + ExecStartPre=/bin/bash -c "\ + if /opt/bin/etcdadm member-is-leader; then \ + /opt/bin/etcdadm migration-export-kube-state existing-state-file.json && \ + mv /var/run/coreos/etcdadm/snapshots/existing-state-file.json /var/run/coreos/etcdadm/snapshots/exported-state-file.json; \ + else \ + touch /var/run/coreos/etcdadm/snapshots/exported-state-file.json; \ + fi" + ExecStart=/bin/sleep 3600 + TimeoutStartSec=900 + - name: import-existing-etcd-state.path + enable: true + command: start + content: | + [Path] + PathExists=/var/run/coreos/etcdadm/snapshots/exported-state-file.json + + [Install] + WantedBy=default.target + - name: import-existing-etcd-state.service + enable: false + content: | + [Unit] + Description=Imports Kubernetes Values from export file into this cluster + Wants=cfn-etcd-environment.service + After=etcd-member.service + After=export-existing-etcd-state.service + + [Service] + Type=simple + Restart=on-failure + RestartSec=5 + EnvironmentFile=-/etc/etcd-environment + EnvironmentFile=/var/run/coreos/etcdadm-environment + ExecStartPre=/usr/bin/systemctl is-active export-existing-etcd-state.service + ExecStartPre=/opt/bin/etcdadm cluster-is-healthy + ExecStartPre=/bin/bash -c 'if [[ -s "/var/run/coreos/etcdadm/snapshots/exported-state-file.json" ]]; then \ + /opt/bin/etcdadm migration-import-kube-state exported-state-file.json; fi' + ExecStart=/bin/sleep 3600 + TimeoutStartSec=900 + {{ end -}} {{if .Etcd.DisasterRecovery.SupportsEtcdVersion .Etcd.Version -}} - name: etcdadm-reconfigure.service enable: true @@ -433,6 +490,9 @@ coreos: [Unit] Wants={{.Etcd.SystemdUnitName}} After={{.Etcd.SystemdUnitName}} + {{ if .EtcdMigrationEnabled -}} + After=import-existing-etcd-state.service + {{ end -}} [Service] Type=simple @@ -441,6 +501,9 @@ coreos: EnvironmentFile={{.EtcdNodeEnvFileName}} ExecStartPre=/usr/bin/systemctl is-active {{.Etcd.SystemdUnitName}} + {{ if .EtcdMigrationEnabled -}} + ExecStartPre=/usr/bin/systemctl is-active import-existing-etcd-state.service + {{ end -}} ExecStartPre=/usr/bin/rkt fetch {{.AWSCliImage.Options}}{{.AWSCliImage.RktRepo}} ExecStart=-/opt/bin/cfn-signal {{end}} diff --git a/core/etcd/config/templates/stack-template.json b/core/etcd/config/templates/stack-template.json index 8a9fbc513..003040ca2 100644 --- a/core/etcd/config/templates/stack-template.json +++ b/core/etcd/config/templates/stack-template.json @@ -388,6 +388,60 @@ "'\n" ]]} }, + {{ if $.EtcdMigrationEnabled -}} + "/var/run/coreos/etcdadm-environment-migration": { + "content": { "Fn::Join" : [ "", [ + "ETCD_ENDPOINTS='", + "{{ $.EtcdMigrationExistingEndpoints }}", + "'\n", + "AWS_DEFAULT_REGION='", + "{{$.Region}}", + "'\n", + "KUBERNETES_CLUSTER='", + "{{$.ClusterName}}", + "'\n", + "ETCDCTL_CACERT='", + "/etc/ssl/certs/etcd-trusted-ca.pem", + "'\n", + "ETCDCTL_CERT='", + "/etc/ssl/certs/etcd-client.pem", + "'\n", + "ETCDCTL_KEY='", + "/etc/ssl/certs/etcd-client-key.pem", + "'\n", + "ETCDCTL_CA_FILE='", + "/etc/ssl/certs/etcd-trusted-ca.pem", + "'\n", + "ETCDCTL_CERT_FILE='", + "/etc/ssl/certs/etcd-client.pem", + "'\n", + "ETCDCTL_KEY_FILE='", + "/etc/ssl/certs/etcd-client-key.pem", + "'\n", + "ETCDADM_MEMBER_SYSTEMD_SERVICE_NAME='", + "etcd-member", + "'\n", + "ETCDADM_CLUSTER_SNAPSHOTS_S3_URI='", + { "Fn::Join" : [ "", ["s3://", {{$.EtcdSnapshotsS3PathRef}} ]] }, + "'\n", + "ETCDADM_STATE_FILES_DIR='", + "/var/run/coreos/etcdadm", + "'\n", + "ETCDADM_MEMBER_ENV_FILE='", + "/var/run/coreos/etcdadm/etcd-member.env", + "'\n", + "ETCDADM_MEMBER_COUNT='", + "{{$.Etcd.Count}}", + "'\n", + "ETCDADM_MEMBER_INDEX='", + "{{$etcdIndex}}", + "'\n", + "ETCD_VERSION='", + "{{$.Etcd.Version}}", + "'\n" + ]]} + }, + {{ end -}} "/var/run/coreos/etcdadm-environment": { "content": { "Fn::Join" : [ "", [ "ETCD_ENDPOINTS='", @@ -451,7 +505,7 @@ }, "DependsOn": [ {{if $etcdInstance.DependencyExists}}{{$etcdInstance.DependencyRef}},{{end}} - {{if $.Etcd.StackExists}}{{if $etcdIndex}}"{{$.Etcd.LogicalName}}{{sub $etcdIndex 1}}",{{end}}{{end}} + {{if $.StackExists}}{{if $etcdIndex}}"{{$.Etcd.LogicalName}}{{sub $etcdIndex 1}}",{{end}}{{end}} {{if $etcdInstance.EIPManaged}} "{{$etcdInstance.EIPLogicalName}}", {{end}} diff --git a/core/root/cluster.go b/core/root/cluster.go index 586c66e00..d203c3cb4 100644 --- a/core/root/cluster.go +++ b/core/root/cluster.go @@ -504,17 +504,18 @@ func (c clusterImpl) LegacyUpdate(targets OperationTargets) (string, error) { func (c clusterImpl) update(cfSvc *cloudformation.CloudFormation, targets OperationTargets) (string, error) { + // Look at existing state of cloud formation and stacks to determine if we need to take special measures in migrating our etcd + // clusters from the control plane stack to their own Etcd stack. exists, err := cfnstack.NestedStackExists(cfSvc, c.controlPlane.ClusterName, naming.FromStackToCfnResource(c.etcd.Etcd.LogicalName())) if err != nil { logger.Errorf("please check your AWS credentials/permissions") return "", fmt.Errorf("can't lookup AWS CloudFormation stacks: %s", err) } - // fail fast if it looks like we are trying to update a legacy cluster. if !exists { - logger.Errorf("the %s stack must exist in order to be able to update your cluster.", naming.FromStackToCfnResource(c.etcd.Etcd.LogicalName())) - logger.Error("we're sorry, but kube-aws can not presently upgrade your cluster to the new release with a separate ETCD Cloudformation stack.") - logger.Error("please consider backing up, destroying and recreating to upgrade.") - return "", fmt.Errorf("update not supported for clusters without a separate etcd cloudformation stack") + if !c.controlPlane.Kubernetes.Networking.SelfHosting.Enabled { + return "", fmt.Errorf("sorry, you can only update an existing legacy cluster with Kubernetes.Networking.SelfHosting enabled") + } + logger.Warnf("your cluster does not have a separate etcd stack, this update will spin up a new etcd cluster and attempt to import your existing state.") } assets, err := c.generateAssets(c.operationTargetsFromUserInput([]OperationTargets{targets})) diff --git a/core/root/config/config.go b/core/root/config/config.go index f4122f449..a329bd11f 100644 --- a/core/root/config/config.go +++ b/core/root/config/config.go @@ -186,13 +186,14 @@ func failFastWhenUnknownKeysFound(vs []unknownKeyValidation) error { return nil } -func ConfigFromBytesWithStubs(data []byte, plugins []*pluginmodel.Plugin, encryptService controlplane.EncryptService, cf cfnstack.CFInterrogator) (*Config, error) { +func ConfigFromBytesWithStubs(data []byte, plugins []*pluginmodel.Plugin, encryptService controlplane.EncryptService, cf cfnstack.CFInterrogator, ec cfnstack.EC2Interrogator) (*Config, error) { c, err := ConfigFromBytes(data, plugins) if err != nil { return nil, err } c.ProvidedEncryptService = encryptService c.ProvidedCFInterrogator = cf + c.ProvidedEC2Interrogator = ec // Uses the same encrypt service for node pools for consistency for _, p := range c.NodePools { diff --git a/etcdadm/etcdadm b/etcdadm/etcdadm index a1f863730..97d201e72 100755 --- a/etcdadm/etcdadm +++ b/etcdadm/etcdadm @@ -775,6 +775,10 @@ member_data_dir() { } member_etcdctl() { + raw_etcdctl etcdctl --endpoints "$(member_client_url)" ${*} +} + +raw_etcdctl() { local uuid_file local docker_opts=(--rm) @@ -796,7 +800,7 @@ member_etcdctl() { --volume="$(member_data_dir)":/var/lib/etcd \ --volume "$(member_snapshots_dir_name)":"$(member_host_snapshots_dir_path)" \ quay.io/coreos/etcd:v$etcd_version \ - etcdctl --endpoints "$(member_client_url)" ${*} + ${*} } member_is_healthy() { @@ -858,6 +862,29 @@ member_validate() { fi } +export_kubernetes_registry() { + local file_name=$1 + echo "Exporting kubernetes objects to $(member_host_snapshots_dir_path)/$file_name" + if cluster_is_healthy; then + (member_etcdctl get '/registry' --prefix --write-out="json") | jq -r '.kvs[] | "\(.key):\(.value)"' >"$(member_host_snapshots_dir_path)/$file_name" + else + _panic 'cluster is not healthy, can not export keys from an unhealthy cluster' + fi +} + +import_kubernetes_registry() { + local file_name=$1 + echo "Importing kubernetes objects to $(member_host_snapshots_dir_path)/$file_name" + if [[ ! -f "$(member_host_snapshots_dir_path)/$file_name" ]]; then + _panic "Can't import objects, file not found: $(member_host_snapshots_dir_path)/$file_name" + fi + if cluster_is_healthy; then + raw_etcdctl /bin/sh -c 'while read -u 10 l; do k=$(echo "${l%%:*}" | base64 -d); v=${l##*:}; echo "saving $k"; echo -e "$v" | base64 -d | etcdctl --endpoints='$(member_client_url)' put "$k"; done 10<'$(member_snapshots_dir_name)/$file_name + else + _panic 'cluster is not healthy, can not import keys to an unhealthy cluster' + fi +} + etcdadm_main() { local cmd=$1 @@ -865,6 +892,20 @@ etcdadm_main() { "save" ) member_save_snapshot ;; + "cluster-is-healthy" ) + cluster_is_healthy + ;; + "migration-export-kube-state" ) + export_kubernetes_registry $2 + shift + ;; + "migration-import-kube-state" ) + import_kubernetes_registry $2 + shift + ;; + "member-is-leader" ) + member_is_leader + ;; "replace" ) member_replace_failed ;; diff --git a/model/existing_etcd.go b/model/existing_etcd.go new file mode 100644 index 000000000..61c5112ff --- /dev/null +++ b/model/existing_etcd.go @@ -0,0 +1,8 @@ +package model + +// ExistingState describes the existing state of the etcd cluster +type EtcdExistingState struct { + StackExists bool + EtcdMigrationEnabled bool + EtcdMigrationExistingEndpoints string +} diff --git a/test/helper/cfn.go b/test/helper/cfn.go index 32ba663c1..e9be5c6c8 100644 --- a/test/helper/cfn.go +++ b/test/helper/cfn.go @@ -4,6 +4,7 @@ import ( "fmt" "github.com/aws/aws-sdk-go/service/cloudformation" + "github.com/aws/aws-sdk-go/service/ec2" ) type DummyCloudformationService struct { @@ -26,6 +27,14 @@ func (cf DummyCFInterrogator) ListStackResources(input *cloudformation.ListStack return cf.ListStacksResourcesResult, nil } +type DummyEC2Interrogator struct { + DescribeInstancesOutput *ec2.DescribeInstancesOutput +} + +func (ec DummyEC2Interrogator) DescribeInstances(input *ec2.DescribeInstancesInput) (*ec2.DescribeInstancesOutput, error) { + return ec.DescribeInstancesOutput, nil +} + func (cfSvc *DummyCloudformationService) CreateStack(req *cloudformation.CreateStackInput) (*cloudformation.CreateStackOutput, error) { if len(cfSvc.ExpectedTags) != len(req.Tags) { diff --git a/test/integration/maincluster_test.go b/test/integration/maincluster_test.go index 4e8a784f0..058414035 100644 --- a/test/integration/maincluster_test.go +++ b/test/integration/maincluster_test.go @@ -3489,7 +3489,7 @@ worker: configBytes := validCase.configYaml // TODO Allow including plugins in test data? plugins := []*pluginmodel.Plugin{} - providedConfig, err := config.ConfigFromBytesWithStubs([]byte(configBytes), plugins, helper.DummyEncryptService{}, helper.DummyCFInterrogator{}) + providedConfig, err := config.ConfigFromBytesWithStubs([]byte(configBytes), plugins, helper.DummyEncryptService{}, helper.DummyCFInterrogator{}, helper.DummyEC2Interrogator{}) if err != nil { t.Errorf("failed to parse config %s: %v", configBytes, err) t.FailNow() diff --git a/test/integration/plugin_test.go b/test/integration/plugin_test.go index c37770405..e8059429b 100644 --- a/test/integration/plugin_test.go +++ b/test/integration/plugin_test.go @@ -495,7 +495,7 @@ spec: } configBytes := validCase.clusterYaml - providedConfig, err := config.ConfigFromBytesWithStubs([]byte(configBytes), plugins, helper.DummyEncryptService{}, helper.DummyCFInterrogator{}) + providedConfig, err := config.ConfigFromBytesWithStubs([]byte(configBytes), plugins, helper.DummyEncryptService{}, helper.DummyCFInterrogator{}, helper.DummyEC2Interrogator{}) if err != nil { t.Errorf("failed to parse config %s: %v", configBytes, err) t.FailNow()