Skip to content
This repository has been archived by the owner on Sep 30, 2020. It is now read-only.

Commit

Permalink
0.11.x migration from existing clusters without losing state (#1380)
Browse files Browse the repository at this point in the history
Changelog:

* Remove the etcd-environment metadata section on the nodepools if Kubernetes.Networking.SelfHosting is Enabled.
This is to break the dependency that exists on the nodepool stacks on etcd stack resources.

* Disable the fail-fast on updating legacy kube-aws clusters when the self-hosted network is enabled

* Remove the etcd-environment metadata section on the nodepools if Kubernetes.Networking.SelfHosting is Enabled. (#1367)

This is to break the dependency that exists on the nodepool stacks on etcd stack resources.

Ref #1370

* Modified etcdadm with special export and import commands that will copy state from existing etcd over to the new ones during a migration.

* Added systemd migration units

* Save existing state into etcd's stack config and pass to cloud-config render routine.

* Resolve tests, 1) package cycle with control plane tests depending on etcdconfig which depends on controlplane config 2) Allow mocks to return nil response and not crash lookupExistingEtcdEndpoints
  • Loading branch information
davidmccormick authored and mumoshu committed Jul 7, 2018
1 parent 1ead69a commit cdceab6
Show file tree
Hide file tree
Showing 16 changed files with 458 additions and 44 deletions.
9 changes: 9 additions & 0 deletions cfnstack/ec2.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package cfnstack

import (
"github.com/aws/aws-sdk-go/service/ec2"
)

type EC2Interrogator interface {
DescribeInstances(input *ec2.DescribeInstancesInput) (*ec2.DescribeInstancesOutput, error)
}
27 changes: 14 additions & 13 deletions core/controlplane/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -576,19 +576,20 @@ type EtcdSettings struct {

// Cluster is the container of all the configurable parameters of a kube-aws cluster, customizable via cluster.yaml
type Cluster struct {
KubeClusterSettings `yaml:",inline"`
DeploymentSettings `yaml:",inline"`
DefaultWorkerSettings `yaml:",inline"`
ControllerSettings `yaml:",inline"`
EtcdSettings `yaml:",inline"`
AdminAPIEndpointName string `yaml:"adminAPIEndpointName,omitempty"`
RecordSetTTL int `yaml:"recordSetTTL,omitempty"`
TLSCADurationDays int `yaml:"tlsCADurationDays,omitempty"`
TLSCertDurationDays int `yaml:"tlsCertDurationDays,omitempty"`
HostedZoneID string `yaml:"hostedZoneId,omitempty"`
PluginConfigs model.PluginConfigs `yaml:"kubeAwsPlugins,omitempty"`
ProvidedEncryptService EncryptService
ProvidedCFInterrogator cfnstack.CFInterrogator
KubeClusterSettings `yaml:",inline"`
DeploymentSettings `yaml:",inline"`
DefaultWorkerSettings `yaml:",inline"`
ControllerSettings `yaml:",inline"`
EtcdSettings `yaml:",inline"`
AdminAPIEndpointName string `yaml:"adminAPIEndpointName,omitempty"`
RecordSetTTL int `yaml:"recordSetTTL,omitempty"`
TLSCADurationDays int `yaml:"tlsCADurationDays,omitempty"`
TLSCertDurationDays int `yaml:"tlsCertDurationDays,omitempty"`
HostedZoneID string `yaml:"hostedZoneId,omitempty"`
PluginConfigs model.PluginConfigs `yaml:"kubeAwsPlugins,omitempty"`
ProvidedEncryptService EncryptService
ProvidedCFInterrogator cfnstack.CFInterrogator
ProvidedEC2Interrogator cfnstack.EC2Interrogator
// SSHAccessAllowedSourceCIDRs is network ranges of sources you'd like SSH accesses to be allowed from, in CIDR notation
SSHAccessAllowedSourceCIDRs model.CIDRRanges `yaml:"sshAccessAllowedSourceCIDRs,omitempty"`
CustomSettings map[string]interface{} `yaml:"customSettings,omitempty"`
Expand Down
2 changes: 2 additions & 0 deletions core/controlplane/config/stack_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"strings"

"github.com/kubernetes-incubator/kube-aws/filereader/jsontemplate"
"github.com/kubernetes-incubator/kube-aws/logger"
"github.com/kubernetes-incubator/kube-aws/model"
)

Expand Down Expand Up @@ -63,6 +64,7 @@ func (c *StackConfig) RenderStackTemplateAsBytes() ([]byte, error) {
}

func (c *StackConfig) RenderStackTemplateAsString() (string, error) {
logger.Debugf("Called RenderStackTemplateAsString on %s", c.StackName)
bytes, err := c.RenderStackTemplateAsBytes()
return string(bytes), err
}
57 changes: 56 additions & 1 deletion core/controlplane/config/templates/stack-template.json
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,57 @@
}
}
},
"SecurityGroupWorker": {
"Properties": {
"GroupDescription": {
"Ref": "AWS::StackName"
},
"SecurityGroupEgress": [
{
"CidrIp": "0.0.0.0/0",
"FromPort": -1,
"IpProtocol": "icmp",
"ToPort": -1
},
{
"CidrIp": "0.0.0.0/0",
"FromPort": 0,
"IpProtocol": "tcp",
"ToPort": 65535
},
{
"CidrIp": "0.0.0.0/0",
"FromPort": 0,
"IpProtocol": "udp",
"ToPort": 65535
}
],
"SecurityGroupIngress": [
{{ range $_, $r := $.SSHAccessAllowedSourceCIDRs -}}
{
"CidrIp": "{{$r}}",
"FromPort": 22,
"IpProtocol": "tcp",
"ToPort": 22
},
{{end -}}
{
"CidrIp": "0.0.0.0/0",
"FromPort": -1,
"IpProtocol": "icmp",
"ToPort": -1
}
],
"Tags": [
{
"Key": "Name",
"Value": "{{$.ClusterName}}-sg-worker"
}
],
"VpcId": {{.VPCRef}}
},
"Type": "AWS::EC2::SecurityGroup"
},
{{ if not .Controller.IAMConfig.InstanceProfile.Arn }}
"IAMInstanceProfileController": {
"Properties": {
Expand Down Expand Up @@ -617,7 +668,6 @@
{{quote $n}}: {{toJSON $r}}
{{end}}
},

"Outputs": {
{{ if not .Controller.IAMConfig.InstanceProfile.Arn }}
"ControllerIAMRoleArn": {
Expand All @@ -626,6 +676,11 @@
"Export": { "Name": { "Fn::Sub": "${AWS::StackName}-ControllerIAMRoleArn" } }
},
{{end}}
"WorkerSecurityGroup" : {
"Description" : "The security group assigned to worker nodes",
"Value" : { "Ref" : "SecurityGroupWorker" },
"Export" : { "Name" : {"Fn::Sub": "${AWS::StackName}-WorkerSecurityGroup" }}
},
"StackName": {
"Description": "The name of this stack which is used by node pool stacks to import outputs from this stack",
"Value": { "Ref": "AWS::StackName" }
Expand Down
5 changes: 0 additions & 5 deletions core/controlplane/config/user_data_config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (

"github.com/aws/aws-sdk-go/service/kms"
"github.com/coreos/coreos-cloudinit/config/validate"
etcdconfig "github.com/kubernetes-incubator/kube-aws/core/etcd/config"
"github.com/kubernetes-incubator/kube-aws/model"
"github.com/kubernetes-incubator/kube-aws/test/helper"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -102,10 +101,6 @@ func TestCloudConfigTemplating(t *testing.T) {
Name string
Template []byte
}{
{
Name: "CloudConfigEtcd",
Template: etcdconfig.CloudConfigEtcd,
},
{
Name: "CloudConfigController",
Template: CloudConfigController,
Expand Down
122 changes: 107 additions & 15 deletions core/etcd/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package cluster

import (
"fmt"
"strings"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
Expand All @@ -10,7 +11,8 @@ import (

"github.com/kubernetes-incubator/kube-aws/cfnstack"
controlplanecluster "github.com/kubernetes-incubator/kube-aws/core/controlplane/cluster"
"github.com/kubernetes-incubator/kube-aws/core/controlplane/config"
controlplaneconfig "github.com/kubernetes-incubator/kube-aws/core/controlplane/config"
"github.com/kubernetes-incubator/kube-aws/core/etcd/config"
"github.com/kubernetes-incubator/kube-aws/logger"
"github.com/kubernetes-incubator/kube-aws/model"
"github.com/kubernetes-incubator/kube-aws/naming"
Expand All @@ -23,15 +25,15 @@ var VERSION = "UNKNOWN"

const STACK_TEMPLATE_FILENAME = "stack.json"

func newClusterRef(cfg *config.Cluster, session *session.Session) *ClusterRef {
func newClusterRef(cfg *controlplaneconfig.Cluster, session *session.Session) *ClusterRef {
return &ClusterRef{
Cluster: cfg,
session: session,
}
}

type ClusterRef struct {
*config.Cluster
*controlplaneconfig.Cluster
session *session.Session
}

Expand All @@ -41,6 +43,13 @@ type Cluster struct {
assets cfnstack.Assets
}

// An EtcdConfigurationContext contains configuration settings/options mixed with existing state in a way that can be
// consumed by stack and cloud-config templates.
type EtcdConfigurationContext struct {
*controlplaneconfig.Config
model.EtcdExistingState
}

type ec2Service interface {
CreateVolume(*ec2.CreateVolumeInput) (*ec2.Volume, error)
DescribeVpcs(*ec2.DescribeVpcsInput) (*ec2.DescribeVpcsOutput, error)
Expand Down Expand Up @@ -114,12 +123,10 @@ func (c *ClusterRef) validateExistingVPCState(ec2Svc ec2Service) error {
return nil
}

func NewCluster(cfgRef *config.Cluster, opts config.StackTemplateOptions, plugins []*pluginmodel.Plugin, session *session.Session) (*Cluster, error) {
cfg := &config.Cluster{}
func NewCluster(cfgRef *controlplaneconfig.Cluster, opts controlplaneconfig.StackTemplateOptions, plugins []*pluginmodel.Plugin, session *session.Session) (*Cluster, error) {
logger.Debugf("Called etcd.NewCluster")
cfg := &controlplaneconfig.Cluster{}
*cfg = *cfgRef
if cfg.ProvidedCFInterrogator == nil {
cfg.ProvidedCFInterrogator = cloudformation.New(session)
}

// Import all the managed subnets from the network stack
var err error
Expand All @@ -136,14 +143,16 @@ func NewCluster(cfgRef *config.Cluster, opts config.StackTemplateOptions, plugin
clusterRef.KubeAWSVersion = controlplanecluster.VERSION
clusterRef.HostOS = cfgRef.HostOS

stackConfig, err := clusterRef.StackConfig("etcd", opts, session, plugins)
cpStackConfig, err := clusterRef.StackConfig("etcd", opts, session, plugins)
if err != nil {
return nil, err
}
// hack - mutate our controlplane generated stack config into our own specific etcd version
etcdStackConfig := config.NewEtcdStackConfig(cpStackConfig)

c := &Cluster{
ClusterRef: clusterRef,
StackConfig: stackConfig,
StackConfig: etcdStackConfig,
}

// Notes:
Expand All @@ -164,16 +173,49 @@ func NewCluster(cfgRef *config.Cluster, opts config.StackTemplateOptions, plugin
c.StackConfig.Etcd.CustomSystemdUnits = append(c.StackConfig.Etcd.CustomSystemdUnits, extraEtcd.SystemdUnits...)
c.StackConfig.Etcd.CustomFiles = append(c.StackConfig.Etcd.CustomFiles, extraEtcd.Files...)
c.StackConfig.Etcd.IAMConfig.Policy.Statements = append(c.StackConfig.Etcd.IAMConfig.Policy.Statements, extraEtcd.IAMPolicyStatements...)
c.StackConfig.Etcd.StackExists, err = cfnstack.NestedStackExists(cfg.ProvidedCFInterrogator, c.ClusterName, naming.FromStackToCfnResource(c.Etcd.LogicalName()))

// create the context that will be used to build the assets (combination of config + existing state)
c.StackConfig.EtcdExistingState, err = c.inspectExistingState()
if err != nil {
return nil, fmt.Errorf("failed to check for existence of etcd cloud-formation stack: %v", err)
return nil, fmt.Errorf("Could not inspect existing etcd state: %v", err)
}
ctx := EtcdConfigurationContext{
Config: c.StackConfig.Config,
EtcdExistingState: c.StackConfig.EtcdExistingState,
}

c.assets, err = c.buildAssets()
c.assets, err = c.buildAssets(ctx)

return c, err
}

func (c *Cluster) inspectExistingState() (model.EtcdExistingState, error) {
var err error
if c.ProvidedCFInterrogator == nil {
c.ProvidedCFInterrogator = cloudformation.New(c.session)
}
if c.ProvidedEC2Interrogator == nil {
c.ProvidedEC2Interrogator = ec2.New(c.session)
}

state := model.EtcdExistingState{}
state.StackExists, err = cfnstack.NestedStackExists(c.ProvidedCFInterrogator, c.ClusterName, naming.FromStackToCfnResource(c.Etcd.LogicalName()))
if err != nil {
return state, fmt.Errorf("failed to check for existence of etcd cloud-formation stack: %v", err)
}
// when the Etcd stack does not exist but we have existing etcd instances then we need to enable the
// etcd migration units.
if !state.StackExists {
if state.EtcdMigrationExistingEndpoints, err = c.lookupExistingEtcdEndpoints(); err != nil {
return state, fmt.Errorf("failed to lookup existing etcd endpoints: %v", err)
}
if state.EtcdMigrationExistingEndpoints != "" {
state.EtcdMigrationEnabled = true
}
}
return state, nil
}

func (c *Cluster) Assets() cfnstack.Assets {
return c.assets
}
Expand All @@ -185,11 +227,13 @@ func (c Cluster) NestedStackName() string {
return naming.FromStackToCfnResource(c.StackName)
}

func (c *Cluster) buildAssets() (cfnstack.Assets, error) {
func (c *Cluster) buildAssets(ctx EtcdConfigurationContext) (cfnstack.Assets, error) {
logger.Debugf("Called etcd.buildAssets")
logger.Debugf("Context is: %+v", ctx)
var err error
assets := cfnstack.NewAssetsBuilder(c.StackName, c.StackConfig.ClusterExportedStacksS3URI(), c.StackConfig.Region)

if c.StackConfig.UserDataEtcd, err = model.NewUserData(c.StackTemplateOptions.EtcdTmplFile, c.StackConfig.Config); err != nil {
if c.StackConfig.UserDataEtcd, err = model.NewUserData(c.StackTemplateOptions.EtcdTmplFile, ctx); err != nil {
return nil, fmt.Errorf("failed to render etcd cloud config: %v", err)
}

Expand All @@ -202,8 +246,10 @@ func (c *Cluster) buildAssets() (cfnstack.Assets, error) {
return nil, fmt.Errorf("Error while rendering template: %v", err)
}

logger.Debugf("Calling assets.Add on %s", STACK_TEMPLATE_FILENAME)
assets.Add(STACK_TEMPLATE_FILENAME, stackTemplate)

logger.Debugf("Calling assets.Build for etcd...")
return assets.Build(), nil
}

Expand Down Expand Up @@ -265,3 +311,49 @@ func (c *Cluster) String() string {
func (c *ClusterRef) Destroy() error {
return cfnstack.NewDestroyer("etcd", c.session, c.CloudFormation.RoleARN).Destroy()
}

// lookupExistingEtcdEndpoints supports the migration from embedded etcd servers to their own stack
// by looking up the existing etcd servers for a specific cluster and constructing and etcd endpoints
// list as used by tools such as etcdctl and the etcdadm script.
func (c Cluster) lookupExistingEtcdEndpoints() (string, error) {
clusterTag := fmt.Sprintf("tag:kubernetes.io/cluster/%s", c.ClusterName)
params := &ec2.DescribeInstancesInput{
Filters: []*ec2.Filter{
{
Name: aws.String("tag:kube-aws:role"),
Values: []*string{aws.String("etcd")},
},
{
Name: aws.String(clusterTag),
Values: []*string{aws.String("owned")},
},
{
Name: aws.String("instance-state-name"),
Values: []*string{aws.String("running"), aws.String("pending")},
},
},
}
logger.Debugf("Calling AWS EC2 DescribeInstances ->")
resp, err := c.ProvidedEC2Interrogator.DescribeInstances(params)
if err != nil {
return "", fmt.Errorf("can't lookup ec2 instances: %v", err)
}
if resp == nil {
return "", nil
}

logger.Debugf("<- received %d instances from AWS", len(resp.Reservations))
if len(resp.Reservations) == 0 {
return "", nil
}
// construct comma separated endpoints string
endpoints := []string{}
for _, res := range resp.Reservations {
for _, inst := range res.Instances {
endpoints = append(endpoints, fmt.Sprintf("https://%s:2379", *inst.PrivateDnsName))
}
}
result := strings.Join(endpoints, ",")
logger.Debugf("Existing etcd endpoints found: %s", result)
return result, nil
}
Loading

0 comments on commit cdceab6

Please sign in to comment.