This repository has been archived by the owner on Sep 30, 2020. It is now read-only.

0.11.x migration from existing clusters without losing state #1380

9 changes: 9 additions & 0 deletions cfnstack/ec2.go
@@ -0,0 +1,9 @@
package cfnstack

import (
"github.com/aws/aws-sdk-go/service/ec2"
)

type EC2Interrogator interface {
DescribeInstances(input *ec2.DescribeInstancesInput) (*ec2.DescribeInstancesOutput, error)
}
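
The new EC2Interrogator interface narrows the EC2 client surface to the single call the etcd cluster code needs, so tests can inject a stub through the ProvidedEC2Interrogator field instead of a real AWS session. A minimal sketch of such a stub (not part of this PR's diff; the type and variable names are illustrative):

package cfnstack_test

import (
	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/service/ec2"
)

// fakeEC2 is a hand-rolled stub that satisfies cfnstack.EC2Interrogator.
// It returns a canned DescribeInstances response without touching AWS.
type fakeEC2 struct {
	output *ec2.DescribeInstancesOutput
	err    error
}

func (f *fakeEC2) DescribeInstances(input *ec2.DescribeInstancesInput) (*ec2.DescribeInstancesOutput, error) {
	return f.output, f.err
}

// Example canned response containing a single etcd instance.
var stub = &fakeEC2{
	output: &ec2.DescribeInstancesOutput{
		Reservations: []*ec2.Reservation{{
			Instances: []*ec2.Instance{{PrivateDnsName: aws.String("ip-10-0-0-10.ec2.internal")}},
		}},
	},
}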
27 changes: 14 additions & 13 deletions core/controlplane/config/config.go
@@ -576,19 +576,20 @@ type EtcdSettings struct {

// Cluster is the container of all the configurable parameters of a kube-aws cluster, customizable via cluster.yaml
type Cluster struct {
KubeClusterSettings `yaml:",inline"`
DeploymentSettings `yaml:",inline"`
DefaultWorkerSettings `yaml:",inline"`
ControllerSettings `yaml:",inline"`
EtcdSettings `yaml:",inline"`
AdminAPIEndpointName string `yaml:"adminAPIEndpointName,omitempty"`
RecordSetTTL int `yaml:"recordSetTTL,omitempty"`
TLSCADurationDays int `yaml:"tlsCADurationDays,omitempty"`
TLSCertDurationDays int `yaml:"tlsCertDurationDays,omitempty"`
HostedZoneID string `yaml:"hostedZoneId,omitempty"`
PluginConfigs model.PluginConfigs `yaml:"kubeAwsPlugins,omitempty"`
ProvidedEncryptService EncryptService
ProvidedCFInterrogator cfnstack.CFInterrogator
KubeClusterSettings `yaml:",inline"`
DeploymentSettings `yaml:",inline"`
DefaultWorkerSettings `yaml:",inline"`
ControllerSettings `yaml:",inline"`
EtcdSettings `yaml:",inline"`
AdminAPIEndpointName string `yaml:"adminAPIEndpointName,omitempty"`
RecordSetTTL int `yaml:"recordSetTTL,omitempty"`
TLSCADurationDays int `yaml:"tlsCADurationDays,omitempty"`
TLSCertDurationDays int `yaml:"tlsCertDurationDays,omitempty"`
HostedZoneID string `yaml:"hostedZoneId,omitempty"`
PluginConfigs model.PluginConfigs `yaml:"kubeAwsPlugins,omitempty"`
ProvidedEncryptService EncryptService
ProvidedCFInterrogator cfnstack.CFInterrogator
ProvidedEC2Interrogator cfnstack.EC2Interrogator
// SSHAccessAllowedSourceCIDRs is network ranges of sources you'd like SSH accesses to be allowed from, in CIDR notation
SSHAccessAllowedSourceCIDRs model.CIDRRanges `yaml:"sshAccessAllowedSourceCIDRs,omitempty"`
CustomSettings map[string]interface{} `yaml:"customSettings,omitempty"`
2 changes: 2 additions & 0 deletions core/controlplane/config/stack_config.go
@@ -6,6 +6,7 @@ import (
"strings"

"github.com/kubernetes-incubator/kube-aws/filereader/jsontemplate"
"github.com/kubernetes-incubator/kube-aws/logger"
"github.com/kubernetes-incubator/kube-aws/model"
)

@@ -63,6 +64,7 @@ func (c *StackConfig) RenderStackTemplateAsBytes() ([]byte, error) {
}

func (c *StackConfig) RenderStackTemplateAsString() (string, error) {
logger.Debugf("Called RenderStackTemplateAsString on %s", c.StackName)
bytes, err := c.RenderStackTemplateAsBytes()
return string(bytes), err
}
57 changes: 56 additions & 1 deletion core/controlplane/config/templates/stack-template.json
@@ -158,6 +158,57 @@
}
}
},
"SecurityGroupWorker": {
Comment from @davidmccormick (Contributor, author), Jun 29, 2018:
This security group needed returning to the control plane. We can remove it again in later releases, but without it the updated control-plane stack will throw an error about it being in use by the node pools.

Reply from a contributor:
Makes sense. Fine with reviving this then.
// Wish we had something like reference counting to keep AWS resources only while they're used 😆

"Properties": {
"GroupDescription": {
"Ref": "AWS::StackName"
},
"SecurityGroupEgress": [
{
"CidrIp": "0.0.0.0/0",
"FromPort": -1,
"IpProtocol": "icmp",
"ToPort": -1
},
{
"CidrIp": "0.0.0.0/0",
"FromPort": 0,
"IpProtocol": "tcp",
"ToPort": 65535
},
{
"CidrIp": "0.0.0.0/0",
"FromPort": 0,
"IpProtocol": "udp",
"ToPort": 65535
}
],
"SecurityGroupIngress": [
{{ range $_, $r := $.SSHAccessAllowedSourceCIDRs -}}
{
"CidrIp": "{{$r}}",
"FromPort": 22,
"IpProtocol": "tcp",
"ToPort": 22
},
{{end -}}
{
"CidrIp": "0.0.0.0/0",
"FromPort": -1,
"IpProtocol": "icmp",
"ToPort": -1
}
],
"Tags": [
{
"Key": "Name",
"Value": "{{$.ClusterName}}-sg-worker"
}
],
"VpcId": {{.VPCRef}}
},
"Type": "AWS::EC2::SecurityGroup"
},
{{ if not .Controller.IAMConfig.InstanceProfile.Arn }}
"IAMInstanceProfileController": {
"Properties": {
@@ -617,7 +668,6 @@
{{quote $n}}: {{toJSON $r}}
{{end}}
},

"Outputs": {
{{ if not .Controller.IAMConfig.InstanceProfile.Arn }}
"ControllerIAMRoleArn": {
@@ -626,6 +676,11 @@
"Export": { "Name": { "Fn::Sub": "${AWS::StackName}-ControllerIAMRoleArn" } }
},
{{end}}
"WorkerSecurityGroup" : {
"Description" : "The security group assigned to worker nodes",
"Value" : { "Ref" : "SecurityGroupWorker" },
"Export" : { "Name" : {"Fn::Sub": "${AWS::StackName}-WorkerSecurityGroup" }}
},
"StackName": {
"Description": "The name of this stack which is used by node pool stacks to import outputs from this stack",
"Value": { "Ref": "AWS::StackName" }
5 changes: 0 additions & 5 deletions core/controlplane/config/user_data_config_test.go
@@ -9,7 +9,6 @@ import (

"github.com/aws/aws-sdk-go/service/kms"
"github.com/coreos/coreos-cloudinit/config/validate"
etcdconfig "github.com/kubernetes-incubator/kube-aws/core/etcd/config"
"github.com/kubernetes-incubator/kube-aws/model"
"github.com/kubernetes-incubator/kube-aws/test/helper"
"github.com/stretchr/testify/assert"
@@ -102,10 +101,6 @@ func TestCloudConfigTemplating(t *testing.T) {
Name string
Template []byte
}{
{
Name: "CloudConfigEtcd",
Template: etcdconfig.CloudConfigEtcd,
},
{
Name: "CloudConfigController",
Template: CloudConfigController,
122 changes: 107 additions & 15 deletions core/etcd/cluster/cluster.go
@@ -2,6 +2,7 @@ package cluster

import (
"fmt"
"strings"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
@@ -10,7 +11,8 @@ import (

"github.com/kubernetes-incubator/kube-aws/cfnstack"
controlplanecluster "github.com/kubernetes-incubator/kube-aws/core/controlplane/cluster"
"github.com/kubernetes-incubator/kube-aws/core/controlplane/config"
controlplaneconfig "github.com/kubernetes-incubator/kube-aws/core/controlplane/config"
"github.com/kubernetes-incubator/kube-aws/core/etcd/config"
"github.com/kubernetes-incubator/kube-aws/logger"
"github.com/kubernetes-incubator/kube-aws/model"
"github.com/kubernetes-incubator/kube-aws/naming"
@@ -23,15 +25,15 @@

const STACK_TEMPLATE_FILENAME = "stack.json"

func newClusterRef(cfg *config.Cluster, session *session.Session) *ClusterRef {
func newClusterRef(cfg *controlplaneconfig.Cluster, session *session.Session) *ClusterRef {
return &ClusterRef{
Cluster: cfg,
session: session,
}
}

type ClusterRef struct {
*config.Cluster
*controlplaneconfig.Cluster
session *session.Session
}

@@ -41,6 +43,13 @@ type Cluster struct {
assets cfnstack.Assets
}

// An EtcdConfigurationContext contains configuration settings/options mixed with existing state in a way that can be
// consumed by stack and cloud-config templates.
type EtcdConfigurationContext struct {
*controlplaneconfig.Config
model.EtcdExistingState
}
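
This context is what the etcd stack and cloud-config templates render against, so fields from both the control-plane Config and the discovered EtcdExistingState are addressable from template actions. A rough sketch of how a template might branch on the migration fields (not part of this PR's diff; the template text and type name are illustrative, while the field names are taken from the state handling further down in this file):

package main

import (
	"os"
	"text/template"
)

// exampleContext mirrors only the migration fields referenced below
// (EtcdMigrationEnabled, EtcdMigrationExistingEndpoints); the real
// EtcdConfigurationContext also embeds the full control-plane Config.
type exampleContext struct {
	EtcdMigrationEnabled           bool
	EtcdMigrationExistingEndpoints string
}

const snippet = `{{ if .EtcdMigrationEnabled -}}
# migrate data from the old embedded etcd members
ETCD_INITIAL_ENDPOINTS={{ .EtcdMigrationExistingEndpoints }}
{{- end }}`

func main() {
	t := template.Must(template.New("etcd").Parse(snippet))
	_ = t.Execute(os.Stdout, exampleContext{
		EtcdMigrationEnabled:           true,
		EtcdMigrationExistingEndpoints: "https://ip-10-0-0-10.ec2.internal:2379",
	})
}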

type ec2Service interface {
CreateVolume(*ec2.CreateVolumeInput) (*ec2.Volume, error)
DescribeVpcs(*ec2.DescribeVpcsInput) (*ec2.DescribeVpcsOutput, error)
@@ -114,12 +123,10 @@ func (c *ClusterRef) validateExistingVPCState(ec2Svc ec2Service) error {
return nil
}

func NewCluster(cfgRef *config.Cluster, opts config.StackTemplateOptions, plugins []*pluginmodel.Plugin, session *session.Session) (*Cluster, error) {
cfg := &config.Cluster{}
func NewCluster(cfgRef *controlplaneconfig.Cluster, opts controlplaneconfig.StackTemplateOptions, plugins []*pluginmodel.Plugin, session *session.Session) (*Cluster, error) {
logger.Debugf("Called etcd.NewCluster")
cfg := &controlplaneconfig.Cluster{}
*cfg = *cfgRef
if cfg.ProvidedCFInterrogator == nil {
cfg.ProvidedCFInterrogator = cloudformation.New(session)
}

// Import all the managed subnets from the network stack
var err error
@@ -136,14 +143,16 @@ func NewCluster(cfgRef *config.Cluster, opts config.StackTemplateOptions, plugin
clusterRef.KubeAWSVersion = controlplanecluster.VERSION
clusterRef.HostOS = cfgRef.HostOS

stackConfig, err := clusterRef.StackConfig("etcd", opts, session, plugins)
cpStackConfig, err := clusterRef.StackConfig("etcd", opts, session, plugins)
if err != nil {
return nil, err
}
// hack - mutate our controlplane generated stack config into our own specific etcd version
etcdStackConfig := config.NewEtcdStackConfig(cpStackConfig)

c := &Cluster{
ClusterRef: clusterRef,
StackConfig: stackConfig,
StackConfig: etcdStackConfig,
}

// Notes:
@@ -164,16 +173,49 @@ func NewCluster(cfgRef *config.Cluster, opts config.StackTemplateOptions, plugin
c.StackConfig.Etcd.CustomSystemdUnits = append(c.StackConfig.Etcd.CustomSystemdUnits, extraEtcd.SystemdUnits...)
c.StackConfig.Etcd.CustomFiles = append(c.StackConfig.Etcd.CustomFiles, extraEtcd.Files...)
c.StackConfig.Etcd.IAMConfig.Policy.Statements = append(c.StackConfig.Etcd.IAMConfig.Policy.Statements, extraEtcd.IAMPolicyStatements...)
c.StackConfig.Etcd.StackExists, err = cfnstack.NestedStackExists(cfg.ProvidedCFInterrogator, c.ClusterName, naming.FromStackToCfnResource(c.Etcd.LogicalName()))

// create the context that will be used to build the assets (combination of config + existing state)
c.StackConfig.EtcdExistingState, err = c.inspectExistingState()
if err != nil {
return nil, fmt.Errorf("failed to check for existence of etcd cloud-formation stack: %v", err)
return nil, fmt.Errorf("Could not inspect existing etcd state: %v", err)
}
ctx := EtcdConfigurationContext{
Config: c.StackConfig.Config,
EtcdExistingState: c.StackConfig.EtcdExistingState,
}

c.assets, err = c.buildAssets()
c.assets, err = c.buildAssets(ctx)

return c, err
}

func (c *Cluster) inspectExistingState() (model.EtcdExistingState, error) {
var err error
if c.ProvidedCFInterrogator == nil {
c.ProvidedCFInterrogator = cloudformation.New(c.session)
}
if c.ProvidedEC2Interrogator == nil {
c.ProvidedEC2Interrogator = ec2.New(c.session)
}

state := model.EtcdExistingState{}
state.StackExists, err = cfnstack.NestedStackExists(c.ProvidedCFInterrogator, c.ClusterName, naming.FromStackToCfnResource(c.Etcd.LogicalName()))
if err != nil {
return state, fmt.Errorf("failed to check for existence of etcd cloud-formation stack: %v", err)
}
// when the Etcd stack does not exist but we have existing etcd instances then we need to enable the
// etcd migration units.
if !state.StackExists {
if state.EtcdMigrationExistingEndpoints, err = c.lookupExistingEtcdEndpoints(); err != nil {
return state, fmt.Errorf("failed to lookup existing etcd endpoints: %v", err)
}
if state.EtcdMigrationExistingEndpoints != "" {
state.EtcdMigrationEnabled = true
}
}
return state, nil
}

func (c *Cluster) Assets() cfnstack.Assets {
return c.assets
}
@@ -185,11 +227,13 @@ func (c Cluster) NestedStackName() string {
return naming.FromStackToCfnResource(c.StackName)
}

func (c *Cluster) buildAssets() (cfnstack.Assets, error) {
func (c *Cluster) buildAssets(ctx EtcdConfigurationContext) (cfnstack.Assets, error) {
logger.Debugf("Called etcd.buildAssets")
logger.Debugf("Context is: %+v", ctx)
var err error
assets := cfnstack.NewAssetsBuilder(c.StackName, c.StackConfig.ClusterExportedStacksS3URI(), c.StackConfig.Region)

if c.StackConfig.UserDataEtcd, err = model.NewUserData(c.StackTemplateOptions.EtcdTmplFile, c.StackConfig.Config); err != nil {
if c.StackConfig.UserDataEtcd, err = model.NewUserData(c.StackTemplateOptions.EtcdTmplFile, ctx); err != nil {
return nil, fmt.Errorf("failed to render etcd cloud config: %v", err)
}

@@ -202,8 +246,10 @@ func (c *Cluster) buildAssets() (cfnstack.Assets, error) {
return nil, fmt.Errorf("Error while rendering template: %v", err)
}

logger.Debugf("Calling assets.Add on %s", STACK_TEMPLATE_FILENAME)
assets.Add(STACK_TEMPLATE_FILENAME, stackTemplate)

logger.Debugf("Calling assets.Build for etcd...")
return assets.Build(), nil
}

@@ -265,3 +311,49 @@ func (c *Cluster) String() string {
func (c *ClusterRef) Destroy() error {
return cfnstack.NewDestroyer("etcd", c.session, c.CloudFormation.RoleARN).Destroy()
}

// lookupExistingEtcdEndpoints supports the migration from embedded etcd servers to their own stack
// by looking up the existing etcd servers for a specific cluster and constructing an etcd endpoints
// list as used by tools such as etcdctl and the etcdadm script.
func (c Cluster) lookupExistingEtcdEndpoints() (string, error) {
clusterTag := fmt.Sprintf("tag:kubernetes.io/cluster/%s", c.ClusterName)
params := &ec2.DescribeInstancesInput{
Filters: []*ec2.Filter{
{
Name: aws.String("tag:kube-aws:role"),
Values: []*string{aws.String("etcd")},
},
{
Name: aws.String(clusterTag),
Values: []*string{aws.String("owned")},
},
{
Name: aws.String("instance-state-name"),
Values: []*string{aws.String("running"), aws.String("pending")},
},
},
}
logger.Debugf("Calling AWS EC2 DescribeInstances ->")
resp, err := c.ProvidedEC2Interrogator.DescribeInstances(params)
if err != nil {
return "", fmt.Errorf("can't lookup ec2 instances: %v", err)
}
if resp == nil {
return "", nil
}

logger.Debugf("<- received %d instances from AWS", len(resp.Reservations))
if len(resp.Reservations) == 0 {
return "", nil
}
// construct comma separated endpoints string
endpoints := []string{}
for _, res := range resp.Reservations {
for _, inst := range res.Instances {
endpoints = append(endpoints, fmt.Sprintf("https://%s:2379", *inst.PrivateDnsName))
}
}
result := strings.Join(endpoints, ",")
logger.Debugf("Existing etcd endpoints found: %s", result)
return result, nil
}
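
Because the EC2 client is reached through the ProvidedEC2Interrogator interface, the endpoint discovery above can be exercised without AWS. A sketch of a unit test using a stub (not part of this PR's diff; the test name, stub type, and fixture wiring are assumptions, and the real tests in the package may construct the Cluster differently):

package cluster

import (
	"testing"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/service/ec2"

	controlplaneconfig "github.com/kubernetes-incubator/kube-aws/core/controlplane/config"
)

// stubEC2 satisfies cfnstack.EC2Interrogator with a canned DescribeInstances response.
type stubEC2 struct {
	out *ec2.DescribeInstancesOutput
}

func (s *stubEC2) DescribeInstances(*ec2.DescribeInstancesInput) (*ec2.DescribeInstancesOutput, error) {
	return s.out, nil
}

func TestLookupExistingEtcdEndpoints(t *testing.T) {
	// Hypothetical fixture wiring: assumes the embedded control-plane Cluster
	// can be populated directly with just the interrogator set.
	cpCluster := &controlplaneconfig.Cluster{}
	cpCluster.ProvidedEC2Interrogator = &stubEC2{
		out: &ec2.DescribeInstancesOutput{
			Reservations: []*ec2.Reservation{{
				Instances: []*ec2.Instance{
					{PrivateDnsName: aws.String("ip-10-0-0-10.ec2.internal")},
					{PrivateDnsName: aws.String("ip-10-0-0-11.ec2.internal")},
				},
			}},
		},
	}
	c := Cluster{ClusterRef: &ClusterRef{Cluster: cpCluster}}

	got, err := c.lookupExistingEtcdEndpoints()
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	want := "https://ip-10-0-0-10.ec2.internal:2379,https://ip-10-0-0-11.ec2.internal:2379"
	if got != want {
		t.Errorf("endpoints = %q, want %q", got, want)
	}
}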