diff --git a/.gitignore b/.gitignore index d379665..faed0c7 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ .rspec_status /kubeconfig /cluster_config.yaml +/token dist/hetzner-k3s.jar dist/hetzner-k3s diff --git a/README.md b/README.md index 08f8360..2bc5d87 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,7 @@ See my public profile with links for connecting with me [here](https://vitobotta - [Load balancers](docs/Load_balancers.md) - [Storage](docs/Storage.md) - [Troubleshooting](docs/Troubleshooting.md) +- [etcd S3 Backups](docs/etcd%20S3%20backups.md) - [Contributing and support](docs/Contributing_and_support.md) ___ diff --git a/docs/Creating_a_cluster.md b/docs/Creating_a_cluster.md index 20c4bb9..e3c2f9d 100644 --- a/docs/Creating_a_cluster.md +++ b/docs/Creating_a_cluster.md @@ -7,6 +7,7 @@ The tool requires a simple configuration file in order to create/upgrade/delete hetzner_token: cluster_name: test kubeconfig_path: "./kubeconfig" +# token_path: "./token" # optional, saves the server token file k3s_version: v1.30.3+k3s1 networking: @@ -47,6 +48,25 @@ networking: datastore: mode: etcd # etcd (default) or external external_datastore_endpoint: postgres://.... 
+ # etcd: # optional + # backups: # optional + # enabled: true # optional + # retention: 5 # optional + # schedule_cron: "0 */12 * * *" # optional + # dir: ${data-dir}/db/snapshots # optional + # compress: false # optional + # s3: # optional - can only be enabled for etcd mode + # enabled: true + # endpoint: "s3.amazonaws.com" # optional + # endpoint_ca: "" # optional + # skip_ssl_verify: false # optional + # access_key: "" + # secret_key: "" + # bucket: "" + # region: "us-east-1" # optional + # folder: "" # optional + # insecure: false # optional + # timeout: "5m0s" # optional schedule_workloads_on_masters: false diff --git a/docs/etcd S3 backups.md b/docs/etcd S3 backups.md new file mode 100644 index 0000000..8c1c33a --- /dev/null +++ b/docs/etcd S3 backups.md @@ -0,0 +1,13 @@ +### etcd S3 Backups + +S3 backups can be enabled for the embedded etcd mode only. You can see the explanations for each option in the [K3s docs](https://docs.k3s.io/cli/etcd-snapshot). + +A backup is created for each server instance. If you run your cluster in high availability mode you may want to update the retention value to be how many master instances you have multiplied by how many backups you want for each master. For example if you have 3 masters and you want 3 backups per master you would set retention to 9. The default is 5, so when running 3 master instances, one instance will only have 1 backup. + +> "In addition to backing up the datastore itself, you must also back up the server token file at /var/lib/rancher/k3s/server/token. You must restore this file, or pass its value into the --token option, when restoring from backup. If you do not use the same token value when restoring, the snapshot will be unusable, as the token is used to encrypt confidential data within the datastore itself." 
[K3S Backup/Restore Docs](https://docs.k3s.io/datastore/backup-restore) + +You can save the server token file to disk by setting the `token_path` value in your cluster_config.yaml: + +```yaml +token_path: "./token" +``` diff --git a/src/configuration/datastore.cr b/src/configuration/datastore.cr index 4d28fe2..64b37f2 100644 --- a/src/configuration/datastore.cr +++ b/src/configuration/datastore.cr @@ -1,11 +1,18 @@ require "yaml" +require "./datastore/etcd" -class Configuration::Datastore - include YAML::Serializable +module Configuration + module Datastore + class Config + include YAML::Serializable - getter mode : String = "etcd" - getter external_datastore_endpoint : String = "" + getter mode : String = "etcd" + getter external_datastore_endpoint : String = "" - def initialize(@mode : String = "etcd", @external_datastore_endpoint : String = "") + getter etcd : ::Configuration::Datastore::Etcd = ::Configuration::Datastore::Etcd.new + + def initialize(@mode : String = "etcd", @external_datastore_endpoint : String = "") + end + end end end diff --git a/src/configuration/datastore/etcd.cr b/src/configuration/datastore/etcd.cr new file mode 100644 index 0000000..646313f --- /dev/null +++ b/src/configuration/datastore/etcd.cr @@ -0,0 +1,11 @@ +require "yaml" +require "./etcd_backups" + +class Configuration::Datastore::Etcd + include YAML::Serializable + + getter backups : Configuration::Datastore::Backups = Configuration::Datastore::Backups.new + + def initialize(@backups : Configuration::Datastore::Backups = Configuration::Datastore::Backups.new) + end +end diff --git a/src/configuration/datastore/etcd_backups.cr b/src/configuration/datastore/etcd_backups.cr new file mode 100644 index 0000000..4dc5046 --- /dev/null +++ b/src/configuration/datastore/etcd_backups.cr @@ -0,0 +1,16 @@ +require "yaml" +require "./etcd_s3" + +class Configuration::Datastore::Backups + include YAML::Serializable + + getter enabled : Bool = true + getter schedule_cron : String? 
+ getter retention : Int32? + getter dir : String? + getter compress : Bool = false + getter s3 : Configuration::Datastore::S3 = Configuration::Datastore::S3.new + + def initialize(@enabled : Bool = true, @compress : Bool = false, @s3 : Configuration::Datastore::S3 = Configuration::Datastore::S3.new) + end +end diff --git a/src/configuration/datastore/etcd_s3.cr b/src/configuration/datastore/etcd_s3.cr new file mode 100644 index 0000000..4542f91 --- /dev/null +++ b/src/configuration/datastore/etcd_s3.cr @@ -0,0 +1,20 @@ +require "yaml" + +class Configuration::Datastore::S3 + include YAML::Serializable + + getter enabled : Bool = false + getter endpoint : String? + getter endpoint_ca : String? + getter skip_ssl_verify : Bool = false + getter access_key : String = "" + getter secret_key : String = "" + getter bucket : String = "" + getter region : String? + getter folder : String? + getter insecure : Bool = false + getter timeout : String? + + def initialize(@enabled : Bool = false, @skip_ssl_verify : Bool = false, @access_key : String = "", @secret_key : String = "", @bucket : String = "", @insecure : Bool = false) + end +end diff --git a/src/configuration/main.cr b/src/configuration/main.cr index 358e9e6..301a2c5 100644 --- a/src/configuration/main.cr +++ b/src/configuration/main.cr @@ -11,6 +11,7 @@ class Configuration::Main getter hetzner_token : String = ENV.fetch("HCLOUD_TOKEN", "") getter cluster_name : String getter kubeconfig_path : String + getter token_path : String? getter k3s_version : String getter api_server_hostname : String? getter schedule_workloads_on_masters : Bool = false @@ -28,7 +29,7 @@ class Configuration::Main getter autoscaling_image : String? 
getter snapshot_os : String = "default" getter networking : Configuration::Networking = Configuration::Networking.new - getter datastore : Configuration::Datastore = Configuration::Datastore.new + getter datastore : Configuration::Datastore::Config = Configuration::Datastore::Config.new getter manifests : Configuration::Manifests = Configuration::Manifests.new getter embedded_registry_mirror : Configuration::EmbeddedRegistryMirror = Configuration::EmbeddedRegistryMirror.new getter include_instance_type_in_instance_name : Bool = false diff --git a/src/configuration/settings/datastore.cr b/src/configuration/settings/datastore.cr index e199fba..c1e76bb 100644 --- a/src/configuration/settings/datastore.cr +++ b/src/configuration/settings/datastore.cr @@ -1,6 +1,6 @@ class Configuration::Settings::Datastore getter errors : Array(String) - getter datastore : Configuration::Datastore + getter datastore : Configuration::Datastore::Config def initialize(@errors, @datastore) end @@ -8,6 +8,12 @@ class Configuration::Settings::Datastore def validate case datastore.mode when "etcd" + return unless datastore.etcd.backups.s3.enabled + + s3 = datastore.etcd.backups.s3 + errors << "access_key is required for S3 backups" if s3.access_key.strip.empty? + errors << "secret_key is required for S3 backups" if s3.secret_key.strip.empty? + errors << "bucket is required for S3 backups" if s3.bucket.strip.empty? when "external" errors << "external_datastore_endpoint is required for external datastore" if datastore.external_datastore_endpoint.strip.empty? else diff --git a/src/configuration/settings/node_pool.cr b/src/configuration/settings/node_pool.cr index 5e20be7..b71fdbf 100644 --- a/src/configuration/settings/node_pool.cr +++ b/src/configuration/settings/node_pool.cr @@ -14,7 +14,7 @@ class Configuration::Settings::NodePool getter pool_name : String { masters? ? "masters" : pool.try(&.name) || "" } getter pool_description : String { workers? ? 
"Worker mode pool '#{pool_name}'" : "Masters pool" } - getter datastore : Configuration::Datastore + getter datastore : Configuration::Datastore::Config def initialize(@errors, @pool, @pool_type, @masters_location, @instance_types, @locations, @datastore) end diff --git a/src/configuration/settings/node_pool/instance_count.cr b/src/configuration/settings/node_pool/instance_count.cr index 256482e..4156f1b 100644 --- a/src/configuration/settings/node_pool/instance_count.cr +++ b/src/configuration/settings/node_pool/instance_count.cr @@ -5,7 +5,7 @@ class Configuration::Settings::NodePool::InstanceCount getter errors : Array(String) getter pool : Configuration::NodePool getter pool_type : Symbol - getter datastore : Configuration::Datastore + getter datastore : Configuration::Datastore::Config def initialize(@errors, @pool, @pool_type, @datastore) end diff --git a/src/kubernetes/installer.cr b/src/kubernetes/installer.cr index 53241ad..9b7ae1e 100644 --- a/src/kubernetes/installer.cr +++ b/src/kubernetes/installer.cr @@ -116,6 +116,8 @@ class Kubernetes::Installer save_kubeconfig(master_count) + save_server_token() + sleep 5 command = "kubectl cluster-info 2> /dev/null" @@ -153,6 +155,7 @@ class Kubernetes::Installer server = "" datastore_endpoint = "" etcd_arguments = "" + etcd_schedule_cron = "" if settings.datastore.mode == "etcd" server = master == first_master ? " --cluster-init " : " --server https://#{api_server_ip_address}:6443 " @@ -164,6 +167,8 @@ class Kubernetes::Installer extra_args = "#{kube_api_server_args_list} #{kube_scheduler_args_list} #{kube_controller_manager_args_list} #{kube_cloud_controller_manager_args_list} #{kubelet_args_list} #{kube_proxy_args_list}" taint = settings.schedule_workloads_on_masters ? 
" " : " --node-taint CriticalAddonsOnly=true:NoExecute " + etcd_schedule_cron = settings.datastore.etcd.backups.schedule_cron if present?(settings.datastore.etcd.backups.schedule_cron) + Crinja.render(MASTER_INSTALL_SCRIPT, { cluster_name: settings.cluster_name, k3s_version: settings.k3s_version, @@ -183,6 +188,8 @@ class Kubernetes::Installer cluster_dns: settings.networking.cluster_dns, datastore_endpoint: datastore_endpoint, etcd_arguments: etcd_arguments, + etcd_backup_settings: etcd_backup_settings, + etcd_schedule_cron: etcd_schedule_cron, embedded_registry_mirror_enabled: settings.embedded_registry_mirror.enabled.to_s, }) end @@ -316,6 +323,19 @@ class Kubernetes::Installer log_line "...kubeconfig file generated as #{kubeconfig_path}.", "Control plane" end + private def save_server_token() + return unless present?(settings.token_path) + token_path = settings.token_path.not_nil! + + log_line "Generating the token file to #{token_path}...", "Control plane" + + token = ssh.run(first_master, settings.networking.ssh.port, "cat /var/lib/rancher/k3s/server/token", settings.networking.ssh.use_agent, print_output: false) + + File.write(token_path, token) + + log_line "...token file generated as #{token_path}.", "Control plane" + end + private def add_labels_and_taints_to_masters add_labels_or_taints(:label, masters, settings.masters_pool.labels, "masters_pool") add_labels_or_taints(:taint, masters, settings.masters_pool.taints, "masters_pool") @@ -388,4 +408,36 @@ class Kubernetes::Installer first_master.private_ip_address end end + + private def etcd_backup_settings + return "" unless settings.datastore.mode == "etcd" + + unless settings.datastore.etcd.backups.enabled + return "--etcd-disable-snapshots" + end + + opts = [] of String + + backups = settings.datastore.etcd.backups + opts << "--etcd-snapshot-retention=#{backups.retention}" if present?(backups.retention) + opts << "--etcd-snapshot-dir=#{backups.dir}" if present?(backups.dir) + opts << 
"--etcd-snapshot-compress" if backups.compress + + s3 = backups.s3 + if s3.enabled + opts << "--etcd-s3" + opts << "--etcd-s3-endpoint=#{s3.endpoint}" if present?(s3.endpoint) + opts << "--etcd-s3-endpoint-ca=#{s3.endpoint_ca}" if present?(s3.endpoint_ca) + opts << "--etcd-s3-skip-ssl-verify" if s3.skip_ssl_verify + opts << "--etcd-s3-access-key=#{s3.access_key}" if present?(s3.access_key) + opts << "--etcd-s3-secret-key=#{s3.secret_key}" if present?(s3.secret_key) + opts << "--etcd-s3-bucket=#{s3.bucket}" if present?(s3.bucket) + opts << "--etcd-s3-region=#{s3.region}" if present?(s3.region) + opts << "--etcd-s3-folder=#{s3.folder}" if present?(s3.folder) + opts << "--etcd-s3-insecure" if s3.insecure + opts << "--etcd-s3-timeout=#{s3.timeout}" if present?(s3.timeout) + end + + opts.uniq.sort.join(" ") + end end diff --git a/src/kubernetes/util.cr b/src/kubernetes/util.cr index 4ac020b..d1f53d3 100644 --- a/src/kubernetes/util.cr +++ b/src/kubernetes/util.cr @@ -83,4 +83,15 @@ module Kubernetes::Util port_open?(ip_address, port, timeout = 1.0) end + + private def present?(value : Object?) + case value + when String + !value.strip.empty? + when Int32 + value != 0 + else + !value.nil? + end + end end diff --git a/templates/master_install_script.sh b/templates/master_install_script.sh index 76a592d..51cab6d 100644 --- a/templates/master_install_script.sh +++ b/templates/master_install_script.sh @@ -45,7 +45,15 @@ else EMBEDDED_REGISTRY_MIRROR=" " fi -mkdir -p /etc/rancher/k3s +mkdir -p /etc/rancher/k3s/config.yaml.d + +if [ "{{ etcd_schedule_cron }}" != "" ]; then + cat > /etc/rancher/k3s/config.yaml.d/hetzner-k3s.yaml </dev/null +fi cat > /etc/rancher/k3s/registries.yaml <&1 | tee -a /var/log/hetzner-k3s.log +if [ -x /sbin/openrc-run ]; then + rc-service k3s restart +fi +if [ -d /run/systemd ]; then + systemctl restart k3s +fi + + echo true > /etc/initialized