Initial commit
cengizhanR committed Aug 19, 2024
0 parents commit fe63cb4
Showing 15 changed files with 621 additions and 0 deletions.
4 changes: 4 additions & 0 deletions DnsChangeOrder.txt
@@ -0,0 +1,4 @@
nmcli connection show
sudo nmcli connection modify eth0 ipv4.dns "10.0.0.10,192.168.8.100"
sudo nmcli connection modify Eth1 ipv4.dns "10.0.0.10,192.168.8.100"
sudo systemctl restart NetworkManager
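# A quick verification sketch (assumes the connection names above); reapplying the
# profile is usually enough to pick up the new DNS servers:
sudo nmcli connection up eth0
nmcli device show eth0 | grep IP4.DNS
cat /etc/resolv.conf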
4 changes: 4 additions & 0 deletions FipsModeRedhat.txt
@@ -0,0 +1,4 @@
fips-mode-setup --enable
reboot
fips-mode-setup --check
cat /proc/sys/crypto/fips_enabled   # If the file contains 1, the RHEL core cryptographic components are in FIPS mode and use only FIPS-approved implementations of cryptographic algorithms; if it contains 0, FIPS mode is not enabled.
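# Optional extra checks (a sketch; update-crypto-policies ships with RHEL 8/9):
update-crypto-policies --show      # should report FIPS after the reboot
openssl md5 /etc/hostname          # typically refused in FIPS mode, since MD5 is not an approved algorithm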
11 changes: 11 additions & 0 deletions agent1.txt
@@ -0,0 +1,11 @@
cat << EOF >> /etc/rancher/rke2/config.yaml
server: https://rancher.exemple.com:9345
token: fuzzybunnyslippers
write-kubeconfig-mode: 0600
kube-apiserver-arg:
- authorization-mode=RBAC,Node
kubelet-arg:
- protect-kernel-defaults=true
- read-only-port=0
- authorization-mode=Webhook
EOF
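# After writing the config, start the agent and watch it join (a sketch; assumes
# the rke2-agent install has already been done on this node):
systemctl enable rke2-agent.service
systemctl start rke2-agent.service
journalctl -u rke2-agent -f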
12 changes: 12 additions & 0 deletions agent2.txt
@@ -0,0 +1,12 @@
cat << EOF >> /etc/rancher/rke2/config.yaml
server: https://rancher.exemple.com:9345
token: fuzzybunnyslippers
write-kubeconfig-mode: 0600
node-ip: "10.0.0.6"
kube-apiserver-arg:
- authorization-mode=RBAC,Node
kubelet-arg:
- protect-kernel-defaults=true
- read-only-port=0
- authorization-mode=Webhook
EOF
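# Verify the node registered with the declared node-ip (a sketch; run on a server
# node, node name assumed from the hosts entries in these notes):
kubectl get nodes -o wide
kubectl get node rke2agent2 -o jsonpath='{.status.addresses}'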
13 changes: 13 additions & 0 deletions certManagerAirgapinstall.txt
@@ -0,0 +1,13 @@
kubectl create namespace cert-manager
kubectl apply --validate=false -f cert-manager.crds.yaml
helm install cert-manager ./cert-manager-v1.14.3.tgz --namespace cert-manager --set image.repository=200.10.10.5:5000/quay.io/jetstack/cert-manager-controller --set webhook.image.repository=200.10.10.5:5000/quay.io/jetstack/cert-manager-webhook --set cainjector.image.repository=200.10.10.5:5000/quay.io/jetstack/cert-manager-cainjector --set startupapicheck.image.repository=200.10.10.5:5000/quay.io/jetstack/cert-manager-startupapicheck
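# Sanity check before moving on to Rancher (a sketch):
kubectl -n cert-manager rollout status deploy/cert-manager
kubectl -n cert-manager get pods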

kubectl create namespace cattle-system
helm install rancher ./rancher-2.8.4.tgz \
--namespace cattle-system \
--set hostname=rancher.rancher.exemple.com \
--set certmanager.version=1.14.5 \
--set rancherImage=10.0.0.10:5000/rancher/rancher \
--set systemDefaultRegistry=10.0.0.10:5000 \
--set useBundledSystemChart=true
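# Wait for Rancher to come up and fetch the initial bootstrap password (a sketch;
# the bootstrap-secret exists on recent Rancher versions):
kubectl -n cattle-system rollout status deploy/rancher
kubectl -n cattle-system get secret bootstrap-secret -o go-template='{{.data.bootstrapPassword|base64decode}}{{"\n"}}'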

141 changes: 141 additions & 0 deletions gerekli seyler.txt
@@ -0,0 +1,141 @@
# Necessary things (assorted commands and notes)
for app in rke2; do output=$(curl -ks "https://update.${app}.io/v1-release/channels" | jq --arg app "${app}" -r '.data[]|select(.id==("stable","latest","testing"))|[$app, .name, .latest]|@tsv'); [ -n "$output" ] && echo "$output"; done && echo
kube-apiserver pod logs (in /var/log/pods)
kubectl rollout restart ds/rke2-canal -n kube-system
kubectl rollout restart deployment/rancher -n cattle-system
kubectl delete pod -l k8s-app=canal -n kube-system
tail -f /var/lib/rancher/rke2/agent/containerd/containerd.log

kubectl get pod -n kube-system -l tier=control-plane -o wide

kubelet logs: /var/lib/rancher/rke2/agent/logs/kubelet.log


kubectl patch pvc <pvc_name> -p '{"metadata":{"finalizers":null}}'

kubectl delete pvc <pvc_name> --grace-period=0 --force

kubectl patch pv <pv_name> -p '{"metadata":{"finalizers":null}}'

kubectl delete pv <pv_name> --grace-period=0 --force
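Before force-deleting, a quick way to see what is actually stuck (a sketch):

kubectl get pvc -A | grep Terminating
kubectl get pv | grep Terminating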

127.0.0.1 rke21
10.0.0.3 rke22
10.0.0.4 rke23
10.0.0.5 rke2agent1
10.0.0.6 rke2agent2


127.0.0.1 localhost.localdomain localhost
::1 localhost6.localdomain6 localhost6
# cluster nodes.
10.0.0.2 rke21.rancher.exemple.com rke21
10.0.0.3 rke22.rancher.exemple.com rke22
10.0.0.4 rke23.rancher.exemple.com rke23
10.0.0.5 rke2agent1.rancher.exemple.com rke2agent1
10.0.0.6 rke2agent2.rancher.exemple.com rke2agent2


Error: Internal error occurred: failed calling webhook "rancher.cattle.io.clusters.management.cattle.io": failed to call webhook: Post "https://rancher-webhook.cattle-system.svc:443/v1/webhook/mutation/clusters.management.cattle.io?timeout=10s": no endpoints available for service "rancher-webhook"

This error happens if the Rancher Helm chart was installed before and the rancher-webhook pod was not running when the cluster was updated. Rancher installs two webhook configurations; check them with:

kubectl get -n cattle-system MutatingWebhookConfiguration rancher.cattle.io
kubectl get -n cattle-system validatingwebhookconfigurations rancher.cattle.io

If the rancher-webhook pod is not up and running while the cluster is being upgraded, delete both of them:

kubectl delete -n cattle-system MutatingWebhookConfiguration rancher.cattle.io
kubectl delete -n cattle-system validatingwebhookconfigurations rancher.cattle.io

Then re-run helm install/upgrade of the Rancher Helm chart; it will re-create these webhooks.
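A hedged example of that re-run, using the airgapped chart referenced elsewhere in these notes (chart path and values are assumptions):

helm upgrade --install rancher ./rancher-2.8.4.tgz --namespace cattle-system --reuse-values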
##### talking to etcd (etcdctl via the etcd static pod)
kubectl exec -it etcd-rke21 -n kube-system -- etcdctl --cert /var/lib/rancher/rke2/server/tls/etcd/server-client.crt --key /var/lib/rancher/rke2/server/tls/etcd/server-client.key --endpoints https://127.0.0.1:2379 --cacert /var/lib/rancher/rke2/server/tls/etcd/server-ca.crt member list
###
kubectl exec -it etcd-rke21 -n kube-system -- etcdctl --cert /var/lib/rancher/rke2/server/tls/etcd/server-client.crt --key /var/lib/rancher/rke2/server/tls/etcd/server-client.key --endpoints https://127.0.0.1:2379 --cacert /var/lib/rancher/rke2/server/tls/etcd/server-ca.crt endpoint health
##
kubectl exec -it etcd-rke21 -n kube-system -- etcdctl --cert /var/lib/rancher/rke2/server/tls/etcd/server-client.crt --key /var/lib/rancher/rke2/server/tls/etcd/server-client.key --endpoints https://127.0.0.1:2379 --cacert /var/lib/rancher/rke2/server/tls/etcd/server-ca.crt endpoint status
##
To get the status of all etcd endpoints:
for etcdpod in $(kubectl -n kube-system get pod -l component=etcd --no-headers -o custom-columns=NAME:.metadata.name); do kubectl exec -it $etcdpod -n kube-system -- etcdctl --cert /var/lib/rancher/rke2/server/tls/etcd/server-client.crt --key /var/lib/rancher/rke2/server/tls/etcd/server-client.key --endpoints https://127.0.0.1:2379 --cacert /var/lib/rancher/rke2/server/tls/etcd/server-ca.crt endpoint status; done


unset ETCDCTL_ENDPOINTS
export ETCDCTL_API=3
export ETCDCTL_CACERT=/var/lib/rancher/rke2/server/tls/etcd/server-ca.crt
export ETCDCTL_CERT=/var/lib/rancher/rke2/server/tls/etcd/server-client.crt
export ETCDCTL_KEY=/var/lib/rancher/rke2/server/tls/etcd/server-client.key

etcdctl --endpoints=https://127.0.0.1:2379 endpoint status
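Two more checks that are handy once the variables above are exported (a sketch):

etcdctl --endpoints=https://127.0.0.1:2379 endpoint status --write-out=table
etcdctl --endpoints=https://127.0.0.1:2379 alarm list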

# See the domains (SANs) on the kube-apiserver serving certificate
openssl x509 -in /var/lib/rancher/rke2/server/tls/serving-kube-apiserver.crt -text -noout
curl -v -k https://127.0.0.1:9345/v1-rke2/readyz
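To list just the SANs on that certificate (a sketch; the -ext option needs OpenSSL 1.1.1+):

openssl x509 -in /var/lib/rancher/rke2/server/tls/serving-kube-apiserver.crt -noout -ext subjectAltName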

# /etc/rancher/rke2/config.yaml
kubelet-arg:
- "cpu-manager-policy=static"
- "cpu-manager-reconcile-period=5s"
- "topology-manager-policy=best-effort"
- "topology-manager-scope=container"
- "kube-reserved=cpu=500m,memory=1Gi,ephemeral-storage=1Gi"
- "system-reserved=cpu=500m,memory=1Gi,ephemeral-storage=1Gi"
- "reserved-system-cpus=0,1"
- "eviction-hard=memory.available<500Mi,nodefs.available<1Gi,imagefs.available<15Gi"
- "eviction-soft=memory.available<1Gi,nodefs.available<5Gi,imagefs.available<20Gi"
- "eviction-soft-grace-period=memory.available=1m,nodefs.available=1m,imagefs.available=1m"


What is a PodDisruptionBudget: it limits how many pods of an application may be down at once during voluntary disruptions (node drains, upgrades). The example below keeps at least one myapp pod available.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: myapp-pdb
spec:
  minAvailable: 1
  selector:
    matchLabels:
      app: myapp
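A minimal check after applying it (filename is a placeholder):

kubectl apply -f myapp-pdb.yaml
kubectl get pdb myapp-pdb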

openssl x509 -in /var/lib/rancher/rke2/agent/client-kube-proxy.crt -noout -enddate



Try adding --kubelet-insecure-tls

kubectl edit deploy metrics-server -n kube-system

containers:
- args:
  - --cert-dir=/tmp
  - --secure-port=8448
  - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
  - --kubelet-insecure-tls


Also, if --kubelet-insecure-tls did not fix it, you might need to extend the time allowed for the readiness check (this was an issue I had).

I had to change the readiness probe to:

initialDelaySeconds: 300
periodSeconds: 30
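Once the deployment is patched, a quick check that metrics are flowing (a sketch):

kubectl top nodes
kubectl top pods -A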


if [[ $(timedatectl status | grep -i "NTP service" | awk '{print $3}') == "active" ]]; then echo "NTP is running"; else echo "NTP is not running"; fi
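If chrony is the NTP client (the RHEL default), its sync state can also be inspected directly (a sketch):

chronyc tracking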

curl --cacert /var/lib/rancher/rke2/server/tls/server-ca.crt \
--cert /var/lib/rancher/rke2/server/tls/client-admin.crt \
--key /var/lib/rancher/rke2/server/tls/client-admin.key \
"https://127.0.0.1:6443/apis/coordination.k8s.io/v1/namespaces/kube-system/leases/kube-scheduler?timeout=5s"



curl --cacert /var/lib/rancher/rke2/server/tls/server-ca.crt \
--cert /var/lib/rancher/rke2/server/tls/client-admin.crt \
--key /var/lib/rancher/rke2/server/tls/client-admin.key \
"https://127.0.0.1:6443/apis/coordination.k8s.io/v1/namespaces/kube-system/leases/kube-controller-manager?timeout=5s"


kubectl get --raw='/healthz'
journalctl -u rke2-server
75 changes: 75 additions & 0 deletions kubeletGarbageConf.txt
@@ -0,0 +1,75 @@
kubelet garbage-collection settings (part of the RKE2 server /etc/rancher/rke2/config.yaml)
secrets-encryption: true
token: fuzzybunnyslippers
tls-san:
- rancher.exemple.com
node-ip: "10.0.0.2"
write-kubeconfig-mode: 0600
etcd-expose-metrics: true
use-service-account-credentials: true
kube-controller-manager-arg:
- bind-address=127.0.0.1
- use-service-account-credentials=true
- tls-min-version=VersionTLS12
- tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384
kube-scheduler-arg:
- tls-min-version=VersionTLS12
- tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384
kube-apiserver-arg:
- tls-min-version=VersionTLS12
- tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384
- authorization-mode=RBAC,Node
- anonymous-auth=false
- audit-policy-file=/etc/rancher/rke2/audit-policy.yaml
- audit-log-mode=blocking-strict
- audit-log-maxage=30
- audit-log-path=/var/lib/rancher/rke2/server/logs/audit.log
kubelet-arg:
- protect-kernel-defaults=true
- read-only-port=0
- authorization-mode=Webhook
- streaming-connection-idle-timeout=5m
- minimum-container-ttl-duration=10s
- maximum-dead-containers-per-container=2
- maximum-dead-containers=240
- image-gc-high-threshold=85
- image-gc-low-threshold=80



protect-kernel-defaults=true

Description: Ensures kubelet does not alter certain kernel settings required for system stability and security.
Potential Impact: This is generally a safe setting. It prevents accidental or malicious changes to critical kernel parameters. Unless your workloads require specific kernel tweaks, this should not cause harm.
read-only-port=0

Description: Disables the read-only port (default is 10255), which exposes the kubelet’s metrics and debugging endpoints.
Potential Impact: Improves security by preventing unauthorized access to kubelet endpoints. If monitoring tools rely on the read-only port, ensure they are updated to use secure endpoints. Typically, this is a safe and recommended setting.
authorization-mode=Webhook

Description: Uses the webhook mode for kubelet authorization, allowing external services to handle authorization.
Potential Impact: Adds flexibility and integrates with RBAC systems. Ensure your webhook service is highly available and performant, as any issues with the webhook can affect node operations.
streaming-connection-idle-timeout=5m

Description: Sets the idle timeout for streaming connections like kubectl exec and kubectl logs.
Potential Impact: Closes idle connections after 5 minutes. This helps free resources but could impact long-running debugging sessions. Adjust the timeout based on your operational needs.
minimum-container-ttl-duration=10s

Description: Specifies the minimum time a container will be retained before being eligible for garbage collection.
Potential Impact: Ensures short-lived containers are not immediately removed. Generally safe, but very short-lived containers might be cleaned up quickly if the duration is too short. Adjust based on your container lifecycle.
maximum-dead-containers-per-container=2

Description: Limits the number of dead containers retained per container.
Potential Impact: Helps manage disk space by limiting old container data. Too low a value might hinder debugging efforts for failed containers. Adjust based on the balance between disk usage and debugging needs.
maximum-dead-containers=240

Description: Sets the maximum number of dead containers to retain globally.
Potential Impact: Controls total disk usage for dead containers. Ensure the value fits your node capacity and debugging requirements.
image-gc-high-threshold=85

Description: Triggers image garbage collection when disk usage exceeds this percentage.
Potential Impact: Helps prevent disk space exhaustion. Ensure critical images are not removed prematurely by managing image retention policies effectively.
image-gc-low-threshold=80

Description: Target disk usage after image garbage collection.
Potential Impact: Provides a buffer for disk usage, preventing frequent GC cycles. Generally safe, but ensure your workloads can tolerate image removal.
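To see the disk usage these image-GC thresholds act on (a sketch; default RKE2 data dir assumed):

df -h /var/lib/rancher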
69 changes: 69 additions & 0 deletions linuxLastConf.txt
@@ -0,0 +1,69 @@
systemctl disable firewalld.service
swapoff -a; sed -i '/swap/d' /etc/fstab
vim /etc/ssh/sshd_config   # set the line below, then restart sshd
AllowTcpForwarding yes
iptables -A INPUT -p tcp --dport 6443 -j ACCEPT

cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF

sudo modprobe overlay
sudo modprobe br_netfilter

cat << EOF >> /etc/sysctl.conf
# SWAP settings
vm.swappiness=0
vm.panic_on_oom=0
vm.overcommit_memory=1
kernel.panic=10
kernel.panic_on_oops=1
vm.max_map_count = 262144

# Have a larger connection range available
net.ipv4.ip_local_port_range=1024 65000

# Increase max connection
net.core.somaxconn=10000

# Reuse closed sockets faster
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_fin_timeout=15

# The maximum number of "backlogged sockets"; the default is 128 (somaxconn is already raised above).
net.core.netdev_max_backlog=4096
net.core.netdev_max_backlog=4096

# 16MB per socket - which sounds like a lot,
# but will virtually never consume that much.
net.core.rmem_max=16777216
net.core.wmem_max=16777216

# Various network tunables
net.ipv4.tcp_max_syn_backlog=20480
net.ipv4.tcp_max_tw_buckets=400000
net.ipv4.tcp_no_metrics_save=1
net.ipv4.tcp_rmem=4096 87380 16777216
net.ipv4.tcp_syn_retries=2
net.ipv4.tcp_synack_retries=2
net.ipv4.tcp_wmem=4096 65536 16777216

# ARP cache settings for a highly loaded docker swarm
net.ipv4.neigh.default.gc_thresh1=8096
net.ipv4.neigh.default.gc_thresh2=12288
net.ipv4.neigh.default.gc_thresh3=16384

# ip_forward and tcp keepalive for iptables
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.tcp_keepalive_time=600
net.ipv4.ip_forward=1
net.ipv4.conf.all.forwarding=1
net.ipv6.conf.all.forwarding=1

# monitor file system events
fs.inotify.max_user_instances=8192
fs.inotify.max_user_watches=1048576
EOF
sysctl -p > /dev/null 2>&1
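# Spot-check a few of the applied values (a sketch):
sysctl net.ipv4.ip_forward net.bridge.bridge-nf-call-iptables net.core.somaxconn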