一、在数据节点运行如下脚本

# 0. Stop the kubelet service
systemctl stop kubelet
# 1. Create the directory that holds the k8s client config under the user's home (optional);
#    -p keeps this step from failing when the directory already exists
mkdir -p ~/.kube/
# 2. Copy the cluster config from the master node into that directory (optional)
scp root@192.168.80.200:/etc/kubernetes/admin.conf ~/.kube/config
# 3. Copy the cluster config to the default kubelet config path; step 1 can be skipped
#    by pointing the scp from step 2 straight at this path instead of using cp
cp ~/.kube/config  /etc/kubernetes/kubelet.conf
# Make the current user the owner of the client config
chown "$(id -u):$(id -g)" "$HOME/.kube/config"
二、在主节点运行如下脚本

# Fetch the kubelet key material from the worker node.
# The remote glob is quoted so the local shell never tries to expand it.
# NOTE: run this section from an empty working directory, because the upload
# step below sends every file found in the current directory.
scp '192.168.80.xxx:/var/lib/kubelet/pki/*' ./
# Remove the date-stamped client certificates (any year, not only 2024);
# kubelet-client-current.pem is not matched by this pattern
rm -f -- kubelet-client-[0-9]*
# Create a new certificate signing request from the existing kubelet key
# NOTE(review): the Node authorizer normally expects a subject of the form
# /O=system:nodes/CN=system:node:<nodeName> -- confirm that CN=kubelet is intended.
openssl req -new -key kubelet.key -out kubelet.csr -subj "/CN=kubelet"
# Sign the request with the CA certificate and key stored on the master node
openssl x509 -req -in kubelet.csr -CA /etc/kubernetes/pki/ca.crt -CAkey /etc/kubernetes/pki/ca.key -CAcreateserial -out kubelet.crt -days 3650
# Bundle the signed certificate and the key into a single pem file
cat kubelet.crt kubelet.key > kubelet-client-current.pem
# Push the generated files back to the worker node
scp ./* 192.168.80.xxx:/var/lib/kubelet/pki/
# Push the CA certificate to the worker node
scp /etc/kubernetes/pki/ca.crt 192.168.80.xxx:/etc/kubernetes/pki/ca.crt
三、在数据节点执行如下脚本

# 4. Restart kubelet
systemctl restart kubelet
四、恢复etcd和集群时使用的命令

# Restore the etcd snapshot (v3 API) into the data directory /var/lib/etcd,
# declaring a single initial member "default" with peer URL http://192.168.80.200:2380
ETCDCTL_API=3 etcdctl snapshot restore /var/backups/kube_etcd/etcd-2023-05-16-09-30-01/snapshot.db --data-dir=/var/lib/etcd --initial-cluster-token k8s_etcd --initial-cluster default=http://192.168.80.200:2380 --initial-advertise-peer-urls=http://192.168.80.200:2380

# Run kk "create cluster" from the dev.yaml definition with Kubernetes v1.21.5 and KubeSphere v3.2.1
./kk create cluster -f ./dev.yaml --with-kubernetes v1.21.5 --with-kubesphere v3.2.1
五、个别节点可见但不可用时的修复

# Copy the haproxy manifest from 192.168.80.205
scp 192.168.80.205:/etc/kubernetes/manifests/haproxy.yaml  /etc/kubernetes/manifests/haproxy.yaml
# Create the kubekey directory; -p avoids an error when it already exists
mkdir -p /etc/kubekey/
# Copy the haproxy directory under /etc/kubekey from the same node
scp -r 192.168.80.205:/etc/kubekey/haproxy /etc/kubekey/
# Apply the copied manifest
kubectl apply -f  /etc/kubernetes/manifests/haproxy.yaml
六、节点提示 forbidden sysctl: "net.core.somaxconn" not whitelisted

# Open the kubelet configuration file and add the allowedUnsafeSysctls list below
$ vim /var/lib/kubelet/config.yaml
allowedUnsafeSysctls:
- kernel.shm*
- kernel.msg*
- kernel.sem
- fs.mqueue.*
- net.*
七、手工清除节点地址变化带来的无法连接

# Run conntrack delete with no filter arguments
# NOTE(review): presumably this removes every tracked connection on the node -- confirm before running on a busy host
conntrack -D
八、pod显示forbidden sysctl: "net.core.somaxconn" not whitelisted

# 1. On the affected node, edit the kubelet service startup parameters
vim /etc/systemd/system/kubelet.service
# 2. Allow unsafe sysctls to be used (line to set inside the service file)
ExecStart=/usr/local/bin/kubelet --allowed-unsafe-sysctls=net.*
# 3. Edit the kubelet configuration parameters
vim /var/lib/kubelet/config.yaml
# 4. Allow the unsafe parameters; appending this list at the end of the file is enough
allowedUnsafeSysctls:
- kernel.shm*
- kernel.msg*
- kernel.sem
- fs.mqueue.*
- net.*
# 5. Reload the systemd units and restart the kubelet service
systemctl daemon-reload && systemctl restart kubelet
# 6. Pods already rejected on the node may need to be removed. Run this on the
#    master node and replace "project" with the namespace that owns the pods.
#    --no-headers drops the header row at the source, so every row that contains
#    SysctlForbidden is deleted, including the first one; xargs -r does nothing
#    when no pod matches.
kubectl get pods -n project -o wide --no-headers \
  | awk '/SysctlForbidden/ {print $1}' \
  | xargs -r kubectl delete pod -n project