Ubuntu24.04 二进制方式部署 v1.33 集群
- 文档采用离线方式安装(可使用在线安装命令在线安装软件)。
- 文档适用于高可用和非高可用集群部署。
- 高可用集群部署时,如果不特别说明,表示仅在 controller01 节点操作。
1.系统优化
1.1 配置 hosts 解析
所有节点操作:
cat >> /etc/hosts <<-'EOF'
192.168.109.108 apiserver-lb
192.168.109.100 k8s-controller01
192.168.109.101 k8s-controller02
192.168.109.102 k8s-controller03
192.168.109.103 k8s-worker01
192.168.109.104 k8s-worker02
EOF
如果不是高可用集群,使用以下操作:
cat >> /etc/hosts <<-'EOF'
192.168.109.100 k8s-controller
192.168.109.103 k8s-worker01
192.168.109.104 k8s-worker02
EOF
1.2 配置免密登录并同步文件
上传压缩包解压并安装常用软件:
tar xf software.tar.gz
mv software / && cd /software
tar xf dependence.tar.gz && cd dependence/
./install.sh
执行免密登录和同步文件脚本:
cd /software/ && ./setup_ssh_and_distribute-ha.sh
如果不是高可用集群,使用以下操作:
cd /software/ && ./setup_ssh_and_distribute.sh
1.3 安装常用的软件包
所有节点(除 controller01 节点外)安装:
cd /software
tar xf dependence.tar.gz && cd dependence/
./install.sh
在线安装命令(所有节点):
apt -y install bind9-utils expect rsync jq psmisc net-tools lvm2 vim unzip rename
1.4 关闭 ufw
所有节点操作:
systemctl disable --now ufw
1.5 关闭swap分区
所有节点操作:
swapoff -a && sysctl -w vm.swappiness=0
sed -ri '/^[^#]*swap/s@^@#@' /etc/fstab
free -h
1.6 修改时区
所有节点操作:
ln -svf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
date -R
1.7 配置limit
所有节点操作:
ulimit -SHn 65535
cat >> /etc/security/limits.conf <<-'EOF'
* soft nofile 655360
* hard nofile 655360
* soft nproc 655350
* hard nproc 655350
* soft memlock unlimited
* hard memlock unlimited
EOF
1.8 优化sshd服务
所有节点操作:
sed -i 's@#UseDNS yes@UseDNS no@g' /etc/ssh/sshd_config
sed -i 's@^GSSAPIAuthentication yes@GSSAPIAuthentication no@g' /etc/ssh/sshd_config
1.9 Linux内核调优
所有节点操作:
cat > /etc/sysctl.d/k8s.conf <<-'EOF'
# 以下3个参数是containerd所依赖的内核参数
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv6.conf.all.disable_ipv6 = 1
fs.may_detach_mounts = 1
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720
net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl = 15
net.ipv4.tcp_max_tw_buckets = 36000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_max_orphans = 327680
net.ipv4.tcp_orphan_retries = 3
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_syn_backlog = 16384
net.ipv4.tcp_timestamps = 0
net.core.somaxconn = 16384
EOF
sysctl --system
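可选:应用完成后可抽查关键参数是否已生效(示例命令;其中 net.bridge.* 两项依赖 br_netfilter 模块,在 2.2 节加载该模块后才会存在,此处查询报错属正常现象):
sysctl net.ipv4.ip_forward vm.swappiness net.ipv4.tcp_tw_reuse
# br_netfilter 加载后(见 2.2 节)再确认以下两项的值为 1
sysctl net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables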
1.10 安装并配置ipvsadm
所有节点操作:
cd /software
tar xf ipvsadm.tar.gz && cd ipvsadm/
./install.sh
在线安装命令:
apt -y install ipvsadm ipset sysstat conntrack libseccomp2
所有节点操作:
cat > /etc/modules-load.d/ipvs.conf <<-'EOF'
ip_vs
ip_vs_lc
ip_vs_wlc
ip_vs_rr
ip_vs_wrr
ip_vs_lblc
ip_vs_lblcr
ip_vs_dh
ip_vs_sh
ip_vs_fo
ip_vs_nq
ip_vs_sed
ip_vs_ftp
nf_conntrack
ip_tables
ip_set
xt_set
ipt_set
ipt_rpfilter
ipt_REJECT
ipip
EOF
重启所有节点并验证:
lsmod | grep --color=auto -e ip_vs -e nf_conntrack
2.containerd 部署
2.1 安装软件
所有节点操作:
cd /software
tar xf containerd.tar.gz && cd containerd/
./install.sh
在线安装命令:
# step 1: 安装必要的一些系统工具
apt-get update
apt-get install ca-certificates curl gnupg
# step 2: 信任 Docker 的 GPG 公钥
install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://mirrors.aliyun.com/docker-ce/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
chmod a+r /etc/apt/keyrings/docker.gpg
# Step 3: 写入软件源信息
echo \
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://mirrors.aliyun.com/docker-ce/linux/ubuntu \
"$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \
sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
# Step 4: 更新软件源并安装 Containerd
apt-get update
apt-get -y install containerd.io
2.2 模块配置
所有节点操作:
modprobe -- overlay
modprobe -- br_netfilter
cat > /etc/modules-load.d/containerd.conf <<-'EOF'
overlay
br_netfilter
EOF
2.3 修改配置文件
所有节点操作:
containerd config default | tee /etc/containerd/config.toml
sed -ri 's#(SystemdCgroup = )false#\1true#' /etc/containerd/config.toml
grep SystemdCgroup /etc/containerd/config.toml
sed -i 's#registry.k8s.io#registry.cn-hangzhou.aliyuncs.com/google_containers#' /etc/containerd/config.toml
grep sandbox_image /etc/containerd/config.toml
2.4 启动服务
所有节点操作:
systemctl daemon-reload
systemctl enable --now containerd
systemctl status containerd
2.5 配置客户端连接地址
所有节点操作:
cat > /etc/crictl.yaml <<-'EOF'
runtime-endpoint: unix:///run/containerd/containerd.sock
image-endpoint: unix:///run/containerd/containerd.sock
timeout: 10
debug: false
EOF
systemctl restart containerd
ctr version
2.6 拉取镜像测试
所有节点操作:
ctr -n k8s.io i pull docker.m.daocloud.io/alpine:latest
ctr -n k8s.io i ls|grep alpine
ctr -n k8s.io i rm docker.m.daocloud.io/alpine:latest
3.软件配置
3.1 etcd 软件配置
所有 controller 节点操作:
cd /software
tar -xf etcd-v3.6.4-linux-amd64.tar.gz --strip-components=1 -C /usr/local/bin etcd-v3.6.4-linux-amd64/etcd{,ctl}
etcdctl version
etcd 下载地址:https://github.com/etcd-io/etcd/releases
3.2 k8s 软件配置
所有 controller 节点操作:
cd /software
tar -xf kubernetes-server-linux-amd64.tar.gz --strip-components=3 -C /usr/local/bin kubernetes/server/bin/kube{let,ctl,-apiserver,-controller-manager,-scheduler,-proxy}
k8s 软件下载地址(选择安装版本的 Server Binaries 下载):https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.33.md
编写脚本并拷贝软件到其他节点:
mkdir /script
cat > /script/copy_software.sh <<-'EOF'
#!/bin/bash
# k8s 二进制文件分发脚本(带结果统计)
# 需要分发的节点
WORKER_NODES=("k8s-worker01" "k8s-worker02")
# 目录
SRC_DIR="/usr/local/bin"
DST_DIR="/usr/local/bin"
# 文件
BIN_FILES=("kubelet" "kube-proxy")
# 统计
SUCCESS_COUNT=0
FAIL_COUNT=0
FAILED_NODES=()
echo "开始分发 kubelet / kube-proxy 文件..."
# 循环节点
for node in "${WORKER_NODES[@]}"; do
echo ">>> 处理 Worker 节点: $node"
NODE_SUCCESS=true
for file in "${BIN_FILES[@]}"; do
if [[ -f "$SRC_DIR/$file" ]]; then
scp -q "$SRC_DIR/$file" "$node:$DST_DIR/"
if [[ $? -eq 0 ]]; then
echo " [OK] $file 已传输到 $node:$DST_DIR"
ssh "$node" "chmod +x $DST_DIR/$file"
else
echo " [ERROR] $file 传输到 $node 失败"
NODE_SUCCESS=false
fi
else
echo " [WARN] 本地缺少文件: $SRC_DIR/$file"
NODE_SUCCESS=false
fi
done
if $NODE_SUCCESS; then
((SUCCESS_COUNT++))
else
((FAIL_COUNT++))
FAILED_NODES+=("$node")
fi
done
# 总结
echo "======================"
echo "分发完成"
echo "成功节点数量: $SUCCESS_COUNT"
echo "失败节点数量: $FAIL_COUNT"
if [[ $FAIL_COUNT -gt 0 ]]; then
echo "失败节点列表: ${FAILED_NODES[*]}"
fi
echo "======================"
EOF
chmod +x /script/copy_software.sh
/script/copy_software.sh
所有节点操作:
kubelet --version
4.证书软件配置
解压软件包并配置:
cd /software
tar xf cfssl.tar.gz && cd cfssl
rename -v "s/_1.6.5_linux_amd64//g" cfssl*
mv cfssl* /usr/local/bin/
chmod +x /usr/local/bin/cfssl*
cfssl 软件下载地址:https://github.com/cloudflare/cfssl/releases
查看软件版本:
cfssl version
5.生成证书
5.1 生成 etcd 证书
创建证书目录:
mkdir /etc/etcd/ssl -p
生成 etcd 证书私钥和公钥:
cd /software/
tar xf pki.tar.gz && cd pki
cfssl gencert -initca etcd-ca-csr.json | cfssljson -bare /etc/etcd/ssl/etcd-ca
etcd-ca-csr.json 文件内容:
{
"CN": "etcd",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Chongqing",
"L": "Chongqing",
"O": "etcd",
"OU": "Etcd Security"
}
],
"ca": {
"expiry": "876000h"
}
}
指定 etcd 证书私钥和公钥生成证书:
cfssl gencert \
-ca=/etc/etcd/ssl/etcd-ca.pem \
-ca-key=/etc/etcd/ssl/etcd-ca-key.pem \
-config=ca-config.json \
-hostname=127.0.0.1,k8s-controller01,k8s-controller02,k8s-controller03,192.168.109.100,192.168.109.101,192.168.109.102 \
-profile=kubernetes \
etcd-csr.json | cfssljson -bare /etc/etcd/ssl/etcd
ca-config.json 文件内容:
{
"signing": {
"default": {
"expiry": "876000h"
},
"profiles": {
"kubernetes": {
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
],
"expiry": "876000h"
}
}
}
}
etcd-csr.json 文件内容:
{
"CN": "etcd",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Chongqing",
"L": "Chongqing",
"O": "etcd",
"OU": "Etcd Security"
}
]
}
如果不是高可用集群,使用以下操作:
cfssl gencert \
-ca=/etc/etcd/ssl/etcd-ca.pem \
-ca-key=/etc/etcd/ssl/etcd-ca-key.pem \
-config=ca-config.json \
-hostname=127.0.0.1,k8s-controller01,192.168.109.100 \
-profile=kubernetes \
etcd-csr.json | cfssljson -bare /etc/etcd/ssl/etcd
创建脚本并拷贝证书到其余 controller 节点:
cat > /script/copy_etcd_certificate.sh <<-'EOF'
#!/bin/bash
# etcd证书分发脚本(带结果统计)
# 需要分发的节点
NODES=("k8s-controller02" "k8s-controller03")
# 需要分发的文件
FILES=(
"etcd-ca-key.pem"
"etcd-ca.pem"
"etcd-key.pem"
"etcd.pem"
)
SRC_DIR="/etc/etcd/ssl"
DST_DIR="/etc/etcd/ssl"
SUCCESS_COUNT=0
FAIL_COUNT=0
FAILED_NODES=()
echo "开始分发etcd证书..."
# 循环节点
for node in "${NODES[@]}"; do
echo ">>> 处理节点: $node"
ssh "$node" "mkdir -p $DST_DIR"
NODE_SUCCESS=true
for file in "${FILES[@]}"; do
if [[ -f "$SRC_DIR/$file" ]]; then
scp -q "$SRC_DIR/$file" "$node:$DST_DIR/"
if [[ $? -eq 0 ]]; then
echo " [OK] $file 已传输到 $node:$DST_DIR"
else
echo " [ERROR] $file 传输到 $node 失败"
NODE_SUCCESS=false
fi
else
echo " [WARN] 本地缺少文件: $SRC_DIR/$file"
NODE_SUCCESS=false
fi
done
if $NODE_SUCCESS; then
((SUCCESS_COUNT++))
else
((FAIL_COUNT++))
FAILED_NODES+=("$node")
fi
done
echo "======================"
echo "证书分发完成"
echo "成功节点数量: $SUCCESS_COUNT"
echo "失败节点数量: $FAIL_COUNT"
if [[ $FAIL_COUNT -gt 0 ]]; then
echo "失败节点列表: ${FAILED_NODES[*]}"
fi
echo "======================"
EOF
chmod +x /script/copy_etcd_certificate.sh
/script/copy_etcd_certificate.sh
所有 controller 节点可以看到 4 个证书:
ll /etc/etcd/ssl/*.pem
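可选:用 openssl 核对 etcd 证书的有效期及 SAN 是否包含全部 controller 节点的主机名和 IP(仅作检查示例):
openssl x509 -in /etc/etcd/ssl/etcd.pem -noout -dates
openssl x509 -in /etc/etcd/ssl/etcd.pem -noout -text | grep -A1 'Subject Alternative Name'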
5.2 生成 k8s 相关证书
5.2.1 创建证书目录
创建目录存放证书:
mkdir -p /etc/kubernetes/pki && cd /software/pki
5.2.2 生成 ca 证书
生成 k8s ca 证书私钥和公钥:
cfssl gencert -initca ca-csr.json | cfssljson -bare /etc/kubernetes/pki/ca
ca-csr.json 文件内容:
{
"CN": "kubernetes",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Chongqing",
"L": "Chongqing",
"O": "Kubernetes",
"OU": "Kubernetes-manual"
}
],
"ca": {
"expiry": "876000h"
}
}
5.2.3 生成 apiserver 证书
生成 apiserver 证书私钥和公钥:
cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-hostname=10.96.0.1,192.168.109.108,127.0.0.1,kubernetes,kubernetes.default,kubernetes.default.svc,kubernetes.default.svc.cluster,kubernetes.default.svc.cluster.local,192.168.109.100,192.168.109.101,192.168.109.102,192.168.109.103,192.168.109.104,192.168.109.108 \
-profile=kubernetes apiserver-csr.json | cfssljson -bare /etc/kubernetes/pki/apiserver
ca-config.json 文件内容见前面 5.1 章节。
apiserver-csr.json 文件内容:
{
"CN": "kube-apiserver",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Chongqing",
"L": "Chongqing",
"O": "Kubernetes",
"OU": "Kubernetes-manual"
}
]
}
如果不是高可用集群,使用以下操作:
cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-hostname=10.96.0.1,192.168.109.100,127.0.0.1,kubernetes,kubernetes.default,kubernetes.default.svc,kubernetes.default.svc.cluster,kubernetes.default.svc.cluster.local,192.168.109.100,192.168.109.103,192.168.109.104 \
-profile=kubernetes apiserver-csr.json | cfssljson -bare /etc/kubernetes/pki/apiserver
5.2.4 生成 apiserver 聚合证书
生成聚合 ca 证书私钥和公钥:
cfssl gencert -initca front-proxy-ca-csr.json | cfssljson -bare /etc/kubernetes/pki/front-proxy-ca
front-proxy-ca-csr.json 文件内容:
{
"CN": "kubernetes",
"key": {
"algo": "rsa",
"size": 2048
},
"ca": {
"expiry": "876000h"
}
}
指定 ca 证书私钥和公钥生成前端代理客户端的证书:
cfssl gencert \
-ca=/etc/kubernetes/pki/front-proxy-ca.pem \
-ca-key=/etc/kubernetes/pki/front-proxy-ca-key.pem \
-config=ca-config.json \
-profile=kubernetes front-proxy-client-csr.json | cfssljson -bare /etc/kubernetes/pki/front-proxy-client
ca-config.json 文件内容见前面 5.1 章节。
front-proxy-client-csr.json 文件内容:
{
"CN": "front-proxy-client",
"key": {
"algo": "rsa",
"size": 2048
}
}
5.2.5 生成 controller-manager 证书及 kubeconfig 文件
基于自建 kubernetes ca 证书生成 controller-manager 证书文件:
cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
manager-csr.json | cfssljson -bare /etc/kubernetes/pki/controller-manager
ca-config.json 文件内容见前面 5.1 章节。
manager-csr.json 文件内容:
{
"CN": "system:kube-controller-manager",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Chongqing",
"L": "Chongqing",
"O": "system:kube-controller-manager",
"OU": "Kubernetes-manual"
}
]
}
设置一个集群:
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.109.108:9443 \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig
如果不是高可用集群,使用以下操作:
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.109.100:6443 \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig
设置一个用户项:
kubectl config set-credentials system:kube-controller-manager \
--client-certificate=/etc/kubernetes/pki/controller-manager.pem \
--client-key=/etc/kubernetes/pki/controller-manager-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig
设置一个上下文环境(关联前面的集群和用户):
kubectl config set-context system:kube-controller-manager@kubernetes \
--cluster=kubernetes \
--user=system:kube-controller-manager \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig
使用默认的上下文:
kubectl config use-context system:kube-controller-manager@kubernetes \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig
5.2.6 生成 scheduler 证书及 kubeconfig 文件
基于自建 kubernetes ca 证书生成 scheduler 证书文件:
cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
scheduler-csr.json | cfssljson -bare /etc/kubernetes/pki/scheduler
ca-config.json 文件内容见前面 5.1 章节。
scheduler-csr.json 文件内容:
{
"CN": "system:kube-scheduler",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Chongqing",
"L": "Chongqing",
"O": "system:kube-scheduler",
"OU": "Kubernetes-manual"
}
]
}
设置一个集群:
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.109.108:9443 \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig
如果不是高可用集群,使用以下操作:
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.109.100:6443 \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig
设置一个用户项:
kubectl config set-credentials system:kube-scheduler \
--client-certificate=/etc/kubernetes/pki/scheduler.pem \
--client-key=/etc/kubernetes/pki/scheduler-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig
设置一个上下文环境:
kubectl config set-context system:kube-scheduler@kubernetes \
--cluster=kubernetes \
--user=system:kube-scheduler \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig
使用默认的上下文:
kubectl config use-context system:kube-scheduler@kubernetes \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig
5.2.7 配置 k8s 集群管理员证书及 kubeconfig 文件
基于自建 kubernetes ca 证书生成 k8s 集群管理员证书:
cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
admin-csr.json | cfssljson -bare /etc/kubernetes/pki/admin
ca-config.json 文件内容见前面 5.1 章节。
admin-csr.json 文件内容:
{
"CN": "admin",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Chongqing",
"L": "Chongqing",
"O": "system:masters",
"OU": "Kubernetes-manual"
}
]
}
设置一个集群:
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.109.108:9443 \
--kubeconfig=/etc/kubernetes/admin.kubeconfig
如果不是高可用集群,使用以下操作:
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.109.100:6443 \
--kubeconfig=/etc/kubernetes/admin.kubeconfig
设置一个用户项:
kubectl config set-credentials kubernetes-admin \
--client-certificate=/etc/kubernetes/pki/admin.pem \
--client-key=/etc/kubernetes/pki/admin-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/admin.kubeconfig
设置一个上下文环境:
kubectl config set-context kubernetes-admin@kubernetes \
--cluster=kubernetes \
--user=kubernetes-admin \
--kubeconfig=/etc/kubernetes/admin.kubeconfig
使用默认的上下文:
kubectl config use-context kubernetes-admin@kubernetes --kubeconfig=/etc/kubernetes/admin.kubeconfig
5.2.8 生成 kube-proxy 证书及 kubeconfig 文件
基于自建 kubernetes ca 证书生成 kube-proxy 证书文件:
cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
kube-proxy-csr.json | cfssljson -bare /etc/kubernetes/pki/kube-proxy
ca-config.json 文件内容见前面 5.1 章节。
kube-proxy-csr.json 文件内容:
{
"CN": "system:kube-proxy",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Chongqing",
"L": "Chongqing",
"O": "system:kube-proxy",
"OU": "Kubernetes-manual"
}
]
}
设置一个集群:
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.109.108:9443 \
--kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig
如果不是高可用集群,使用以下操作:
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.109.100:6443 \
--kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig
设置一个用户项:
kubectl config set-credentials kube-proxy \
--client-certificate=/etc/kubernetes/pki/kube-proxy.pem \
--client-key=/etc/kubernetes/pki/kube-proxy-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig
设置一个上下文环境:
kubectl config set-context kube-proxy@kubernetes \
--cluster=kubernetes \
--user=kube-proxy \
--kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig
使用默认的上下文:
kubectl config use-context kube-proxy@kubernetes --kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig
5.2.9 创建ServiceAccount Key
openssl genrsa -out /etc/kubernetes/pki/sa.key 2048
openssl rsa -in /etc/kubernetes/pki/sa.key -pubout -out /etc/kubernetes/pki/sa.pub
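可选:核对 sa.key 与 sa.pub 是否为同一密钥对(两条命令输出的哈希值一致即正常):
openssl rsa -in /etc/kubernetes/pki/sa.key -pubout 2>/dev/null | md5sum
md5sum /etc/kubernetes/pki/sa.pub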
验证证书和 kubeconfig 文件数量
输出证书数量应为 26,kubeconfig 文件数量应为 4:
ls /etc/kubernetes/pki/ |wc -l
ls /etc/kubernetes/*.kubeconfig|wc -l
5.2.10 拷贝证书到其余 controller 节点
cat > /script/copy_kubeconfig_certificate.sh <<-'EOF'
#!/bin/bash
# k8s 证书 + kubeconfig 分发脚本(带结果统计)
# 控制节点(证书 + kubeconfig)
NODES=("k8s-controller02" "k8s-controller03")
# 工作节点(只需要 kube-proxy.kubeconfig)
WORKER_NODES=("k8s-worker01" "k8s-worker02")
# 证书目录
SRC_CERT_DIR="/etc/kubernetes/pki/"
DST_CERT_DIR="/etc/kubernetes/pki/"
CERT_FILES=("$SRC_CERT_DIR"/*)
# kubeconfig 目录(只选取以 .kubeconfig 结尾的文件)
SRC_KUBECONFIG_DIR="/etc/kubernetes"
DST_KUBECONFIG_DIR="/etc/kubernetes"
KUBECONFIG_FILES=("$SRC_KUBECONFIG_DIR"/*.kubeconfig)
# kube-proxy kubeconfig 文件
KUBEPROXY_FILE="$SRC_KUBECONFIG_DIR/kube-proxy.kubeconfig"
# 统计
SUCCESS_COUNT=0
FAIL_COUNT=0
FAILED_NODES=()
echo "开始分发 etcd 证书和 kubeconfig 文件..."
# === 控制节点分发 ===
for node in "${NODES[@]}"; do
echo ">>> 处理控制节点: $node"
NODE_SUCCESS=true
# === 分发证书文件 ===
ssh "$node" "mkdir -p $DST_CERT_DIR"
for file in "${CERT_FILES[@]}"; do
if [[ -f "$file" ]]; then
scp -q "$file" "$node:$DST_CERT_DIR/"
if [[ $? -eq 0 ]]; then
echo " [OK] $(basename "$file") 已传输到 $node:$DST_CERT_DIR"
else
echo " [ERROR] $(basename "$file") 传输到 $node 失败"
NODE_SUCCESS=false
fi
else
echo " [WARN] 本地缺少文件: $file"
NODE_SUCCESS=false
fi
done
# === 分发 kubeconfig 文件(只处理 .kubeconfig) ===
ssh "$node" "mkdir -p $DST_KUBECONFIG_DIR"
for file in "${KUBECONFIG_FILES[@]}"; do
if [[ -f "$file" ]]; then
scp -q "$file" "$node:$DST_KUBECONFIG_DIR/"
if [[ $? -eq 0 ]]; then
echo " [OK] $(basename "$file") 已传输到 $node:$DST_KUBECONFIG_DIR"
else
echo " [ERROR] $(basename "$file") 传输到 $node 失败"
NODE_SUCCESS=false
fi
else
echo " [WARN] 本地缺少文件: $file"
NODE_SUCCESS=false
fi
done
# 节点统计
if $NODE_SUCCESS; then
((SUCCESS_COUNT++))
else
((FAIL_COUNT++))
FAILED_NODES+=("$node")
fi
done
# === 工作节点分发 kube-proxy.kubeconfig ===
for node in "${WORKER_NODES[@]}"; do
echo ">>> 处理工作节点: $node"
NODE_SUCCESS=true
ssh "$node" "mkdir -p $DST_KUBECONFIG_DIR"
if [[ -f "$KUBEPROXY_FILE" ]]; then
scp -q "$KUBEPROXY_FILE" "$node:$DST_KUBECONFIG_DIR/"
if [[ $? -eq 0 ]]; then
echo " [OK] kube-proxy.kubeconfig 已传输到 $node:$DST_KUBECONFIG_DIR"
else
echo " [ERROR] kube-proxy.kubeconfig 传输到 $node 失败"
NODE_SUCCESS=false
fi
else
echo " [WARN] 本地缺少文件: $KUBEPROXY_FILE"
NODE_SUCCESS=false
fi
# 节点统计
if $NODE_SUCCESS; then
((SUCCESS_COUNT++))
else
((FAIL_COUNT++))
FAILED_NODES+=("$node")
fi
done
# === 总结 ===
echo "======================"
echo "分发完成"
echo "成功节点数量: $SUCCESS_COUNT"
echo "失败节点数量: $FAIL_COUNT"
if [[ $FAIL_COUNT -gt 0 ]]; then
echo "失败节点列表: ${FAILED_NODES[*]}"
fi
echo "======================"
EOF
chmod +x /script/copy_kubeconfig_certificate.sh
/script/copy_kubeconfig_certificate.sh
如果不是高可用集群,使用以下操作并在 worker 节点验证:
cat > /script/copy_kubeconfig_certificate_worker.sh <<-'EOF'
#!/bin/bash
# k8s kube-proxy.kubeconfig 分发脚本(只处理工作节点,带结果统计)
# 工作节点(只需要 kube-proxy.kubeconfig)
WORKER_NODES=("k8s-worker01" "k8s-worker02")
# kubeconfig 目录
SRC_KUBECONFIG_DIR="/etc/kubernetes"
DST_KUBECONFIG_DIR="/etc/kubernetes"
# kube-proxy kubeconfig 文件
KUBEPROXY_FILE="$SRC_KUBECONFIG_DIR/kube-proxy.kubeconfig"
# 统计
SUCCESS_COUNT=0
FAIL_COUNT=0
FAILED_NODES=()
echo "开始分发 kube-proxy.kubeconfig 文件到工作节点..."
# === 工作节点分发 kube-proxy.kubeconfig ===
for node in "${WORKER_NODES[@]}"; do
echo ">>> 处理工作节点: $node"
NODE_SUCCESS=true
ssh "$node" "mkdir -p $DST_KUBECONFIG_DIR"
if [[ -f "$KUBEPROXY_FILE" ]]; then
scp -q "$KUBEPROXY_FILE" "$node:$DST_KUBECONFIG_DIR/"
if [[ $? -eq 0 ]]; then
echo " [OK] kube-proxy.kubeconfig 已传输到 $node:$DST_KUBECONFIG_DIR"
else
echo " [ERROR] kube-proxy.kubeconfig 传输到 $node 失败"
NODE_SUCCESS=false
fi
else
echo " [WARN] 本地缺少文件: $KUBEPROXY_FILE"
NODE_SUCCESS=false
fi
# 节点统计
if $NODE_SUCCESS; then
((SUCCESS_COUNT++))
else
((FAIL_COUNT++))
FAILED_NODES+=("$node")
fi
done
# === 总结 ===
echo "======================"
echo "分发完成"
echo "成功节点数量: $SUCCESS_COUNT"
echo "失败节点数量: $FAIL_COUNT"
if [[ $FAIL_COUNT -gt 0 ]]; then
echo "失败节点列表: ${FAILED_NODES[*]}"
fi
echo "======================"
EOF
chmod +x /script/copy_kubeconfig_certificate_worker.sh
/script/copy_kubeconfig_certificate_worker.sh
其余 controller 节点验证:
ls /etc/kubernetes/pki/ |wc -l
ls /etc/kubernetes/*.kubeconfig|wc -l
worker 节点验证(有 kube-proxy.kubeconfig 文件):
ls /etc/kubernetes/
6.etcd 集群部署
6.1 创建配置文件
k8s-controller01 节点配置:
cat > /etc/etcd/etcd.config.yml << EOF
name: 'k8s-controller01'
data-dir: /var/lib/etcd
wal-dir: /var/lib/etcd/wal
snapshot-count: 5000
heartbeat-interval: 100
election-timeout: 1000
quota-backend-bytes: 0
listen-peer-urls: 'https://192.168.109.100:2380'
listen-client-urls: 'https://192.168.109.100:2379,http://127.0.0.1:2379'
max-snapshots: 3
max-wals: 5
cors:
initial-advertise-peer-urls: 'https://192.168.109.100:2380'
advertise-client-urls: 'https://192.168.109.100:2379'
discovery:
discovery-fallback: 'proxy'
discovery-proxy:
discovery-srv:
initial-cluster: 'k8s-controller01=https://192.168.109.100:2380,k8s-controller02=https://192.168.109.101:2380,k8s-controller03=https://192.168.109.102:2380'
initial-cluster-token: 'etcd-k8s-cluster'
initial-cluster-state: 'new'
strict-reconfig-check: false
enable-v2: true
enable-pprof: true
proxy: 'off'
proxy-failure-wait: 5000
proxy-refresh-interval: 30000
proxy-dial-timeout: 1000
proxy-write-timeout: 5000
proxy-read-timeout: 0
client-transport-security:
cert-file: '/etc/etcd/ssl/etcd.pem'
key-file: '/etc/etcd/ssl/etcd-key.pem'
client-cert-auth: true
trusted-ca-file: '/etc/etcd/ssl/etcd-ca.pem'
auto-tls: true
peer-transport-security:
cert-file: '/etc/etcd/ssl/etcd.pem'
key-file: '/etc/etcd/ssl/etcd-key.pem'
peer-client-cert-auth: true
trusted-ca-file: '/etc/etcd/ssl/etcd-ca.pem'
auto-tls: true
debug: false
log-package-levels:
log-outputs: [default]
force-new-cluster: false
EOF
k8s-controller02 节点配置:
cat > /etc/etcd/etcd.config.yml << EOF
name: 'k8s-controller02'
data-dir: /var/lib/etcd
wal-dir: /var/lib/etcd/wal
snapshot-count: 5000
heartbeat-interval: 100
election-timeout: 1000
quota-backend-bytes: 0
listen-peer-urls: 'https://192.168.109.101:2380'
listen-client-urls: 'https://192.168.109.101:2379,http://127.0.0.1:2379'
max-snapshots: 3
max-wals: 5
cors:
initial-advertise-peer-urls: 'https://192.168.109.101:2380'
advertise-client-urls: 'https://192.168.109.101:2379'
discovery:
discovery-fallback: 'proxy'
discovery-proxy:
discovery-srv:
initial-cluster: 'k8s-controller01=https://192.168.109.100:2380,k8s-controller02=https://192.168.109.101:2380,k8s-controller03=https://192.168.109.102:2380'
initial-cluster-token: 'etcd-k8s-cluster'
initial-cluster-state: 'new'
strict-reconfig-check: false
enable-v2: true
enable-pprof: true
proxy: 'off'
proxy-failure-wait: 5000
proxy-refresh-interval: 30000
proxy-dial-timeout: 1000
proxy-write-timeout: 5000
proxy-read-timeout: 0
client-transport-security:
cert-file: '/etc/etcd/ssl/etcd.pem'
key-file: '/etc/etcd/ssl/etcd-key.pem'
client-cert-auth: true
trusted-ca-file: '/etc/etcd/ssl/etcd-ca.pem'
auto-tls: true
peer-transport-security:
cert-file: '/etc/etcd/ssl/etcd.pem'
key-file: '/etc/etcd/ssl/etcd-key.pem'
peer-client-cert-auth: true
trusted-ca-file: '/etc/etcd/ssl/etcd-ca.pem'
auto-tls: true
debug: false
log-package-levels:
log-outputs: [default]
force-new-cluster: false
EOF
k8s-controller03 节点配置:
cat > /etc/etcd/etcd.config.yml << EOF
name: 'k8s-controller03'
data-dir: /var/lib/etcd
wal-dir: /var/lib/etcd/wal
snapshot-count: 5000
heartbeat-interval: 100
election-timeout: 1000
quota-backend-bytes: 0
listen-peer-urls: 'https://192.168.109.102:2380'
listen-client-urls: 'https://192.168.109.102:2379,http://127.0.0.1:2379'
max-snapshots: 3
max-wals: 5
cors:
initial-advertise-peer-urls: 'https://192.168.109.102:2380'
advertise-client-urls: 'https://192.168.109.102:2379'
discovery:
discovery-fallback: 'proxy'
discovery-proxy:
discovery-srv:
initial-cluster: 'k8s-controller01=https://192.168.109.100:2380,k8s-controller02=https://192.168.109.101:2380,k8s-controller03=https://192.168.109.102:2380'
initial-cluster-token: 'etcd-k8s-cluster'
initial-cluster-state: 'new'
strict-reconfig-check: false
enable-v2: true
enable-pprof: true
proxy: 'off'
proxy-failure-wait: 5000
proxy-refresh-interval: 30000
proxy-dial-timeout: 1000
proxy-write-timeout: 5000
proxy-read-timeout: 0
client-transport-security:
cert-file: '/etc/etcd/ssl/etcd.pem'
key-file: '/etc/etcd/ssl/etcd-key.pem'
client-cert-auth: true
trusted-ca-file: '/etc/etcd/ssl/etcd-ca.pem'
auto-tls: true
peer-transport-security:
cert-file: '/etc/etcd/ssl/etcd.pem'
key-file: '/etc/etcd/ssl/etcd-key.pem'
peer-client-cert-auth: true
trusted-ca-file: '/etc/etcd/ssl/etcd-ca.pem'
auto-tls: true
debug: false
log-package-levels:
log-outputs: [default]
force-new-cluster: false
EOF
如果不是高可用集群,使用以下操作:
cat > /etc/etcd/etcd.config.yml << EOF
name: 'k8s-controller01'
data-dir: /var/lib/etcd
wal-dir: /var/lib/etcd/wal
snapshot-count: 5000
heartbeat-interval: 100
election-timeout: 1000
quota-backend-bytes: 0
listen-peer-urls: 'https://192.168.109.100:2380'
listen-client-urls: 'https://192.168.109.100:2379,http://127.0.0.1:2379'
max-snapshots: 3
max-wals: 5
cors:
initial-advertise-peer-urls: 'https://192.168.109.100:2380'
advertise-client-urls: 'https://192.168.109.100:2379'
discovery:
discovery-fallback: 'proxy'
discovery-proxy:
discovery-srv:
initial-cluster: 'k8s-controller01=https://192.168.109.100:2380'
initial-cluster-token: 'etcd-k8s'
initial-cluster-state: 'new'
strict-reconfig-check: false
enable-v2: true
enable-pprof: true
proxy: 'off'
proxy-failure-wait: 5000
proxy-refresh-interval: 30000
proxy-dial-timeout: 1000
proxy-write-timeout: 5000
proxy-read-timeout: 0
client-transport-security:
cert-file: '/etc/etcd/ssl/etcd.pem'
key-file: '/etc/etcd/ssl/etcd-key.pem'
client-cert-auth: true
trusted-ca-file: '/etc/etcd/ssl/etcd-ca.pem'
auto-tls: true
peer-transport-security:
cert-file: '/etc/etcd/ssl/etcd.pem'
key-file: '/etc/etcd/ssl/etcd-key.pem'
peer-client-cert-auth: true
trusted-ca-file: '/etc/etcd/ssl/etcd-ca.pem'
auto-tls: true
debug: false
log-package-levels:
log-outputs: [default]
force-new-cluster: false
EOF
6.2 创建 service 文件
所有 controller 节点配置:
cat > /usr/lib/systemd/system/etcd.service << EOF
[Unit]
Description=Etcd Service
Documentation=https://coreos.com/etcd/docs/latest/
After=network.target
[Service]
Type=notify
ExecStart=/usr/local/bin/etcd --config-file=/etc/etcd/etcd.config.yml
Restart=on-failure
RestartSec=10
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
Alias=etcd3.service
EOF
6.3 启动服务并验证
所有 controller 节点操作:
systemctl daemon-reload
systemctl enable --now etcd.service
systemctl restart etcd.service
systemctl status etcd.service
任意 controller 节点操作:
export ETCDCTL_API=3
etcdctl --endpoints="192.168.109.102:2379,192.168.109.101:2379,192.168.109.100:2379" --cacert=/etc/etcd/ssl/etcd-ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem endpoint status --write-out=table
如果不是高可用集群,使用以下操作:
export ETCDCTL_API=3
etcdctl --endpoints="192.168.109.100:2379" --cacert=/etc/etcd/ssl/etcd-ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem endpoint status --write-out=table
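可选:还可以检查各成员健康状态与成员列表(以下为高可用集群示例,单节点集群把 endpoints 换成 192.168.109.100:2379 即可):
etcdctl --endpoints="192.168.109.100:2379,192.168.109.101:2379,192.168.109.102:2379" \
--cacert=/etc/etcd/ssl/etcd-ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem \
endpoint health --write-out=table
etcdctl --endpoints="192.168.109.100:2379" \
--cacert=/etc/etcd/ssl/etcd-ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem \
member list --write-out=table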
7.高可用组件安装
如果不是高可用集群,跳过此步骤。
7.1 安装软件
所有 controller 节点操作:
cd /software/
tar xf ha.tar.gz && cd ha
./install.sh
在线安装命令:
apt -y install keepalived haproxy
7.2 haproxy 软件配置
所有 controller 节点操作:
cp /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg.bak
cat >/etc/haproxy/haproxy.cfg<<"EOF"
global
maxconn 2000
ulimit-n 16384
log 127.0.0.1 local0 err
stats timeout 30s
defaults
log global
mode http
option httplog
timeout connect 5000
timeout client 50000
timeout server 50000
timeout http-request 15s
timeout http-keep-alive 15s
frontend monitor-in
bind *:33305
mode http
option httplog
monitor-uri /monitor
frontend k8s-master
bind 0.0.0.0:9443
bind 127.0.0.1:9443
mode tcp
option tcplog
tcp-request inspect-delay 5s
default_backend k8s-master
backend k8s-master
mode tcp
option tcplog
option tcp-check
balance roundrobin
default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
server k8s-controller01 192.168.109.100:6443 check
server k8s-controller02 192.168.109.101:6443 check
server k8s-controller03 192.168.109.102:6443 check
EOF
7.3 keepalived 软件配置
controller01 节点:
cat > /etc/keepalived/keepalived.conf << EOF
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script chk_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 5
weight -5
fall 2
rise 1
}
vrrp_instance VI_1 {
state MASTER
# 注意网卡名
interface ens33
mcast_src_ip 192.168.109.100
virtual_router_id 51
priority 100
nopreempt
advert_int 2
authentication {
auth_type PASS
auth_pass K8SHA_KA_AUTH
}
virtual_ipaddress {
192.168.109.108
}
track_script {
chk_apiserver
}
}
EOF
controller02 节点:
cat > /etc/keepalived/keepalived.conf << EOF
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script chk_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 5
weight -5
fall 2
rise 1
}
vrrp_instance VI_1 {
state BACKUP
# 注意网卡名
interface ens33
mcast_src_ip 192.168.109.101
virtual_router_id 51
priority 80
nopreempt
advert_int 2
authentication {
auth_type PASS
auth_pass K8SHA_KA_AUTH
}
virtual_ipaddress {
192.168.109.108
}
track_script {
chk_apiserver
}
}
EOF
controller03 节点:
cat > /etc/keepalived/keepalived.conf << EOF
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script chk_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 5
weight -5
fall 2
rise 1
}
vrrp_instance VI_1 {
state BACKUP
# 注意网卡名
interface ens33
mcast_src_ip 192.168.109.102
virtual_router_id 51
priority 50
nopreempt
advert_int 2
authentication {
auth_type PASS
auth_pass K8SHA_KA_AUTH
}
virtual_ipaddress {
192.168.109.108
}
track_script {
chk_apiserver
}
}
EOF
7.4 健康检查脚本
所有 controller 节点操作:
cat > /etc/keepalived/check_apiserver.sh << EOF
#!/bin/bash
err=0
for k in \$(seq 1 3)
do
check_code=\$(pgrep haproxy)
if [[ \$check_code == "" ]]; then
err=\$(expr \$err + 1)
sleep 1
continue
else
err=0
break
fi
done
if [[ \$err != "0" ]]; then
echo "systemctl stop keepalived"
/usr/bin/systemctl stop keepalived
exit 1
else
exit 0
fi
EOF
chmod +x /etc/keepalived/check_apiserver.sh
7.5 启动 haproxy 和 keepalived 服务
所有 controller 节点操作:
systemctl daemon-reload
systemctl enable --now haproxy.service
systemctl enable --now keepalived.service
systemctl status haproxy.service
systemctl status keepalived.service
7.6 高可用测试
ping -c 4 192.168.109.108
telnet 192.168.109.108 9443
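可选:验证 VIP 漂移(示例思路,网卡名 ens33 以实际环境为准,测试完记得恢复服务):
# 1. 在各 controller 节点查看 VIP 当前落在哪个节点
ip addr show ens33 | grep 192.168.109.108
# 2. 在持有 VIP 的节点上停止 haproxy,健康检查脚本会随之停掉 keepalived
systemctl stop haproxy
# 3. 在其余节点确认 VIP 已漂移,且 9443 端口仍可访问
ip addr show ens33 | grep 192.168.109.108
telnet 192.168.109.108 9443
# 4. 测试完成后恢复服务
systemctl start haproxy keepalived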
8.k8s 组件部署
8.1 ApiServer 组件部署
Apiserver 启动参数 --requestheader-allowed-names 的值一定要设置为 front-proxy-client,否则后期部署 Metrics Server 时会出错。
controller01 节点:
cat > /usr/lib/systemd/system/kube-apiserver.service << EOF
[Unit]
Description=Kubernetes API Server
Documentation=https://github.com/kubernetes/kubernetes
After=network.target
[Service]
ExecStart=/usr/local/bin/kube-apiserver \\
--v=2 \\
--allow-privileged=true \\
--bind-address=0.0.0.0 \\
--secure-port=6443 \\
--advertise-address=192.168.109.100 \\
--service-cluster-ip-range=10.96.0.0/12 \\
--service-node-port-range=30000-32767 \\
--etcd-servers=https://192.168.109.100:2379,https://192.168.109.101:2379,https://192.168.109.102:2379 \\
--etcd-cafile=/etc/etcd/ssl/etcd-ca.pem \\
--etcd-certfile=/etc/etcd/ssl/etcd.pem \\
--etcd-keyfile=/etc/etcd/ssl/etcd-key.pem \\
--client-ca-file=/etc/kubernetes/pki/ca.pem \\
--tls-cert-file=/etc/kubernetes/pki/apiserver.pem \\
--tls-private-key-file=/etc/kubernetes/pki/apiserver-key.pem \\
--kubelet-client-certificate=/etc/kubernetes/pki/apiserver.pem \\
--kubelet-client-key=/etc/kubernetes/pki/apiserver-key.pem \\
--service-account-key-file=/etc/kubernetes/pki/sa.pub \\
--service-account-signing-key-file=/etc/kubernetes/pki/sa.key \\
--service-account-issuer=https://kubernetes.default.svc.cluster.local \\
--kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname \\
--enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,ResourceQuota \\
--authorization-mode=Node,RBAC \\
--enable-bootstrap-token-auth=true \\
--requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.pem \\
--proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.pem \\
--proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client-key.pem \\
--requestheader-allowed-names=front-proxy-client \\
--requestheader-group-headers=X-Remote-Group \\
--requestheader-extra-headers-prefix=X-Remote-Extra- \\
--requestheader-username-headers=X-Remote-User \\
--enable-aggregator-routing=true
Restart=on-failure
RestartSec=10s
LimitNOFILE=65535
[Install]
WantedBy=multi-user.target
EOF
controller02 节点:
cat > /usr/lib/systemd/system/kube-apiserver.service << EOF
[Unit]
Description=Kubernetes API Server
Documentation=https://github.com/kubernetes/kubernetes
After=network.target
[Service]
ExecStart=/usr/local/bin/kube-apiserver \\
--v=2 \\
--allow-privileged=true \\
--bind-address=0.0.0.0 \\
--secure-port=6443 \\
--advertise-address=192.168.109.101 \\
--service-cluster-ip-range=10.96.0.0/12 \\
--service-node-port-range=30000-32767 \\
--etcd-servers=https://192.168.109.100:2379,https://192.168.109.101:2379,https://192.168.109.102:2379 \\
--etcd-cafile=/etc/etcd/ssl/etcd-ca.pem \\
--etcd-certfile=/etc/etcd/ssl/etcd.pem \\
--etcd-keyfile=/etc/etcd/ssl/etcd-key.pem \\
--client-ca-file=/etc/kubernetes/pki/ca.pem \\
--tls-cert-file=/etc/kubernetes/pki/apiserver.pem \\
--tls-private-key-file=/etc/kubernetes/pki/apiserver-key.pem \\
--kubelet-client-certificate=/etc/kubernetes/pki/apiserver.pem \\
--kubelet-client-key=/etc/kubernetes/pki/apiserver-key.pem \\
--service-account-key-file=/etc/kubernetes/pki/sa.pub \\
--service-account-signing-key-file=/etc/kubernetes/pki/sa.key \\
--service-account-issuer=https://kubernetes.default.svc.cluster.local \\
--kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname \\
--enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,ResourceQuota \\
--authorization-mode=Node,RBAC \\
--enable-bootstrap-token-auth=true \\
--requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.pem \\
--proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.pem \\
--proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client-key.pem \\
--requestheader-allowed-names=front-proxy-client \\
--requestheader-group-headers=X-Remote-Group \\
--requestheader-extra-headers-prefix=X-Remote-Extra- \\
--requestheader-username-headers=X-Remote-User \\
--enable-aggregator-routing=true
Restart=on-failure
RestartSec=10s
LimitNOFILE=65535
[Install]
WantedBy=multi-user.target
EOF
controller03 节点:
cat > /usr/lib/systemd/system/kube-apiserver.service << EOF
[Unit]
Description=Kubernetes API Server
Documentation=https://github.com/kubernetes/kubernetes
After=network.target
[Service]
ExecStart=/usr/local/bin/kube-apiserver \\
--v=2 \\
--allow-privileged=true \\
--bind-address=0.0.0.0 \\
--secure-port=6443 \\
--advertise-address=192.168.109.102 \\
--service-cluster-ip-range=10.96.0.0/12 \\
--service-node-port-range=30000-32767 \\
--etcd-servers=https://192.168.109.100:2379,https://192.168.109.101:2379,https://192.168.109.102:2379 \\
--etcd-cafile=/etc/etcd/ssl/etcd-ca.pem \\
--etcd-certfile=/etc/etcd/ssl/etcd.pem \\
--etcd-keyfile=/etc/etcd/ssl/etcd-key.pem \\
--client-ca-file=/etc/kubernetes/pki/ca.pem \\
--tls-cert-file=/etc/kubernetes/pki/apiserver.pem \\
--tls-private-key-file=/etc/kubernetes/pki/apiserver-key.pem \\
--kubelet-client-certificate=/etc/kubernetes/pki/apiserver.pem \\
--kubelet-client-key=/etc/kubernetes/pki/apiserver-key.pem \\
--service-account-key-file=/etc/kubernetes/pki/sa.pub \\
--service-account-signing-key-file=/etc/kubernetes/pki/sa.key \\
--service-account-issuer=https://kubernetes.default.svc.cluster.local \\
--kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname \\
--enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,ResourceQuota \\
--authorization-mode=Node,RBAC \\
--enable-bootstrap-token-auth=true \\
--requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.pem \\
--proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.pem \\
--proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client-key.pem \\
--requestheader-allowed-names=front-proxy-client \\
--requestheader-group-headers=X-Remote-Group \\
--requestheader-extra-headers-prefix=X-Remote-Extra- \\
--requestheader-username-headers=X-Remote-User \\
--enable-aggregator-routing=true
Restart=on-failure
RestartSec=10s
LimitNOFILE=65535
[Install]
WantedBy=multi-user.target
EOF
如果不是高可用集群,使用以下操作:
cat > /usr/lib/systemd/system/kube-apiserver.service << EOF
[Unit]
Description=Kubernetes API Server
Documentation=https://github.com/kubernetes/kubernetes
After=network.target
[Service]
ExecStart=/usr/local/bin/kube-apiserver \\
--v=2 \\
--allow-privileged=true \\
--bind-address=0.0.0.0 \\
--secure-port=6443 \\
--advertise-address=192.168.109.100 \\
--service-cluster-ip-range=10.96.0.0/12 \\
--service-node-port-range=30000-32767 \\
--etcd-servers=https://192.168.109.100:2379 \\
--etcd-cafile=/etc/etcd/ssl/etcd-ca.pem \\
--etcd-certfile=/etc/etcd/ssl/etcd.pem \\
--etcd-keyfile=/etc/etcd/ssl/etcd-key.pem \\
--client-ca-file=/etc/kubernetes/pki/ca.pem \\
--tls-cert-file=/etc/kubernetes/pki/apiserver.pem \\
--tls-private-key-file=/etc/kubernetes/pki/apiserver-key.pem \\
--kubelet-client-certificate=/etc/kubernetes/pki/apiserver.pem \\
--kubelet-client-key=/etc/kubernetes/pki/apiserver-key.pem \\
--service-account-key-file=/etc/kubernetes/pki/sa.pub \\
--service-account-signing-key-file=/etc/kubernetes/pki/sa.key \\
--service-account-issuer=https://kubernetes.default.svc.cluster.local \\
--kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname \\
--enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,ResourceQuota \\
--authorization-mode=Node,RBAC \\
--enable-bootstrap-token-auth=true \\
--requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.pem \\
--proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.pem \\
--proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client-key.pem \\
--requestheader-allowed-names=front-proxy-client \\
--requestheader-group-headers=X-Remote-Group \\
--requestheader-extra-headers-prefix=X-Remote-Extra- \\
--requestheader-username-headers=X-Remote-User \\
--enable-aggregator-routing=true
Restart=on-failure
RestartSec=10s
LimitNOFILE=65535
[Install]
WantedBy=multi-user.target
EOF
所有 controller 节点操作:
systemctl daemon-reload
systemctl enable --now kube-apiserver.service
systemctl restart kube-apiserver.service
systemctl status kube-apiserver.service
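可选:通过健康检查接口快速确认 apiserver 已就绪(默认的 system:public-info-viewer 角色允许匿名访问 /healthz、/readyz 等路径,返回 ok 即正常;若环境关闭了匿名认证,则需携带证书访问):
curl -k https://127.0.0.1:6443/healthz
# 高可用集群还可以通过负载均衡地址验证
curl -k https://192.168.109.108:9443/healthz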
8.2 ControllerManager 组件部署
所有 controller 节点操作:
cat > /usr/lib/systemd/system/kube-controller-manager.service << EOF
[Unit]
Description=Kubernetes Controller Manager
Documentation=https://github.com/kubernetes/kubernetes
After=network.target
[Service]
ExecStart=/usr/local/bin/kube-controller-manager \\
--v=2 \\
--bind-address=0.0.0.0 \\
--root-ca-file=/etc/kubernetes/pki/ca.pem \\
--cluster-signing-cert-file=/etc/kubernetes/pki/ca.pem \\
--cluster-signing-key-file=/etc/kubernetes/pki/ca-key.pem \\
--service-account-private-key-file=/etc/kubernetes/pki/sa.key \\
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig \\
--leader-elect=true \\
--use-service-account-credentials=true \\
--node-monitor-grace-period=40s \\
--node-monitor-period=5s \\
--controllers=*,bootstrapsigner,tokencleaner \\
--allocate-node-cidrs=true \\
--service-cluster-ip-range=10.96.0.0/12 \\
--cluster-cidr=172.16.0.0/12 \\
--node-cidr-mask-size-ipv4=24 \\
--requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.pem
Restart=always
RestartSec=10s
[Install]
WantedBy=multi-user.target
EOF
所有 controller 节点操作:
systemctl daemon-reload
systemctl enable --now kube-controller-manager.service
systemctl restart kube-controller-manager.service
systemctl status kube-controller-manager.service
8.3 Scheduler 组件部署
所有 controller 节点操作:
cat > /usr/lib/systemd/system/kube-scheduler.service << EOF
[Unit]
Description=Kubernetes Scheduler
Documentation=https://github.com/kubernetes/kubernetes
After=network.target
[Service]
ExecStart=/usr/local/bin/kube-scheduler \\
--v=2 \\
--bind-address=0.0.0.0 \\
--leader-elect=true \\
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig
Restart=always
RestartSec=10s
[Install]
WantedBy=multi-user.target
EOF
所有 controller 节点操作:
systemctl daemon-reload
systemctl enable --now kube-scheduler.service
systemctl restart kube-scheduler.service
systemctl status kube-scheduler.service
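可选:kube-controller-manager(10257)与 kube-scheduler(10259)默认放行健康检查路径,可直接验证(返回 ok 即正常):
curl -k https://127.0.0.1:10257/healthz
curl -k https://127.0.0.1:10259/healthz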
9.TLS Bootstrapping配置
设置一个集群:
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true --server=https://192.168.109.108:9443 \
--kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig
如果不是高可用集群,使用以下操作:
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true --server=https://192.168.109.100:6443 \
--kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig
设置一个用户项:
kubectl config set-credentials tls-bootstrap-token-user \
--token=690f18.ec60b9557b7da447 \
--kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig
设置一个上下文环境:
kubectl config set-context tls-bootstrap-token-user@kubernetes \
--cluster=kubernetes \
--user=tls-bootstrap-token-user \
--kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig
使用默认的上下文:
kubectl config use-context tls-bootstrap-token-user@kubernetes \
--kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig
所有 controller 节点拷贝管理员的证书文件:
mkdir -p /root/.kube ; cp /etc/kubernetes/admin.kubeconfig /root/.kube/config
查看集群组件状态(kubectl get cs 依赖的 ComponentStatus API 自 1.19 起已被官方弃用,但在 1.33 中仍未移除,仍可用于快速检查。)
kubectl get cs
如果没有正常输出,先不要继续后面的步骤,先排查问题。
创建 bootstrap-secret 文件:
cat > bootstrap.secret.yaml << EOF
apiVersion: v1
kind: Secret
metadata:
name: bootstrap-token-690f18
namespace: kube-system
type: bootstrap.kubernetes.io/token
stringData:
description: "The default bootstrap token generated by 'kubelet '."
token-id: 690f18
token-secret: ec60b9557b7da447
usage-bootstrap-authentication: "true"
usage-bootstrap-signing: "true"
auth-extra-groups: system:bootstrappers:default-node-token,system:bootstrappers:worker,system:bootstrappers:ingress
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: kubelet-bootstrap
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:node-bootstrapper
subjects:
- apiGroup: rbac.authorization.k8s.io
kind: Group
name: system:bootstrappers:default-node-token
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: node-autoapprove-bootstrap
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:certificates.k8s.io:certificatesigningrequests:nodeclient
subjects:
- apiGroup: rbac.authorization.k8s.io
kind: Group
name: system:bootstrappers:default-node-token
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: node-autoapprove-certificate-rotation
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:certificates.k8s.io:certificatesigningrequests:selfnodeclient
subjects:
- apiGroup: rbac.authorization.k8s.io
kind: Group
name: system:nodes
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
annotations:
rbac.authorization.kubernetes.io/autoupdate: "true"
labels:
kubernetes.io/bootstrapping: rbac-defaults
name: system:kube-apiserver-to-kubelet
rules:
- apiGroups:
- ""
resources:
- nodes/proxy
- nodes/stats
- nodes/log
- nodes/spec
- nodes/metrics
verbs:
- "*"
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: system:kube-apiserver
namespace: ""
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:kube-apiserver-to-kubelet
subjects:
- apiGroup: rbac.authorization.k8s.io
kind: User
name: kube-apiserver
EOF
应用资源文件:
kubectl create -f bootstrap.secret.yaml
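可选:确认 bootstrap token 及相关 RBAC 已创建:
kubectl -n kube-system get secret bootstrap-token-690f18
kubectl get clusterrolebinding kubelet-bootstrap node-autoapprove-bootstrap node-autoapprove-certificate-rotation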
10.worker 节点部署
10.1 Kubelet 组件部署
10.1.1 拷贝证书和 kubeconfig 文件到 worker 节点
拷贝文件到 worker 节点:
cat > /script/copy_kubeconfig_certificate.sh <<-'EOF'
#!/bin/bash
# k8s 证书 + kubeconfig 分发脚本(带结果统计)
# === 配置 ===
# 节点分组
WORKER_NODES=("k8s-worker01" "k8s-worker02")
ALL_NODES=("k8s-controller02" "k8s-controller03" "k8s-worker01" "k8s-worker02")
# 目录
K8S_CERT_DIR="/etc/kubernetes/pki"
KUBECONFIG_DIR="/etc/kubernetes/"
# 需要拷贝的文件
CERT_FILES=("ca.pem" "ca-key.pem" "front-proxy-ca.pem")
KUBECONFIG_FILE="bootstrap-kubelet.kubeconfig"
# 统计
SUCCESS_COUNT=0
FAIL_COUNT=0
FAILED_NODES=()
echo "开始分发文件..."
# === 任务 1: 分发证书到 Worker 节点 ===
for node in "${WORKER_NODES[@]}"; do
echo ">>> [任务1] 处理 Worker 节点: $node"
ssh "$node" "mkdir -p $K8S_CERT_DIR"
NODE_SUCCESS=true
for file in "${CERT_FILES[@]}"; do
if [[ -f "$K8S_CERT_DIR/$file" ]]; then
scp -q "$K8S_CERT_DIR/$file" "$node:$K8S_CERT_DIR/"
if [[ $? -eq 0 ]]; then
echo " [OK] $file 已传输到 $node:$K8S_CERT_DIR"
else
echo " [ERROR] $file 传输到 $node 失败"
NODE_SUCCESS=false
fi
else
echo " [WARN] 本地缺少文件: $K8S_CERT_DIR/$file"
NODE_SUCCESS=false
fi
done
if $NODE_SUCCESS; then
((SUCCESS_COUNT++))
else
((FAIL_COUNT++))
FAILED_NODES+=("$node(任务1)")
fi
done
# === 任务 2: 分发 kubeconfig 到 所有节点 ===
for node in "${ALL_NODES[@]}"; do
echo ">>> [任务2] 处理节点: $node"
ssh "$node" "mkdir -p $KUBECONFIG_DIR"
NODE_SUCCESS=true
if [[ -f "$KUBECONFIG_DIR/$KUBECONFIG_FILE" ]]; then
scp -q "$KUBECONFIG_DIR/$KUBECONFIG_FILE" "$node:$KUBECONFIG_DIR/"
if [[ $? -eq 0 ]]; then
echo " [OK] $KUBECONFIG_FILE 已传输到 $node:$KUBECONFIG_DIR"
else
echo " [ERROR] $KUBECONFIG_FILE 传输到 $node 失败"
NODE_SUCCESS=false
fi
else
echo " [WARN] 本地缺少文件: $KUBECONFIG_DIR/$KUBECONFIG_FILE"
NODE_SUCCESS=false
fi
if $NODE_SUCCESS; then
((SUCCESS_COUNT++))
else
((FAIL_COUNT++))
FAILED_NODES+=("$node(任务2)")
fi
done
# === 总结 ===
echo "======================"
echo "分发完成"
echo "成功节点数量: $SUCCESS_COUNT"
echo "失败节点数量: $FAIL_COUNT"
if [[ $FAIL_COUNT -gt 0 ]]; then
echo "失败节点列表: ${FAILED_NODES[*]}"
fi
echo "======================"
EOF
chmod +x /script/copy_kubeconfig_certificate.sh
/script/copy_kubeconfig_certificate.sh
如果不是高可用集群,使用以下操作:
cat > /script/copy_kubeconfig_certificate.sh <<-'EOF'
#!/bin/bash
# k8s 证书 + kubeconfig 分发脚本(仅 Worker 节点)
# === 配置 ===
# Worker 节点
WORKER_NODES=("k8s-worker01" "k8s-worker02")
# 目录
K8S_CERT_DIR="/etc/kubernetes/pki"
KUBECONFIG_DIR="/etc/kubernetes/"
# 需要拷贝的文件
CERT_FILES=("ca.pem" "ca-key.pem" "front-proxy-ca.pem")
KUBECONFIG_FILE="bootstrap-kubelet.kubeconfig"
# 统计
SUCCESS_COUNT=0
FAIL_COUNT=0
FAILED_NODES=()
echo "开始分发文件到 Worker 节点..."
for node in "${WORKER_NODES[@]}"; do
echo ">>> 处理 Worker 节点: $node"
ssh "$node" "mkdir -p $K8S_CERT_DIR $KUBECONFIG_DIR"
NODE_SUCCESS=true
# 分发证书
for file in "${CERT_FILES[@]}"; do
if [[ -f "$K8S_CERT_DIR/$file" ]]; then
scp -q "$K8S_CERT_DIR/$file" "$node:$K8S_CERT_DIR/"
if [[ $? -eq 0 ]]; then
echo " [OK] $file 已传输到 $node:$K8S_CERT_DIR"
else
echo " [ERROR] $file 传输到 $node 失败"
NODE_SUCCESS=false
fi
else
echo " [WARN] 本地缺少文件: $K8S_CERT_DIR/$file"
NODE_SUCCESS=false
fi
done
# 分发 kubeconfig
if [[ -f "$KUBECONFIG_DIR/$KUBECONFIG_FILE" ]]; then
scp -q "$KUBECONFIG_DIR/$KUBECONFIG_FILE" "$node:$KUBECONFIG_DIR/"
if [[ $? -eq 0 ]]; then
echo " [OK] $KUBECONFIG_FILE 已传输到 $node:$KUBECONFIG_DIR"
else
echo " [ERROR] $KUBECONFIG_FILE 传输到 $node 失败"
NODE_SUCCESS=false
fi
else
echo " [WARN] 本地缺少文件: $KUBECONFIG_DIR/$KUBECONFIG_FILE"
NODE_SUCCESS=false
fi
if $NODE_SUCCESS; then
((SUCCESS_COUNT++))
else
((FAIL_COUNT++))
FAILED_NODES+=("$node")
fi
done
# === 总结 ===
echo "======================"
echo "分发完成"
echo "成功节点数量: $SUCCESS_COUNT"
echo "失败节点数量: $FAIL_COUNT"
if [[ $FAIL_COUNT -gt 0 ]]; then
echo "失败节点列表: ${FAILED_NODES[*]}"
fi
echo "======================"
EOF
chmod +x /script/copy_kubeconfig_certificate.sh
/script/copy_kubeconfig_certificate.sh
10.1.2 创建 service 文件
所有节点操作:
mkdir -p /var/lib/kubelet /var/log/kubernetes /etc/systemd/system/kubelet.service.d /etc/kubernetes/manifests/
cat > /usr/lib/systemd/system/kubelet.service << EOF
[Unit]
Description=Kubernetes Kubelet
Documentation=https://github.com/kubernetes/kubernetes
After=network-online.target firewalld.service containerd.service
Wants=network-online.target
Requires=containerd.service
[Service]
ExecStart=/usr/local/bin/kubelet \\
--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig \\
--kubeconfig=/etc/kubernetes/kubelet.kubeconfig \\
--config=/etc/kubernetes/kubelet-conf.yml \\
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \\
--node-labels=node.kubernetes.io/node= \\
--resolv-conf=/run/systemd/resolve/resolv.conf
[Install]
WantedBy=multi-user.target
EOF
10.1.3 创建配置文件
所有节点操作:
cat > /etc/kubernetes/kubelet-conf.yml <<EOF
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
address: 0.0.0.0
port: 10250
readOnlyPort: 10255
authentication:
anonymous:
enabled: false
webhook:
cacheTTL: 2m0s
enabled: true
x509:
clientCAFile: /etc/kubernetes/pki/ca.pem
authorization:
mode: Webhook
webhook:
cacheAuthorizedTTL: 5m0s
cacheUnauthorizedTTL: 30s
cgroupDriver: systemd
cgroupsPerQOS: true
clusterDNS:
- 10.96.0.10
clusterDomain: cluster.local
containerLogMaxFiles: 5
containerLogMaxSize: 10Mi
contentType: application/vnd.kubernetes.protobuf
cpuCFSQuota: true
cpuManagerPolicy: none
cpuManagerReconcilePeriod: 10s
enableControllerAttachDetach: true
enableDebuggingHandlers: true
enforceNodeAllocatable:
- pods
eventBurst: 10
eventRecordQPS: 5
evictionHard:
imagefs.available: 15%
memory.available: 100Mi
nodefs.available: 10%
nodefs.inodesFree: 5%
evictionPressureTransitionPeriod: 5m0s
failSwapOn: true
fileCheckFrequency: 20s
hairpinMode: promiscuous-bridge
healthzBindAddress: 127.0.0.1
healthzPort: 10248
httpCheckFrequency: 20s
imageGCHighThresholdPercent: 85
imageGCLowThresholdPercent: 80
imageMinimumGCAge: 2m0s
iptablesDropBit: 15
iptablesMasqueradeBit: 14
kubeAPIBurst: 10
kubeAPIQPS: 5
makeIPTablesUtilChains: true
maxOpenFiles: 1000000
maxPods: 110
nodeStatusUpdateFrequency: 10s
oomScoreAdj: -999
podPidsLimit: -1
registryBurst: 10
registryPullQPS: 5
resolvConf: /etc/resolv.conf
rotateCertificates: true
runtimeRequestTimeout: 2m0s
serializeImagePulls: true
staticPodPath: /etc/kubernetes/manifests
streamingConnectionIdleTimeout: 4h0m0s
syncFrequency: 1m0s
volumeStatsAggPeriod: 1m0s
EOF
10.1.4 启动服务
所有节点操作:
systemctl daemon-reload
systemctl enable --now kubelet.service
systemctl restart kubelet.service
systemctl status kubelet.service
10.1.5 查看集群和容器运行时
所有 controller 节点操作:
kubectl get node
所有 controller 节点操作:
kubectl describe node | grep Runtime
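可选:查看 kubelet 引导产生的 CSR 是否已被自动批准(CONDITION 为 Approved,Issued 即正常):
kubectl get csr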
10.2 kube-proxy 组件部署
10.2.1 创建 service 文件
所有节点操作:
cat > /usr/lib/systemd/system/kube-proxy.service << EOF
[Unit]
Description=Kubernetes Kube Proxy
Documentation=https://github.com/kubernetes/kubernetes
After=network.target
[Service]
ExecStart=/usr/local/bin/kube-proxy \\
--config=/etc/kubernetes/kube-proxy.yaml \\
--cluster-cidr=172.16.0.0/12 \\
--v=2
Restart=always
RestartSec=10s
[Install]
WantedBy=multi-user.target
EOF
此处 pod 网段为 172.16.0.0/12,请与自己集群环境保持一致。
10.2.2 创建配置文件
所有节点操作:
cat > /etc/kubernetes/kube-proxy.yaml << EOF
apiVersion: kubeproxy.config.k8s.io/v1alpha1
bindAddress: 0.0.0.0
clientConnection:
acceptContentTypes: ""
burst: 10
contentType: application/vnd.kubernetes.protobuf
kubeconfig: /etc/kubernetes/kube-proxy.kubeconfig
qps: 5
clusterCIDR: 172.16.0.0/12
configSyncPeriod: 15m0s
conntrack:
max: null
maxPerCore: 32768
min: 131072
tcpCloseWaitTimeout: 1h0m0s
tcpEstablishedTimeout: 24h0m0s
enableProfiling: false
healthzBindAddress: 0.0.0.0:10256
hostnameOverride: ""
iptables:
masqueradeAll: false
masqueradeBit: 14
minSyncPeriod: 0s
syncPeriod: 30s
ipvs:
masqueradeAll: true
minSyncPeriod: 5s
scheduler: "rr"
syncPeriod: 30s
kind: KubeProxyConfiguration
metricsBindAddress: 127.0.0.1:10249
mode: "ipvs"
nodePortAddresses: null
oomScoreAdj: -999
portRange: ""
udpIdleTimeout: 250ms
EOF
此处 pod 网段为 172.16.0.0/12,请与自己集群环境保持一致。
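可选:核对 kube-controller-manager、kube-proxy 以及后续 calico 所用的 Pod 网段是否一致(仅做文本核对的示例):
grep -- '--cluster-cidr' /usr/lib/systemd/system/kube-controller-manager.service /usr/lib/systemd/system/kube-proxy.service
grep clusterCIDR /etc/kubernetes/kube-proxy.yaml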
10.2.3 启动服务
所有节点操作:
systemctl daemon-reload
systemctl enable --now kube-proxy.service
systemctl restart kube-proxy.service
systemctl status kube-proxy.service
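可选:确认 kube-proxy 以 ipvs 模式运行(/proxyMode 接口由 10249 metrics 端口提供,属常见检查方式;也可以直接查看 ipvs 规则):
curl -s 127.0.0.1:10249/proxyMode
ipvsadm -Ln | head -n 20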
11.配置命令补全
所有 controller 节点操作:
kubectl completion bash > ~/.kube/completion.bash.inc
echo "source '$HOME/.kube/completion.bash.inc'" >> $HOME/.bashrc
source $HOME/.bashrc
12.安装网络插件
12.1 安装 calico 插件
官方安装文档:https://docs.tigera.io/calico/latest/getting-started/kubernetes/quickstart
12.1.1 准备镜像
如果是在线安装且网络良好,可以不用提前导入镜像。
编写拷贝脚本:
cat > /script/copy_file.sh <<-'EOF'
#!/bin/bash
# 通用文件分发脚本
# 用法: /script/copy_file.sh [w|c|a] /path/to/file
# 节点分组
CONTROLLERS=("k8s-controller02" "k8s-controller03")
WORKERS=("k8s-worker01" "k8s-worker02")
ALL_NODES=("${CONTROLLERS[@]}" "${WORKERS[@]}")
# 参数检查
if [[ $# -ne 2 ]]; then
echo "用法: $0 [w|c|a] /path/to/file"+
exit 1
fi
TARGET_GROUP=$1
SRC_FILE=$2
if [[ ! -f "$SRC_FILE" ]]; then
echo "❌ 本地文件不存在: $SRC_FILE"
exit 1
fi
# 目标节点选择
case "$TARGET_GROUP" in
w)
NODES=("${WORKERS[@]}")
;;
c)
NODES=("${CONTROLLERS[@]}")
;;
a)
NODES=("${ALL_NODES[@]}")
;;
*)
echo "❌ 无效参数: $TARGET_GROUP (必须是 w|c|a)"
exit 1
;;
esac
# 提取目录和文件名
DST_DIR=$(dirname "$SRC_FILE")
FILENAME=$(basename "$SRC_FILE")
# 统计
SUCCESS_COUNT=0
FAIL_COUNT=0
FAILED_NODES=()
echo "开始分发文件: $SRC_FILE"
echo "目标节点组: $TARGET_GROUP"
# 循环节点
for node in "${NODES[@]}"; do
echo ">>> 处理节点: $node"
NODE_SUCCESS=true
# 确保目标目录存在
ssh "$node" "mkdir -p $DST_DIR"
if [[ $? -ne 0 ]]; then
echo " [ERROR] 无法在 $node 上创建目录: $DST_DIR"
NODE_SUCCESS=false
else
# 传输文件
scp -q "$SRC_FILE" "$node:$DST_DIR/"
if [[ $? -eq 0 ]]; then
echo " [OK] $FILENAME 已传输到 $node:$DST_DIR"
else
echo " [ERROR] $FILENAME 传输到 $node 失败"
NODE_SUCCESS=false
fi
fi
# 节点统计
if $NODE_SUCCESS; then
((SUCCESS_COUNT++))
else
((FAIL_COUNT++))
FAILED_NODES+=("$node")
fi
done
# === 总结 ===
echo "======================"
echo "分发完成"
echo "成功节点数量: $SUCCESS_COUNT"
echo "失败节点数量: $FAIL_COUNT"
if [[ $FAIL_COUNT -gt 0 ]]; then
echo "失败节点列表: ${FAILED_NODES[*]}"
fi
echo "======================"
EOF
chmod +x /script/copy_file.sh
拷贝镜像到其他节点:
cd /software
tar xf calico-v3.30.3.tar.gz && /script/copy_file.sh a /software/calico/images.tar
如果不是高可用集群,使用以下操作:
cd /software
tar xf calico-v3.30.3.tar.gz && /script/copy_file.sh w /software/calico/images.tar
所有节点导入镜像:
ctr -n k8s.io i import /software/calico/images.tar
12.1.2 安装软件
应用资源文件:
cd /software/calico/
kubectl create -f tigera-operator.yaml
kubectl create -f custom-resources.yaml
在线部署命令:
# 下载资源文件
wget https://raw.githubusercontent.com/projectcalico/calico/v3.30.4/manifests/tigera-operator.yaml
wget https://raw.githubusercontent.com/projectcalico/calico/v3.30.4/manifests/custom-resources.yaml
# 修改 Pod 网段(请和自己集群环境一致)
sed -i 's#cidr: 192.168.0.0/16#cidr: 172.16.0.0/12#g' custom-resources.yaml
grep cidr custom-resources.yaml
# 应用资源文件
kubectl create -f tigera-operator.yaml
# 可以等上 10 秒钟再执行
kubectl create -f custom-resources.yaml
查看所有 pod 是否正常运行:
kubectl get po -A -owide -w
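可选:operator 方式安装会创建 TigeraStatus 资源,可用来观察各组件是否 Available(资源名以实际安装为准):
kubectl get tigerastatus
kubectl -n calico-system get pods -o wide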
13.安装 CoreDNS 组件
如果不特别说明,表示仅在 controller01 节点操作。
13.1 安装 helm
解压软件并验证:
cd /software
tar xf helm-v3.18.6-linux-amd64.tar.gz -C /usr/local/bin/ linux-amd64/helm --strip-components=1
helm version
配置命令自动补全:
source <(helm completion bash)
helm completion bash > /etc/bash_completion.d/helm
13.2 导入镜像
如果是在线安装且网络良好,可以不用提前导入镜像。
拷贝镜像到其他节点:
cd /software
tar xf coredns.tar.gz
/script/copy_file.sh a /software/coredns/coredns-v1.12.3.tar
如果不是高可用集群,使用以下操作:
cd /software
tar xf coredns.tar.gz
/script/copy_file.sh w /software/coredns/coredns-v1.12.3.tar
所有节点导入镜像:
ctr -n k8s.io i import /software/coredns/coredns-v1.12.3.tar
13.3 安装软件
使用 helm 安装软件:
cd /software/coredns
tar xf coredns-1.43.3.tar.gz && helm install coredns ./coredns/ -n kube-system
查看所有 pod 是否正常运行:
kubectl get po -n kube-system -o wide | grep coredns
注意:此处 values.yaml 中配置的 clusterIP 为 10.96.0.10,要和自己集群 service 网段一致,地址通常为网段第十个 IP(x.x.x.10),也可以自行设置。
修改该字段的命令(需在 helm install 之前执行):
sed -i 's/# clusterIP: ""/ clusterIP: "10.96.0.10"/g' values.yaml
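安装完成后,可按以下示例确认 coredns Service 的 CLUSTER-IP 是否为 10.96.0.10(应与 kubelet 配置中的 clusterDNS 保持一致):
kubectl get svc -n kube-system | grep coredns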
14.安装 Metrics Server 组件
官方 Github 仓库:https://github.com/kubernetes-sigs/metrics-server
14.1 导入镜像
如果是在线安装且网络良好,可以不用提前导入镜像。
拷贝镜像到其他节点:
cd /software
tar xf metrics-server.tar.gz
/script/copy_file.sh a /software/metrics-server/metrics-server-v0.8.0.tar
如果不是高可用集群,使用以下操作:
cd /software
tar xf metrics-server.tar.gz
/script/copy_file.sh w /software/metrics-server/metrics-server-v0.8.0.tar
所有节点导入镜像:
ctr -n k8s.io i import /software/metrics-server/metrics-server-v0.8.0.tar
14.2 安装软件和验证
应用资源文件:
cd /software/metrics-server && kubectl apply -f components.yaml
在线部署命令:
# 下载资源文件
wget https://github.com/kubernetes-sigs/metrics-server/releases/download/v0.8.0/components.yaml
# 修改配置文件
vim components.yaml
# 在 Deployment 配置文件的 spec.template.spec.containers.args 下添加 --kubelet-insecure-tls
...
    spec:
      containers:
      - args:
        - --kubelet-insecure-tls
        - --cert-dir=/tmp
        - --secure-port=10250
...
# 修改镜像地址为国内源地址(如果网络不好)
sed -i "s#registry.k8s.io/#k8s.m.daocloud.io/#g" components.yaml
grep image components.yaml
# 应用资源文件
kubectl apply -f components.yaml
查看 pod 运行状态:
kubectl -n kube-system get pods -l k8s-app=metrics-server -w
当 pod 都正常运行后,可以看到采集数据:
kubectl top node
kubectl top po -A
如果 pod 都正常运行,使用 top 命令还是不能获取相关数据,此时可以查看日志排错:
kubectl logs -f -n kube-system -l k8s-app=metrics-server
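metrics-server 通过聚合 API(metrics.k8s.io)对外提供数据,排查时也可以按以下示例直接检查该 API(命令仅供参考):
# 确认 metrics API 已注册且可用(AVAILABLE 应为 True)
kubectl get apiservices v1beta1.metrics.k8s.io
# 直接调用 metrics API 查看节点数据
kubectl get --raw "/apis/metrics.k8s.io/v1beta1/nodes" | jq .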
15.集群可用性验证
15.1 查看节点和 Pod 是否正常
kubectl get nodes
kubectl get po -A
15.2 资源部署验证
15.2.1 Pod 资源部署
创建 Pod 资源:
cat<<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: busybox
  namespace: default
spec:
  containers:
  - name: busybox
    image: docker.m.daocloud.io/library/busybox:1.28
    command:
    - sleep
    - "3600"
    imagePullPolicy: IfNotPresent
  restartPolicy: Always
EOF
查看 Pod 状态:
kubectl get pod -w
15.2.2 Deployment 资源部署
创建 Deployment 资源:
cat<<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deployment
  labels:
    app: nginx
spec:
  replicas: 3
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
      - name: nginx
        image: docker.m.daocloud.io/nginx
        ports:
        - containerPort: 80
EOF
查看 Pod 和 Deployment 资源状态:
kubectl get po,deployments.apps -l app=nginx -owide
# 输出信息如下
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
pod/nginx-deployment-6bd444cb7-9czdh 1/1 Running 0 4m53s 172.20.79.68 k8s-worker01 <none> <none>
pod/nginx-deployment-6bd444cb7-r65wq 1/1 Running 0 4m53s 172.28.206.4 k8s-controller01 <none> <none>
pod/nginx-deployment-6bd444cb7-vdlwk 1/1 Running 0 4m53s 172.25.45.71 k8s-worker02 <none> <none>
NAME READY UP-TO-DATE AVAILABLE AGE CONTAINERS IMAGES SELECTOR
deployment.apps/nginx-deployment 3/3 3 3 4m53s nginx docker.m.daocloud.io/nginx app=nginx
15.3 DNS 解析验证
解析默认名称空间 svc:
kubectl exec busybox -- nslookup `kubectl get svc|awk 'NR == 2{print $1}'`
# 输出信息
Server: 10.96.0.10
Address 1: 10.96.0.10 coredns.kube-system.svc.cluster.local
Name: kubernetes
Address 1: 10.96.0.1 kubernetes.default.svc.cluster.local
解析跨名称空间 svc(kube-system):
for name in $(kubectl get svc -n kube-system|awk 'NR != 1{print $1}'); do
echo -------------"$name".kube-system:-------------
kubectl exec busybox -- nslookup "$name".kube-system
echo
echo
done
# 输出信息
-------------coredns.kube-system:-------------
Server: 10.96.0.10
Address 1: 10.96.0.10 coredns.kube-system.svc.cluster.local
Name: coredns.kube-system
Address 1: 10.96.0.10 coredns.kube-system.svc.cluster.local
-------------metrics-server.kube-system:-------------
Server: 10.96.0.10
Address 1: 10.96.0.10 coredns.kube-system.svc.cluster.local
Name: metrics-server.kube-system
Address 1: 10.96.23.63 metrics-server.kube-system.svc.cluster.local
15.4 SVC(443)和 DNS(53)端口验证
所有节点验证:
telnet 10.96.0.1 443
telnet 10.96.0.10 53
curl 10.96.0.10:53
# 输出信息
Trying 10.96.0.1...
Connected to 10.96.0.1.
Escape character is '^]'.
Connection closed by foreign host.
Trying 10.96.0.10...
Connected to 10.96.0.10.
Escape character is '^]'.
Connection closed by foreign host.
curl: (52) Empty reply from server
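如果节点上已安装 bind9-utils(提供 dig 命令),也可以直接向集群 DNS 验证 53 端口的解析能力(示例):
dig @10.96.0.10 kubernetes.default.svc.cluster.local +short
# 预期返回 kubernetes service 的地址 10.96.0.1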
15.5 网络验证
测试同网段且不同节点 Pod 网络连通性:
cat > /script/network_test.sh <<-'EOF'
#!/bin/bash
# 颜色定义
GREEN=$'\033[32m'
RED=$'\033[31m'
BLUE=$'\033[34m'
YELLOW=$'\033[33m'
RESET=$'\033[0m'
MAX_NAME_LEN=30
# 参数判断:a 表示测试全部命名空间,否则只测试 default
ALL_NS=false
if [ "$1" == "a" ]; then
ALL_NS=true
fi
# 获取 busybox pod
if $ALL_NS; then
  busybox_info=$(kubectl get po -A -o json | jq -r '
    .items[] | select(.metadata.name=="busybox") |
    "\(.metadata.namespace)\t\(.status.podIP)\t\(.spec.nodeName)"' | shuf -n1)
else
  busybox_info=$(kubectl get po -n default -o json | jq -r '
    .items[] | select(.metadata.name=="busybox") |
    "\(.metadata.namespace)\t\(.status.podIP)\t\(.spec.nodeName)"' | shuf -n1)
fi
if [ -z "$busybox_info" ]; then
  echo "未找到 busybox pod"
  exit 1
fi
IFS=$'\t' read -r ns busybox_ip busybox_node <<< "$busybox_info"
echo ""
echo "busybox 命名空间: $ns"
echo -e "busybox IP: ${BLUE}$busybox_ip${RESET}"
echo -e "busybox 所在 Node: ${YELLOW}$busybox_node${RESET}"
echo ""
# 获取目标 Pod
if $ALL_NS; then
  targets=$(kubectl get po -A -o json | jq -r --arg bn "$busybox_node" '
    .items[] | select(.status.podIP != null) |
    select(.status.podIP | test("^172\\.")) |
    select(.spec.nodeName != $bn) |
    "\(.metadata.namespace)\t\(.metadata.name)\t\(.status.podIP)\t\(.spec.nodeName)"')
else
  targets=$(kubectl get po -n default -o json | jq -r --arg bn "$busybox_node" '
    .items[] | select(.status.podIP != null) |
    select(.status.podIP | test("^172\\.")) |
    select(.spec.nodeName != $bn) |
    "\(.metadata.namespace)\t\(.metadata.name)\t\(.status.podIP)\t\(.spec.nodeName)"')
fi
if [ -z "$targets" ]; then
  echo "未找到同网段但不同节点的 Pod"
  exit 0
fi
total=0
reachable=0
unreachable_list=()
# 遍历目标 Pod
while IFS=$'\t' read -r target_ns target_name target_ip target_node; do
  [ -z "$target_ip" ] && continue
  total=$((total+1))
  [ ${#target_name} -gt $MAX_NAME_LEN ] && target_name="${target_name:0:$MAX_NAME_LEN}..."
  if kubectl exec -n "$ns" busybox -- ping -c 2 -W 1 "$target_ip" &> /dev/null; then
    status="${GREEN}[OK]${RESET}"
    reachable=$((reachable+1))
  else
    status="${RED}[NG]${RESET}"
    unreachable_list+=("$target_ns/$target_name/$target_ip")
  fi
  # 单行显示
  echo -e "命名空间: $target_ns | Pod 名称: $target_name | IP: $target_ip | Node: $target_node | 状态: $status"
done <<< "$targets"
# 输出统计信息
echo ""
echo "本次共找到 Pod 数量为:$total"
echo "其中网络可达数量为:$reachable"
if [ ${#unreachable_list[@]} -gt 0 ]; then
  echo "不可达的 Pod 分别为:"
  for pod_info in "${unreachable_list[@]}"; do
    echo " $pod_info"
  done
fi
exit 0
EOF
chmod +x /script/network_test.sh
/script/network_test.sh
输出信息如下:
busybox 命名空间: default
busybox IP: 172.20.79.67
busybox 所在 Node: k8s-worker01
命名空间: default | Pod 名称: nginx-deployment-6bd444cb7-r65... | IP: 172.28.206.4 | Node: k8s-controller01 | 状态: [OK]
命名空间: default | Pod 名称: nginx-deployment-6bd444cb7-vdl... | IP: 172.25.45.71 | Node: k8s-worker02 | 状态: [OK]
本次共找到 Pod 数量为:2
其中网络可达数量为:2
脚本默认只测试 default 名称空间的 Pod,测试全部名称空间 Pod 的命令:
/script/network_test.sh a
15.6 删除测试资源
删除创建的 Pod 和 Deployment:
kubectl delete deployments.apps nginx-deployment && kubectl delete pod busybox
kubectl get po,deployments.apps
16.安装 dashboard
官方 Github 仓库:https://github.com/kubernetes/dashboard/releases
16.1 安装 helm
官方下载地址:https://github.com/helm/helm/releases
解压软件并验证:
cd /software
tar xf helm-v3.18.6-linux-amd64.tar.gz -C /usr/local/bin/ linux-amd64/helm --strip-components=1
helm version
在线安装命令:
wget https://get.helm.sh/helm-v3.18.6-linux-amd64.tar.gz
tar xf helm-v3.18.6-linux-amd64.tar.gz -C /usr/local/bin/ linux-amd64/helm --strip-components=1
helm version
配置命令自动补全:
source <(helm completion bash)
helm completion bash > /etc/bash_completion.d/helm
16.2 导入镜像
如果是在线安装且网络良好,可以不用提前导入镜像。
拷贝镜像到所有 worker 节点:
cd /software
tar xf dashboard.tar.gz
/script/copy_file.sh w /software/dashboard/all_images.tar
所有 worker 节点导入镜像:
ctr -n k8s.io i import /software/dashboard/all_images.tar
16.3 安装软件
安装软件:
cd /software/dashboard
tar xf kubernetes-dashboard-7.13.0.tgz
helm install kubernetes-dashboard ./kubernetes-dashboard/ --create-namespace --namespace kube-system
在线部署命令:
# 下载软件包
wget https://github.com/kubernetes/dashboard/releases/download/kubernetes-dashboard-7.13.0/kubernetes-dashboard-7.13.0.tgz
# 修改镜像地址为国内源地址(如果网络不好)
tar xf kubernetes-dashboard-7.13.0.tgz
sed -i 's#repository: docker.io#repository: docker.m.daocloud.io#g' kubernetes-dashboard/values.yaml
# 安装 dashboard
helm install kubernetes-dashboard ./kubernetes-dashboard/ --create-namespace --namespace kube-system
查看 Pod 是否都正常:
kubectl get pod -A --show-labels |grep dashboard
16.4 修改 svc 类型
修改类型为 NodePort:
kubectl edit svc -n kube-system kubernetes-dashboard-kong-proxy
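如果不想交互式编辑,也可以用 kubectl patch 直接把类型改为 NodePort(等效示例):
kubectl -n kube-system patch svc kubernetes-dashboard-kong-proxy -p '{"spec":{"type":"NodePort"}}'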
16.5 创建登录 token
16.5.1 创建用户配置
应用资源文件:
cd /software/dashboard && kubectl apply -f dashboard-user.yaml
dashboard-user.yaml 文件内容:
apiVersion: v1
kind: ServiceAccount
metadata:
  name: admin-user
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: admin-user
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
- kind: ServiceAccount
  name: admin-user
  namespace: kube-system
16.5.2 创建临时 token
创建 token:
kubectl -n kube-system create token admin-user
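临时 token 默认有效期较短,如需延长可使用 --duration 参数,例如:
kubectl -n kube-system create token admin-user --duration=24h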
16.5.3 创建永久 token
应用资源文件:
cd /software/dashboard && kubectl apply -f dashboard-user-token.yaml
dashboard-user-token.yaml 文件内容:
apiVersion: v1
kind: Secret
metadata:
  name: admin-user
  namespace: kube-system
  annotations:
    kubernetes.io/service-account.name: "admin-user"
type: kubernetes.io/service-account-token
查看 token:
kubectl get secret admin-user -n kube-system -o jsonpath='{.data.token}' | base64 -d
16.6 查看端口号并登录
查看端口号:
kubectl get svc kubernetes-dashboard-kong-proxy -n kube-system
# 输出信息
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kubernetes-dashboard-kong-proxy NodePort 10.103.63.142 <none> 443:32156/TCP 37m
在浏览器访问并登录:
https://192.168.109.100:32156
在访问界面的 Bearer token 框内输入生成的 token 后点击登录即可。
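如需在脚本中直接取出 NodePort 端口号,也可以使用 jsonpath(示例):
kubectl -n kube-system get svc kubernetes-dashboard-kong-proxy -o jsonpath='{.spec.ports[0].nodePort}'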
17.污点配置
kubeadm 方式部署的 k8s 集群会在初始化时为 controller 节点设置污点(使其不参与普通业务调度),而二进制方式部署的集群需要在部署完成后手动设置污点。
查看所有节点污点:
kubectl describe nodes | grep Taints
设置污点并驱逐当前节点的 Pod:
# 设置污点
kubectl taint nodes k8s-controller01 key1=value1:NoExecute
kubectl taint nodes k8s-controller02 key1=value1:NoExecute
kubectl taint nodes k8s-controller03 key1=value1:NoExecute
# 取消污点
kubectl taint nodes k8s-controller01 key1=value1:NoExecute-
kubectl taint nodes k8s-controller02 key1=value1:NoExecute-
kubectl taint nodes k8s-controller03 key1=value1:NoExecute-
如果不是高可用集群,使用以下操作:
# 设置污点
kubectl taint nodes k8s-controller key1=value1:NoExecute
# 取消污点
kubectl taint nodes k8s-controller key1=value1:NoExecute-
设置污点但不驱逐当前节点的 Pod:
# 设置污点
kubectl taint nodes k8s-controller01 key1=value1:NoSchedule
kubectl taint nodes k8s-controller02 key1=value1:NoSchedule
kubectl taint nodes k8s-controller03 key1=value1:NoSchedule
# 取消污点
kubectl taint nodes k8s-controller01 key1=value1:NoSchedule-
kubectl taint nodes k8s-controller02 key1=value1:NoSchedule-
kubectl taint nodes k8s-controller03 key1=value1:NoSchedule-
如果不是高可用集群,使用以下操作:
# 设置污点
kubectl taint nodes k8s-controller key1=value1:NoSchedule
# 取消污点
kubectl taint nodes k8s-controller key1=value1:NoSchedule-
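如果某些系统组件确实需要调度到已打污点的 controller 节点,可在其 Pod 模板中声明对应的容忍(toleration)。下面是一个最小示例(Pod 名称 toleration-demo 仅为演示,容忍的 key/value/effect 与上文设置的污点一致):
cat<<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: toleration-demo
  namespace: default
spec:
  tolerations:
  - key: "key1"
    operator: "Equal"
    value: "value1"
    effect: "NoSchedule"
  containers:
  - name: busybox
    image: docker.m.daocloud.io/library/busybox:1.28
    command:
    - sleep
    - "3600"
EOF
注意:容忍只是允许该 Pod 调度到打了污点的节点,并不保证一定调度到这些节点。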