1.安装centos虚拟机
可以借鉴之前blog
2.配置
设置系统主机名以及 Host 文件的相互解析
hostnamectl set-hostname k8s-master01
安装依赖包
yum install -y conntrack ntpdate ntp ipvsadm ipset jq iptables curl sysstat libseccomp wget vim net-tools git
设置防火墙为 iptables 并设置空规则
systemctl stop firewalld && systemctl disable firewalld
yum -y install iptables-services && systemctl start iptables && systemctl enable iptables&& iptables -F && service iptables save
关闭 swap 与 SELINUX
swapoff -a && sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
setenforce 0 && sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
调整内核参数,对于 K8S
cat > kubernetes.conf <<EOF
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
net.ipv4.tcp_tw_recycle=0
vm.swappiness=0 # 禁止使用 swap 空间,只有当系统 OOM 时才允许使用它
vm.overcommit_memory=1 # 不检查物理内存是否够用
vm.panic_on_oom=0 # 开启 OOM
fs.inotify.max_user_instances=8192
fs.inotify.max_user_watches=1048576
fs.file-max=52706963
fs.nr_open=52706963
net.ipv6.conf.all.disable_ipv6=1
net.netfilter.nf_conntrack_max=2310720
EOF
cp kubernetes.conf /etc/sysctl.d/kubernetes.conf
modprobe br_netfilter
sysctl -p /etc/sysctl.d/kubernetes.conf
调整系统时区
# 设置系统时区为 中国/上海
timedatectl set-timezone Asia/Shanghai
# 将当前的 UTC 时间写入硬件时钟
timedatectl set-local-rtc 0
# 重启依赖于系统时间的服务
systemctl restart rsyslog
systemctl restart crond
关闭系统不需要服务
systemctl stop postfix && systemctl disable postfix
设置 rsyslogd和 systemd journald
mkdir /var/log/journal # 持久化保存日志的目录
mkdir /etc/systemd/journald.conf.d
cat >/etc/systemd/journald.conf.d/99-prophet.conf<<EOF
[Journal]
# 持久化保存到磁盘
Storage=persistent
# 压缩历史日志
Compress=yes
SyncIntervalSec=5m
RateLimitInterval=30s
RateLimitBurst=1000
# 最大占用空间10G
SystemMaxUse=10G
# 单日志文件最大200M
SystemMaxFileSize=200M
# 日志保存时间 2 周
MaxRetentionSec=2week
# 不将日志转发到 syslog
ForwardToSyslog=no
EOF
systemctl restart systemd-journald
升级系统内核为 4.4
CentOS 7.x 系统自带的 3.10.x 内核存在一些 Bugs,导致运行的 Docker、Kubernetes 不稳定,需要先安装 elrepo 源:
rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-3.el7.elrepo.noarch.rpm
# 安装完成后检查 /boot/grub2/grub.cfg 中对应内核 menuentry 中是否包含 initrd16 配置,如果没有,再安装一次!
yum --enablerepo=elrepo-kernel install -y kernel-lt
# 设置开机从新内核启动
cat /boot/grub2/grub.cfg | grep menuentry
grub2-set-default 'CentOS Linux (4.4.189-1.el7.elrepo.x86_64) 7 (Core)'(选择上面所示的内核)
grub2-editenv list
grub2-mkconfig -o /boot/grub2/grub.cfg
reboot
#kube-proxy开启ipvs的前置条件
modprobe br_netfilter
cat>/etc/sysconfig/modules/ipvs.modules<<EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
# 注意:内核 4.19 以下请改写为 modprobe -- nf_conntrack_ipv4(4.19 起该模块更名为 nf_conntrack);此行必须是 # 注释,否则 bash 执行该文件时会报错
modprobe -- nf_conntrack
EOF
chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack
#安装 Docker 软件
yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo \
http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum update -y && yum install -y docker-ce
## 创建 /etc/docker 目录
mkdir /etc/docker
# 配置 daemon.
cat>/etc/docker/daemon.json<<EOF
{
"registry-mirrors": ["https://<my-docker-mirror-host>"],
"exec-opts":["native.cgroupdriver=systemd"],
"log-driver":"json-file",
"log-opts": {
"max-size": "100m"
}
}
EOF
mkdir -p /etc/systemd/system/docker.service.d
# 重启docker服务
systemctl daemon-reload && systemctl restart docker && systemctl enable docker
安装Kubeadm (主从配置)
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=http://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=http://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg http://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
yum -y install kubeadm-1.19.1 kubectl-1.19.1 kubelet-1.19.1
systemctl enable kubelet.service
初始化主节点
kubeadm config print init-defaults>kubeadm-config.yaml
vim kubeadm-config.yaml
localAPIEndpoint:
advertiseAddress: 192.168.0.10
imageRepository: registry.aliyuncs.com/google_containers
kubernetesVersion: v1.19.1
networking:
podSubnet: 10.244.0.0/16
serviceSubnet: 10.96.0.0/12
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
# 注意:SupportIPVSProxyMode 特性门控在 v1.18 已移除,v1.19 中不要再配置 featureGates,直接指定 mode: ipvs 即可,否则 kubeadm init 会报 unrecognized feature gate 错误
mode: ipvs
kubeadm init --config=kubeadm-config.yaml --upload-certs | tee kubeadm-init.log
加入主节点以及其余工作节点
执行安装日志中的加入命令即可
部署网络
kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
3.遇到的问题
1.coredns处于pending状态
检查pod
[root@host121 k3s]# kubectl describe pods coredns-d798c9dd-mpbpc -n kube-system Name: coredns-d798c9dd-mpbpc Namespace: kube-system Priority: 0 Node: <none> Labels: k8s-app=kube-dns pod-template-hash=d798c9dd Annotations: <none> Status: Pending IP: IPs: <none> Controlled By: ReplicaSet/coredns-d798c9dd Containers: coredns: Image: coredns/coredns:1.6.3 Ports: 53/UDP, 53/TCP, 9153/TCP Host Ports: 0/UDP, 0/TCP, 0/TCP Args: -conf /etc/coredns/Corefile Limits: memory: 170Mi Requests: cpu: 100m memory: 70Mi Liveness: http-get http://:8080/health delay=60s timeout=5s period=10s #success=1 #failure=5 Readiness: http-get http://:8181/ready delay=10s timeout=5s period=10s #success=1 #failure=5 Environment: <none> Mounts: /etc/coredns from config-volume (ro) /var/run/secrets/kubernetes.io/serviceaccount from coredns-token-bvb5w (ro) Conditions: Type Status PodScheduled False Volumes: config-volume: Type: ConfigMap (a volume populated by a ConfigMap) Name: coredns Optional: false coredns-token-bvb5w: Type: Secret (a volume populated by a Secret) SecretName: coredns-token-bvb5w Optional: false QoS Class: Burstable Node-Selectors: beta.kubernetes.io/os=linux Tolerations: CriticalAddonsOnly node.kubernetes.io/not-ready:NoExecute for 300s node.kubernetes.io/unreachable:NoExecute for 300s Events: Type Reason Age From Message ---- ------ ---- ---- ------- Warning FailedScheduling <unknown> default-scheduler 0/4 nodes are available: 4 node(s) had taints that the pod didn't tolerate. Warning FailedScheduling <unknown> default-scheduler 0/1 nodes are available: 1 node(s) had taints that the pod didn't tolerate. Warning FailedScheduling <unknown> default-scheduler 0/4 nodes are available: 4 node(s) had taints that the pod didn't tolerate. [root@host121 k3s]#
显示的是4个节点有污点无法分配
查看污点
[root@host121 k3s]# kubectl describe nodes |grep Taints Taints: node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule Taints: node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule Taints: node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule Taints: node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule [root@host121 k3s]#
显示Noschedule,可以通过kubectl taint命令进行设定如下三种方式,具体说明如下:
- NoSchedule: 不调度
- PreferNoSchedule: 尽量不调度
- NoExecute: 不调度并且立即驱逐节点上现存pod
解决方法
通过 kubectl taint 命令删除污点:--all 表示作用于所有节点,污点键名末尾的 - 表示删除该污点(而非通配)。
[root@host121 k3s]# kubectl taint nodes --all node.cloudprovider.kubernetes.io/uninitialized- node/host121 untainted node/host123 untainted node/host122 untainted node/host124 untainted [root@host121 k3s]#
设定完毕,之后再次确认状态
[root@host121 k3s]# kubectl describe nodes |grep Taints Taints: <none> Taints: <none> Taints: <none> Taints: <none> [root@host121 k3s]#
2.报错:The connection to the server localhost:8080 was refused – did you specify the right host or port?
安装完成以后kubectl get node
显示The connection to the server localhost:8080 was refused – did you specify the right host or port?
解决方法
mkdir -p $HOME/.kube sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config sudo chown $(id -u):$(id -g) $HOME/.kube/config
3.k8s.gcr的镜像无法下载
在init k8s主节点时,k8s的镜像无法下载下来
解决方法
- 直接指定国内镜像代理仓库(如阿里云代理仓库)进行镜像拉取下载。
- 成功拉取代理仓库中的镜像后,再将其tag打标签成为k8s.gcr.io对应镜像。
- 最后再删除从代理仓库中拉取下来的镜像。
- 要确保imagePullPolicy策略是IfNotPresent,即本地有镜像则使用本地镜像,不拉取!
或者将下载的镜像放到harbor私有仓库里,然后将image下载源指向harbor私仓地址。
阿里云代理仓库地址为:registry.aliyuncs.com/google_containers
比如下载k8s.gcr.io/coredns:1.6.5
可以代理为:registry.aliyuncs.com/google_containers/coredns:1.6.5
以上总结三个步骤:
# docker pull registry.aliyuncs.com/google_containers/coredns:1.6.5 # docker tag registry.aliyuncs.com/google_containers/coredns:1.6.5 k8s.gcr.io/coredns:1.6.5 # docker rmi registry.aliyuncs.com/google_containers/coredns:1.6.5
还有一个方法
直接在kubeadm-config.yaml中指定镜像地址
将imageRepository: k8s.gcr.io
改为imageRepository: registry.aliyuncs.com/google_containers
即可
4.查看k8s的相关报错日志
看系统日志 cat /var/log/messages 用kubectl 查看日志 # 注意:使用Kubelet describe 查看日志,一定要带上 命名空间,否则会报如下错误 [root@node2 ~]# kubectl describe pod coredns-6c65fc5cbb-8ntpv Error from server (NotFound): pods "coredns-6c65fc5cbb-8ntpv" not found kubectl describe pod kubernetes-dashboard-849cd79b75-s2snt --namespace kube-system kubectl logs -f pods/monitoring-influxdb-fc8f8d5cd-dbs7d -n kube-system kubectl logs --tail 200 -f kube-apiserver -n kube-system |more kubectl logs --tail 200 -f podname -n jenkins 用journalctl查看日志非常管用 journalctl -u kube-scheduler journalctl -xefu kubelet journalctl -u kube-apiserver journalctl -u kubelet |tail journalctl -xe 用docker查看日志 docker logs c36c56e4cfa3 (容器id)
5.k8s异常:failed to get container info for “/system.slice/docker.service
我是用报错日志查看了kubelet的相关日志,发现一直failed to get container info for “/system.slice/docker.service
错误,原来是因为k8s和docker版本问题
在10-kubeadm.conf文件中”KUBELET_CGROUP_ARGS”属性添加”--runtime-cgroups=/systemd/system.slice --kubelet-cgroups=/systemd/system.slice
” ,如果不存在”KUBELET_CGROUP_ARGS”属性则新增。
10-kubeadm.conf的路径网上提到的多为:/etc/systemd/system/kubelet.service.d/10-kubeadm.conf
,本人的机器路径为:/usr/lib/systemd/system/kubelet.service.d
# vi /etc/systemd/system/kubelet.service.d/10-kubeadm.conf [Service] Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf" Environment="KUBELET_SYSTEM_PODS_ARGS=--pod-manifest-path=/etc/kubernetes/manifests --allow-privileged=true" Environment="KUBELET_NETWORK_ARGS=--network-plugin=cni --cni-conf-dir=/etc/cni/net.d --cni-bin-dir=/opt/cni/bin" Environment="KUBELET_DNS_ARGS=--cluster-dns=10.96.0.10 --cluster-domain=cluster.local" Environment="KUBELET_AUTHZ_ARGS=--authorization-mode=Webhook --client-ca-file=/etc/kubernetes/pki/ca.crt" Environment="KUBELET_CADVISOR_ARGS=--cadvisor-port=0" Environment="KUBELET_CGROUP_ARGS=--cgroup-driver=systemd --runtime-cgroups=/systemd/system.slice --kubelet-cgroups=/systemd/system.slice" Environment="KUBELET_CERTIFICATE_ARGS=--rotate-certificates=true --cert-dir=/var/lib/kubelet/pki" ExecStart= ExecStart=/usr/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_SYSTEM_PODS_ARGS $KUBELET_NETWORK_ARGS $KUBELET_DNS_ARGS $KUBELET_AUTHZ_ARGS $KUBELET_CADVISOR_ARGS $KUBELET_CGROUP_ARGS $KUBELET_CERTIFICATE_ARGS $KUBELET_EXTRA_ARGS # systemctl daemon-reload # systemctl restart kubelet
6.CoreDNS 添加自定义DNS解析记录
修改coredns的dnsconfig
kubectl edit configmap coredns -n kube-system
apiVersion: v1
data:
Corefile: |
.:53 {
errors
health {
lameduck 5s
}
ready
kubernetes cluster.local in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
ttl 30
}
hosts {
192.168.0.40 k8s-node04
fallthrough
}
prometheus :9153
forward . /etc/resolv.conf {
max_concurrent 1000
}
cache 30
loop
reload
loadbalance
}
7.k8s安装报错 Error: unknown flag: –experimental-upload-certs
unknown flag: --experimental-upload-certs,该参数已更名,将 --experimental-upload-certs 替换为 --upload-certs 即可
8.k8s退出集群加入集群
主节点
1.先将节点设置为维护模式
kubectl drain nodename --delete-local-data --force --ignore-daemonsets
2.删除节点
kubectl delete node nodename
3.获取加入token
kubeadm token create --ttl 0 --print-join-command
kubeadm join 192.168.0.10:6443 --token 8h5slq.k6oor7yi4gbo0vie --discovery-token-ca-cert-hash sha256:52d80ba826ca20b3cd6452626493735d5fa8f093113cb47e954840fc3586087b
从节点
1.停掉kubelet
systemctl stop kubelet
2.删除之前的相关文件
rm -rf /etc/kubernetes/*
kubeadm reset
iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
3.加入节点,按照主节点提供的Token加入
kubeadm join 192.168.0.10:6443 --token 8h5slq.k6oor7yi4gbo0vie --discovery-token-ca-cert-hash sha256:52d80ba826ca20b3cd6452626493735d5fa8f093113cb47e954840fc3586087b