Kubernetes 创建 Pod 沙箱失败:rpc error: code = Unknown desc = NetworkPlugin cni failed to set up pod network(网络插件 cni 无法设置 Pod 网络)

在 k8s(v1.10)集群上创建 Redis Pod 时出现问题,Pod 的创建一直卡在 “ContainerCreating” 状态

Type     Reason                  Age                   From                Message
  ----     ------                  ----                  ----                -------
  Normal   Scheduled               30m                   default-scheduler   Successfully assigned redis to k8snode02
  Normal   SuccessfulMountVolume   30m                   kubelet, k8snode02  MountVolume.SetUp succeeded for volume "default-token-f8tcg"
  Warning  FailedCreatePodSandBox  5m (x1202 over 30m)   kubelet, k8snode02  Failed create pod sandbox: rpc error: code = Unknown desc = NetworkPlugin cni failed to set up pod "redis_default" network: failed to find plugin "loopback" in path [/opt/loopback/bin /opt/cni/bin]
  Normal   SandboxChanged          47s (x1459 over 30m)  kubelet, k8snode02  Pod sandbox changed, it will be killed and re-created.



root@KubernetesMaster:/opt/cni/bin# kubectl get pods
NAME                   READY   STATUS              RESTARTS   AGE
nginx-5c7588df-5zds6   0/1     ContainerCreating   0          21m
root@KubernetesMaster:/opt/cni/bin# kubectl get nodes
NAME               STATUS   ROLES    AGE   VERSION
kubernetesmaster   Ready    master   26m   v1.13.4
kubernetesslave1   Ready    <none>   22m   v1.13.4

kubectl describe pods
Name:               nginx-5c7588df-5zds6
Namespace:          default
Priority:           0
PriorityClassName:  <none>
Node:               kubernetesslave1/
Start Time:         Sun, 17 Mar 2019 05:13:30 +0000
Labels:             app=nginx
Annotations:        <none>
Status:             Pending
Controlled By:      ReplicaSet/nginx-5c7588df
    Container ID:
    Image:          nginx
    Image ID:
    Port:           <none>
    Host Port:      <none>
    State:          Waiting
      Reason:       ContainerCreating
    Ready:          False
    Restart Count:  0
    Environment:    <none>
      /var/run/secrets/kubernetes.io/serviceaccount from default-token-qtfbs (ro)
  Type              Status
  Initialized       True
  Ready             False
  ContainersReady   False
  PodScheduled      True
    Type:        Secret (a volume populated by a Secret)
    SecretName:  default-token-qtfbs
    Optional:    false
QoS Class:       BestEffort
Node-Selectors:  <none>
Tolerations:     node.kubernetes.io/not-ready:NoExecute for 300s
                 node.kubernetes.io/unreachable:NoExecute for 300s
  Type     Reason                  Age                    From                       Message
  ----     ------                  ----                   ----                       -------
  Normal   Scheduled               18m                    default-scheduler          Successfully assigned default/nginx-5c7588df-5zds6 to kubernetesslave1
  Warning  FailedCreatePodSandBox  18m                    kubelet, kubernetesslave1  Failed create pod sandbox: rpc error: code = Unknown desc = failed to set up sandbox container "123d527490944d80f44b1976b82dbae5dc56934aabf59cf89f151736d7ea8adc" network for pod "nginx-5c7588df-5zds6": NetworkPlugin cni failed to set up pod "nginx-5c7588df-5zds6_default" network: stat /var/lib/calico/nodename: no such file or directory: check that the calico/node container is running and has mounted /var/lib/calico/
  Warning  FailedCreatePodSandBox  18m                    kubelet, kubernetesslave1  Failed create pod sandbox: rpc error: code = Unknown desc = failed to set up sandbox container "8cc5e62ebaab7075782c2248e00d795191c45906cc9579464a00c09a2bc88b71" network for pod "nginx-5c7588df-5zds6": NetworkPlugin cni failed to set up pod "nginx-5c7588df-5zds6_default" network: stat /var/lib/calico/nodename: no such file or directory: check that the calico/node container is running and has mounted /var/lib/calico/
  Warning  FailedCreatePodSandBox  18m                    kubelet, kubernetesslave1  Failed create pod sandbox: rpc error: code = Unknown desc = failed to set up sandbox container "30ffdeace558b0935d1ed3c2e59480e2dd98e983b747dacae707d1baa222353f" network for pod "nginx-5c7588df-5zds6": NetworkPlugin cni failed to set up pod "nginx-5c7588df-5zds6_default" network: stat /var/lib/calico/nodename: no such file or directory: check that the calico/node container is running and has mounted /var/lib/calico/
  Warning  FailedCreatePodSandBox  18m                    kubelet, kubernetesslave1  Failed create pod sandbox: rpc error: code = Unknown desc = failed to set up sandbox container "630e85451b6ce2452839c4cfd1ecb9acce4120515702edf29421c123cf231213" network for pod "nginx-5c7588df-5zds6": NetworkPlugin cni failed to set up pod "nginx-5c7588df-5zds6_default" network: stat /var/lib/calico/nodename: no such file or directory: check that the calico/node container is running and has mounted /var/lib/calico/
  Warning  FailedCreatePodSandBox  18m                    kubelet, kubernetesslave1  Failed create pod sandbox: rpc error: code = Unknown desc = failed to set up sandbox container "820b919b7edcfc3081711bb78b79d33e5be3f7dafcbad29fe46b6d7aa22227aa" network for pod "nginx-5c7588df-5zds6": NetworkPlugin cni failed to set up pod "nginx-5c7588df-5zds6_default" network: stat /var/lib/calico/nodename: no such file or directory: check that the calico/node container is running and has mounted /var/lib/calico/
  Warning  FailedCreatePodSandBox  18m                    kubelet, kubernetesslave1  Failed create pod sandbox: rpc error: code = Unknown desc = failed to set up sandbox container "abbfb5d2756f12802072039dec20ba52f546ae755aaa642a9a75c86577be589f" network for pod "nginx-5c7588df-5zds6": NetworkPlugin cni failed to set up pod "nginx-5c7588df-5zds6_default" network: stat /var/lib/calico/nodename: no such file or directory: check that the calico/node container is running and has mounted /var/lib/calico/
  Warning  FailedCreatePodSandBox  18m                    kubelet, kubernetesslave1  Failed create pod sandbox: rpc error: code = Unknown desc = failed to set up sandbox container "dfeb46ffda4d0f8a434f3f3af04328fcc4b6c7cafaa62626e41b705b06d98cc4" network for pod "nginx-5c7588df-5zds6": NetworkPlugin cni failed to set up pod "nginx-5c7588df-5zds6_default" network: stat /var/lib/calico/nodename: no such file or directory: check that the calico/node container is running and has mounted /var/lib/calico/
  Warning  FailedCreatePodSandBox  18m                    kubelet, kubernetesslave1  Failed create pod sandbox: rpc error: code = Unknown desc = failed to set up sandbox container "9ae3f47bb0282a56e607779d3267127ee8b0ae1d7f416f5a184682119203b1c8" network for pod "nginx-5c7588df-5zds6": NetworkPlugin cni failed to set up pod "nginx-5c7588df-5zds6_default" network: stat /var/lib/calico/nodename: no such file or directory: check that the calico/node container is running and has mounted /var/lib/calico/
  Warning  FailedCreatePodSandBox  18m                    kubelet, kubernetesslave1  Failed create pod sandbox: rpc error: code = Unknown desc = failed to set up sandbox container "546d07f1864728b2e2675c066775f94d658e221ada5fb4ed6bf6689ec7b8de23" network for pod "nginx-5c7588df-5zds6": NetworkPlugin cni failed to set up pod "nginx-5c7588df-5zds6_default" network: stat /var/lib/calico/nodename: no such file or directory: check that the calico/node container is running and has mounted /var/lib/calico/
  Normal   SandboxChanged          18m (x12 over 18m)     kubelet, kubernetesslave1  Pod sandbox changed, it will be killed and re-created.
  Warning  FailedCreatePodSandBox  3m39s (x829 over 18m)  kubelet, kubernetesslave1  (combined from similar events): Failed create pod sandbox: rpc error: code = Unknown desc = failed to set up sandbox container "f586be437843537a3082f37ad139c88d0eacfbe99ddf00621efd4dc049a268cc" network for pod "nginx-5c7588df-5zds6": NetworkPlugin cni failed to set up pod "nginx-5c7588df-5zds6_default" network: stat /var/lib/calico/nodename: no such file or directory: check that the calico/node container is running and has mounted /var/lib/calico/


root@kubernetesslave1:/home/ubuntu# docker ps
CONTAINER ID        IMAGE                  COMMAND                  CREATED             STATUS              PORTS               NAMES
5ad5500e8270        fadcc5d2b066           "/usr/local/bin/kube…"   3 minutes ago       Up 3 minutes                            k8s_kube-proxy_kube-proxy-f24gd_kube-system_4e2d313a-4873-11e9-a33a-06516e7d78c4_1
b1c9929ebe9e        k8s.gcr.io/pause:3.1   "/pause"                 3 minutes ago       Up 3 minutes                            k8s_POD_calico-node-749qx_kube-system_4e2d8c9c-4873-11e9-a33a-06516e7d78c4_1
ceb78340b563        k8s.gcr.io/pause:3.1   "/pause"                 3 minutes ago       Up 3 minutes                            k8s_POD_kube-proxy-f24gd_kube-system_4e2d313a-4873-11e9-a33a-06516e7d78c4_1
    root@kubernetesslave1:/home/ubuntu# docker ps
    CONTAINER ID        IMAGE                  COMMAND                  CREATED             STATUS                  PORTS               NAMES
    94b2994401d0        k8s.gcr.io/pause:3.1   "/pause"                 1 second ago        Up Less than a second                       k8s_POD_nginx-5c7588df-5zds6_default_677a722b-4873-11e9-a33a-06516e7d78c4_534
    5ad5500e8270        fadcc5d2b066           "/usr/local/bin/kube…"   4 minutes ago       Up 4 minutes                                k8s_kube-proxy_kube-proxy-f24gd_kube-system_4e2d313a-4873-11e9-a33a-06516e7d78c4_1
    b1c9929ebe9e        k8s.gcr.io/pause:3.1   "/pause"                 4 minutes ago       Up 4 minutes                                k8s_POD_calico-node-749qx_kube-system_4e2d8c9c-4873-11e9-a33a-06516e7d78c4_1
    ceb78340b563        k8s.gcr.io/pause:3.1   "/pause"                 4 minutes ago       Up 4 minutes                                k8s_POD_kube-proxy-f24gd_kube-system_4e2d313a-4873-11e9-a33a-06516e7d78c4_1
root@kubernetesslave1:/home/ubuntu# cd /etc/cni
root@kubernetesslave1:/etc/cni# ls -ltr
total 4
drwxr-xr-x 2 root root 4096 Mar 17 05:19 net.d
root@kubernetesslave1:/etc/cni# cd /opt/cni
root@kubernetesslave1:/opt/cni# ls -ltr
total 4
drwxr-xr-x 2 root root 4096 Mar 17 05:19 bin
root@kubernetesslave1:/opt/cni# cd bin
root@kubernetesslave1:/opt/cni/bin# ls -ltr
total 107440
-rwxr-xr-x 1 root root  3890407 Aug 17  2017 bridge
-rwxr-xr-x 1 root root  3475802 Aug 17  2017 ipvlan
-rwxr-xr-x 1 root root  3520724 Aug 17  2017 macvlan
-rwxr-xr-x 1 root root  3877986 Aug 17  2017 ptp
-rwxr-xr-x 1 root root  3475750 Aug 17  2017 vlan
-rwxr-xr-x 1 root root  9921982 Aug 17  2017 dhcp
-rwxr-xr-x 1 root root  2605279 Aug 17  2017 sample
-rwxr-xr-x 1 root root 32351072 Mar 17 05:19 calico
-rwxr-xr-x 1 root root 31490656 Mar 17 05:19 calico-ipam
-rwxr-xr-x 1 root root  2856252 Mar 17 05:19 flannel
-rwxr-xr-x 1 root root  3084347 Mar 17 05:19 loopback
-rwxr-xr-x 1 root root  3036768 Mar 17 05:19 host-local
-rwxr-xr-x 1 root root  3550877 Mar 17 05:19 portmap
-rwxr-xr-x 1 root root  2850029 Mar 17 05:19 tuning


确保 /etc/cni/net.d 以及与之配套的 /opt/cni/bin 目录都存在,并且在 *所有节点* 上都正确放置了 CNI 配置文件和二进制文件。具体到 flannel,可以使用 flannel cni 仓库



我在 GCP 上的 GKE 集群中使用了一个抢占式节点池,遇到了这个问题。多亏了 @mdaniel 关于检查 /etc/cni/net.d 完整性的提示,我通过 SSH 登录测试集群的一个节点(命令:gcloud compute ssh <name of some node> --zone <zone-of-cluster> --internal-ip),成功复现了这个问题:我只是编辑了文件 /etc/cni/net.d/10-gke-ptp.conflist,把 "routes": [ {"dst": "0.0.0.0/0"} ] 中的值从 0.0.0.0/0 改成了 1.0.0.0/0。
之后,我删除了在该节点上运行的 pod,它们全部卡在 ContainerCreating 状态,kubelet 不断产生带有错误 Failed create pod sandbox: rpc error: code... 的事件。
由于从 GKE 中删除故障节点能够解决生产环境中的问题,我编写了一个 Python 脚本:先列出集群上的所有事件,并筛选出包含关键字 "Failed create pod sandbox: rpc error: code" 的事件;然后遍历这些事件,获取它们对应的 pod,再由 pod 得到所在的节点;最后循环遍历这些节点,通过 Kubernetes API 和 Compute API 的 Python 客户端将它们删除。该脚本使用了以下库:kubernetes 和 google-cloud-compute


# Finds the nodes hosting pods that are stuck on "Failed (to) create pod sandbox"
# events and deletes those nodes from both the Kubernetes API and the
# Compute Engine API.
import os

from kubernetes import client, config
from google.cloud.compute_v1.services.instances import InstancesClient

# Lower-cased substrings that mark an event as a sandbox-creation failure.
# Both wordings are listed: the kubelet events quoted in this document read
# "Failed create pod sandbox", which the "Failed to create ..." form alone
# would never match.
ERROR_KEYWORDS = [
    'Failed to create pod sandbox'.lower(),
    'Failed create pod sandbox'.lower(),
]

# NOTE(review): the published excerpt uses PROJECT_ID, CLUSTER_ZONE and
# gcp_client without showing where they come from. Reading them from the
# environment and building a default InstancesClient is an assumption --
# adapt to your own configuration / credential handling.
# Resolved up front so a missing setting aborts before anything is deleted.
PROJECT_ID = os.environ["PROJECT_ID"]
CLUSTER_ZONE = os.environ["CLUSTER_ZONE"]

# NOTE(review): assumes a local kubeconfig; use config.load_incluster_config()
# when running inside the cluster.
config.load_kube_config()
v1 = client.CoreV1Api()
gcp_client = InstancesClient()

events_result = v1.list_event_for_all_namespaces()

# filter only the events containing ERROR_KEYWORDS
# (event.message is optional in the API, hence the `or ""`; any() keeps an
# event from being collected once per matching keyword)
filtered_events = [
    event
    for event in events_result.items
    if any(keyword in (event.message or "").lower() for keyword in ERROR_KEYWORDS)
]

# gets the list of pods from those events
pods_list = {}

for event in filtered_events:
    try:
        pod = v1.read_namespaced_pod(
            event.involved_object.name,
            namespace=event.involved_object.namespace,
        )

        pod_dict = {
            "name": event.involved_object.name,
            "namespace": event.involved_object.namespace,
            "node": pod.spec.node_name
        }

        pods_list[event.involved_object.name] = pod_dict

    except Exception as e:
        # Best effort: the pod may already be gone (or the event may not
        # refer to a pod at all); report it and keep going.
        print(f'Skipping event for "{event.involved_object.name}": {e}')

# Get the nodes from those pods
broken_nodes = set()

for name, pod_dict in pods_list.items():
    if pod_dict.get('node'):
        broken_nodes.add(pod_dict['node'])

broken_nodes = list(broken_nodes)

# Deletes the nodes from both Kubernetes API and Compute Engine API
if broken_nodes:
    broken_nodes_str = ", ".join(broken_nodes)
    print(f'BROKEN NODES: "{broken_nodes_str}"')
    for node in broken_nodes:
        # The two deletions are attempted independently so that a failure
        # on one API does not prevent the call to the other.
        try:
            api_response = v1.delete_node(node)
            print(f'Node "{node}" deleted from the Kubernetes API')
        except Exception as e:
            print(f'Failed to delete node "{node}" from the Kubernetes API: {e}')

        try:
            result = gcp_client.delete(project=PROJECT_ID, zone=CLUSTER_ZONE, instance=node)
            print(f'Instance "{node}" deletion requested from the Compute Engine API')
        except Exception as e:
            print(f'Failed to delete instance "{node}" from the Compute Engine API: {e}')


AWS EKS 尚不支持 t3a、m5ad 和 r5ad 实例



kubectl drain node1 node2 --delete-local-data --force --ignore-daemonsets




我在 AWS EKS 上添加 PVC 时遇到了这个问题。
将 aws-node CNI 插件更新到最新版本后,问题得以解决。



1. 初始化主节点:kubeadm init --apiserver-advertise-address=<master-ip>
2. 安装 Pod 网络 “Weave Net”:kubectl apply -f "https://cloud.weave.works/k8s/net?k8s-version=$(kubectl version | base64 | tr -d '\n')&env.IPALLOC_RANGE=<pod-cidr>"



#Reset the kubernetes environment
#[root@centos8-Master: ~]# k get nodes
#NAME             STATUS   ROLES           AGE   VERSION
#centos8-master   Ready    control-plane   14m   v1.24.1
#centos8-slave    Ready    <none>          11m   v1.24.3

#Master Node
#1. Delete the nodes
#First delete all pods, deployments, svc 
#kubectl delete --all pods
#kubectl delete --all deployments
#kubectl delete --all svc
#kubectl drain centos8-slave --ignore-daemonsets --delete-emptydir-data --force
#kubectl delete node centos8-slave
#Worker Node
#2. Go to worker node, stop all the kubelet services.
#[root@centos8-Slave rprasads]# kubectl version --short
#Client Version: v1.24.3
#Kustomize Version: v4.5.4
#[root@centos8-Slave rprasads]# systemctl stop kubelet
#[root@centos8-Slave rprasads]# netstat -tulnp |grep kube
#kill -9 <pid> [kube-proxy]
#Master Node
#2. Reset the kubeadm.
#$ sudo kubeadm reset
#$ sudo swapoff -a 
#Master Node
#3. Get your kubeadm version
#[root@centos8-Master: ~]# kubectl version --short
#Flag --short has been deprecated, and will be removed in the future. The --short output will become the default.
#Client Version: v1.24.1
#Kustomize Version: v4.5.4
#Server Version: v1.24.3
#Master Node
#4.On Master Initialize the kubeadm with proper network address and version
#$ kubeadm init --apiserver-advertise-address=<master-ip> --pod-network-cidr=<pod-cidr>

##Download calico yaml file from the site: Refer the documentation https://projectcalico.docs.tigera.io/getting-started/kubernetes/self-managed-onprem/onpremises#install-calico-with-kubernetes-api-datastore-more-than-50-nodes
#$ curl https://projectcalico.docs.tigera.io/manifests/calico.yaml -O
#$ kubectl apply -f calico.yaml
#Worker Node
#5. Go to worker node and add the node with the command displayed.
# kubeadm join <master-ip>:6443 --token h0nuxq.zk9m731nc4ia93pq --discovery-token-ca-cert-hash sha256:1682644baf3433caeb0e6f9099ed487ef48b94ab6a0314df88e3ff42ae501a13
#Master Node 
#6.On the master node run below commands.
#$ sudo rm -rf $HOME/.kube
#$ mkdir -p $HOME/.kube
#$ sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
#$ sudo chown $(id -u):$(id -g) $HOME/.kube/config
#$ sudo systemctl enable docker.service
#$ sudo service kubelet restart
#$ kubectl get nodes
#Test your new kubernetes cluster environment.
#[root@centos8-Master: ~]# kubectl run nginx --image=nginx
#Wait for some time.
#[root@centos8-Master: ~]# k describe pods nginx
#Normal  Scheduled  21s   default-scheduler  Successfully assigned default/nginx to centos8-slave
#[root@centos8-Master: ~]# k get pods
#nginx   1/1     Running   0          25s
