Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kubeprober兼容docker和containerd的检测 #160

Merged
merged 2 commits into from
Jul 6, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 71 additions & 41 deletions probers/node/service/service.sh
Original file line number Diff line number Diff line change
@@ -1,61 +1,91 @@
#!/bin/bash

# Settings shared by the checks below, read from the dice cluster config.
# Read errors are silenced on the command that actually opens the file, so a
# missing or unreadable config just leaves the values empty.
dice_config_file=/netdata/dice-ops/dice-config/config.yaml

# Second whitespace-separated field of the line(s) containing "vendor".
cluster_vendor=$(grep vendor "$dice_config_file" 2>/dev/null | awk '{print $2}')

# Container runtime name: the config key (spaces stripped) that starts with
# "docker" or "containerd".
cri_name=$(sed 's/ //g' "$dice_config_file" 2>/dev/null \
  | grep -E "^docker|^containerd" | awk -F: '{print $1}')

# containerd socket: last field of the first `systemctl status containerd`
# line that mentions containerd.sock.
container_socket_path=$(systemctl status containerd 2>/dev/null \
  | grep containerd.sock | head -1 | awk '{print $NF}')
# Without a value, `ctr -a $container_socket_path -n k8s.io` would swallow "-n"
# as the address, so fall back to containerd's default socket.
container_socket_path=${container_socket_path:-/run/containerd/containerd.sock}

# is_cs is true for the vendors cs, cs_managed and edas.
is_cs=false
if [[ "$cluster_vendor" == cs || "$cluster_vendor" == cs_managed || "$cluster_vendor" == edas ]]; then
  is_cs=true
fi

## docker层面检查
# Check whether the container runtime service (docker or containerd) is running.
# Globals:  cri_name (read) - systemd unit name of the runtime
# Outputs:  "host_<cri_name>status ok", or
#           "host_<cri_name>status error <cri_name> not running"
function check_cri_status() {
  # Keep the metric name a single token (e.g. host_dockerstatus) so that the
  # second field of the line is the status, like "host_kubeletstatus ok".
  local metric="host_${cri_name}status"

  if systemctl is-active "$cri_name" | grep -q '^active'; then
    echo "$metric" ok
  else
    echo "$metric" error "$cri_name not running"
  fi
}

# Check the number of containers reported by the container runtime.
# Globals:  cri_name (read), container_socket_path (read, non-docker only)
# Outputs:  "host_container ok", or a single info line when the count is
#           greater than 200
function check_container_number() {
  local num

  if [[ "$cri_name" == "docker" ]]; then
    num=$(docker info -f '{{.Containers}}')
  else
    # Any runtime other than docker is queried through ctr in the k8s.io
    # namespace; docker itself is not invoked on this path.
    num=$(ctr -a "$container_socket_path" -n k8s.io containers ls -q | wc -l)
  fi

  # An empty count (query failed) is treated as 0.
  if [[ "${num:-0}" -gt 200 ]]; then
    echo host_container info "container(with exited) number should no more than 200"
  else
    echo host_container ok
  fi
}

# Check the number of images reported by the container runtime.
# Globals:  cri_name (read), container_socket_path (read, non-docker only)
# Outputs:  "host_image ok", or a single warn line when the count is
#           greater than 200
function check_image_number() {
  local num

  if [[ "$cri_name" == "docker" ]]; then
    num=$(docker info -f '{{.Images}}')
  else
    # Any runtime other than docker is queried through ctr in the k8s.io
    # namespace; docker itself is not invoked on this path.
    num=$(ctr -a "$container_socket_path" -n k8s.io images ls -q | wc -l)
  fi

  # An empty count (query failed) is treated as 0.
  if [[ "${num:-0}" -gt 200 ]]; then
    echo host_image warn "container image number should no more than 200"
  else
    echo host_image ok
  fi
}


# Check that the container runtime uses the expected data directory.
# Globals:  cri_name, is_cs, cluster_vendor (read);
#           docker_data_dir (read and written on the docker path)
# Outputs:  docker:  "host_dockerdir ok" or "host_dockerdir error ..."
#           other:   "host_containerdir ok" or "host_containerdir error ..."
function check_data_dir() {
  local config_file=/netdata/dice-ops/dice-config/config.yaml

  if [[ "$cri_name" == "docker" ]]; then
    local configured_dir dataroot

    # Expected data-root: an uncommented data_root: entry in the config wins;
    # otherwise keep any value already set, else use the vendor default.
    configured_dir=$(grep data_root: "$config_file" 2>/dev/null | grep -v "#" \
      | awk -F":" '{print $2}' | sed 's/^\s*\|\s*$//g')
    if [[ -n "$configured_dir" ]]; then
      docker_data_dir=$configured_dir
    fi
    if [[ "$is_cs" == true ]]; then
      : "${docker_data_dir:=/var/lib/docker}"
    else
      : "${docker_data_dir:=/data/docker/data}"
    fi

    dataroot=$(docker info -f '{{.DockerRootDir}}')
    # Both sides are quoted so this is a plain string comparison, not a glob match.
    if [[ "$dataroot" == "$docker_data_dir" ]]; then
      echo host_dockerdir ok
      return
    fi
    if [[ "$cluster_vendor" == cs_managed && "$dataroot" == '/var/lib/docker' \
      && "$docker_data_dir" == '/var/lib/container/docker' ]]; then
      # cs_managed ack bind /var/lib/container/docker /var/lib/docker in /etc/fstab
      echo host_dockerdir ok
      return
    fi
    echo host_dockerdir error "docker data-root should be '$docker_data_dir'"
    return
  fi

  # Non-docker runtime: compare the configured state_path with the top-level
  # state directory from `containerd config dump`.
  local container_data_dir container_root
  container_data_dir=$(grep state_path: "$config_file" 2>/dev/null | awk '{print $2}')
  # Spaces are stripped first, so the key is anchored as "state=" to avoid
  # matching other keys that merely end in "state".
  container_root=$(containerd config dump | tr -d ' ' | grep '^state=' | head -1 \
    | awk -F'"' '{print $2}')

  if [[ "$container_data_dir" == "$container_root" ]]; then
    echo host_containerdir ok
    return
  fi
  # On cs_managed clusters a state directory of /run/containerd is accepted.
  if [[ "$cluster_vendor" == cs_managed && "$container_root" == "/run/containerd" ]]; then
    echo host_containerdir ok
    return
  fi
  echo host_containerdir error "container data-root should be '$container_data_dir'"
}

# 检测kubelet服务状态
function check_kubelet_status() {
if systemctl is-active kubelet | grep '^active' > /dev/null 2>&1; then
echo host_kubeletstatus ok
Expand All @@ -64,6 +94,7 @@ function check_kubelet_status() {
fi
}

# 检测防火墙是否 disabled
function check_firewall() {
if systemctl is-active firewalld >/dev/null 2>/dev/null; then
echo host_firewall error "firewall should be disabled but not"
Expand Down Expand Up @@ -91,16 +122,15 @@ function check_chronyd() {
fi
}

# Check that the container runtime's systemd unit declares Type=notify.
# Globals:    cri_name (read) - runtime / unit base name
# Arguments:  $1 - optional directory holding the unit files
#                  (default: /etc/systemd/system)
# Outputs:    "<cri_name>_service_notify ok", or
#             "<cri_name>_service_notify error <cri_name> service is not Type=notify"
function check_container_notify() {
  local unit_dir=${1:-/etc/systemd/system}
  local metric="${cri_name}_service_notify"
  local unit

  # Look at the unit file itself first, then at its entry under
  # multi-user.target.wants; entries there carry the full unit name,
  # including the .service suffix.
  for unit in \
    "$unit_dir/$cri_name.service" \
    "$unit_dir/multi-user.target.wants/$cri_name.service"; do
    if [[ -f "$unit" ]] && grep -q 'Type=notify' "$unit" 2>/dev/null; then
      echo "$metric" ok
      return
    fi
  done

  echo "$metric" error "$cri_name service is not Type=notify"
}

function check_kubelet_eviction_config() {
Expand Down Expand Up @@ -138,14 +168,14 @@ function check_kubelet_eviction_soft_config() {
}


# Run the checks in order, each one exactly once. The docker-only checks
# (check_docker_status, check_docker_dir, check_docker_notify) are replaced by
# their runtime-agnostic counterparts.
check_cri_status
check_container_number
check_image_number
check_data_dir
check_kubelet_status
check_firewall
check_resolved
check_chronyd
check_container_notify
check_kubelet_eviction_config
check_kubelet_eviction_soft_config