diff --git a/probers/node/service/service.sh b/probers/node/service/service.sh
index 933ed93..131e1b9 100644
--- a/probers/node/service/service.sh
+++ b/probers/node/service/service.sh
@@ -1,61 +1,95 @@
 #!/bin/bash
 
 cluster_vendor=$(cat /netdata/dice-ops/dice-config/config.yaml | grep vendor | awk '{print $2}' 2>/dev/null)
+# Container runtime named in config.yaml: the key of the line that starts with "docker" or "containerd" once spaces are stripped.
+cri_name=$(cat /netdata/dice-ops/dice-config/config.yaml|sed 's/ //g'|grep -E "^docker|^containerd"|awk -F: '{print $1}')
+# A config that declares neither key falls back to docker, the only runtime this script checked before.
+cri_name=${cri_name:-docker}
 is_cs=false
+# containerd socket address: last field of the first "systemctl status containerd" line that mentions containerd.sock.
+container_socket_path=$(systemctl status containerd|grep containerd.sock|head -1|awk '{print $NF}')
+# With an empty address "ctr -a" would take the following "-n" flag as its argument, so fall back to containerd's default socket.
+container_socket_path=${container_socket_path:-/run/containerd/containerd.sock}
 if [[ "$cluster_vendor" == cs || "$cluster_vendor" == cs_managed || "$cluster_vendor" == edas ]]; then
     is_cs=true
 fi
 
-## docker层面检查
-function check_docker_status() {
-    if systemctl is-active docker | grep '^active' > /dev/null 2>&1; then
-        echo host_dockerstatus ok
+# Report whether the configured runtime service (docker or containerd) is active.
+# The key is host_<runtime>status, so docker hosts keep emitting host_dockerstatus.
+function check_cri_status() {
+    if systemctl is-active "$cri_name" | grep '^active' > /dev/null 2>&1; then
+        echo "host_${cri_name}status ok"
     else
-        echo host_dockerstatus error "docker not running"
+        echo "host_${cri_name}status error ${cri_name} not running"
     fi
 }
 
+# Report "info" when the runtime holds more than 200 containers.
 function check_container_number() {
-    num=$(docker info -f '{{.Containers}}')
+    if [[ $cri_name == "docker" ]]; then
+        num=$(docker info -f '{{.Containers}}')
+    else
+        num=$(ctr -a "$container_socket_path" -n k8s.io containers ls -q | wc -l)
+    fi
     if [[ $num -gt 200 ]]; then
-        echo host_container info "docker container(with exited) number should no more than 200"
+        echo host_container info "container(with exited) number should no more than 200"
     else
         echo host_container ok
     fi
 }
 
+# Report "warn" when the runtime holds more than 200 images.
 function check_image_number() {
-    num=$(docker info -f '{{.Images}}')
+    if [[ $cri_name == "docker" ]]; then
+        num=$(docker info -f '{{.Images}}')
+    else
+        num=$(ctr -a "$container_socket_path" -n k8s.io images ls -q | wc -l)
+    fi
     if [[ $num -gt 200 ]]; then
-        echo host_image warn "docker image number should no more than 200"
+        echo host_image warn "container image number should no more than 200"
     else
         echo host_image ok
     fi
 }
 
-function check_docker_dir() {
-    docker_data_dir=$(cat /netdata/dice-ops/dice-config/config.yaml | grep data_root: | grep -v "#" | awk -F":" '{print $2}' | sed 's/^\s*\|\s*$//g')
-    if [[ "$is_cs" == true ]]; then
-        docker_data_dir=${docker_data_dir:="/var/lib/docker"}
-    else
-        docker_data_dir=${docker_data_dir:="/data/docker/data"}
-    fi
-
-    dataroot=$(docker info -f '{{.DockerRootDir}}')
-    if [[ $dataroot != $docker_data_dir ]]; then
-        if [[ "$cluster_vendor" == cs_managed && $dataroot == '/var/lib/docker' && $docker_data_dir == '/var/lib/container/docker' ]]; then
-            # cs_managed ack bind /var/lib/container/docker /var/lib/docker in /etc/fstab
-            echo host_dockerdir ok
-            return
-        fi
-
-        echo host_dockerdir error "docker data-root should be '$docker_data_dir'"
-        return
-    fi
-
-    echo host_dockerdir ok
+# Compare the directory the runtime actually uses with the one declared in config.yaml.
+function check_data_dir() {
+    if [[ "$cri_name" == "docker" ]]; then
+        # Expected data-root comes from config.yaml (data_root:); the defaults below apply only when it is not declared.
+        docker_data_dir=$(cat /netdata/dice-ops/dice-config/config.yaml | grep data_root: | grep -v "#" | awk -F":" '{print $2}' | sed 's/^\s*\|\s*$//g')
+        if [[ "$is_cs" == true ]]; then
+            docker_data_dir=${docker_data_dir:="/var/lib/docker"}
+        else
+            docker_data_dir=${docker_data_dir:="/data/docker/data"}
+        fi
+        dataroot=$(docker info -f '{{.DockerRootDir}}')
+        if [[ $dataroot != "$docker_data_dir" ]]; then
+            if [[ "$cluster_vendor" == cs_managed && $dataroot == '/var/lib/docker' && $docker_data_dir == '/var/lib/container/docker' ]]; then
+                # cs_managed ack bind /var/lib/container/docker /var/lib/docker in /etc/fstab
+                echo host_dockerdir ok
+                return
+            fi
+            echo host_dockerdir error "docker data-root should be '$docker_data_dir'"
+            return
+        fi
+        echo host_dockerdir ok
+    else
+        # Expected value is config.yaml's state_path:, actual value is the state= entry of "containerd config dump".
+        container_data_dir=$(cat /netdata/dice-ops/dice-config/config.yaml|grep state_path: |awk '{print $2}')
+        container_root=$(containerd config dump |tr -d ' '|grep state=|awk -F'"' '{print $2}')
+        if [[ $container_data_dir == "$container_root" ]]; then
+            echo host_containerdir ok
+            return
+        fi
+        if [[ "$cluster_vendor" == cs_managed && $container_root == "/run/containerd" ]]; then
+            echo host_containerdir ok
+            return
+        fi
+        echo host_containerdir error "container data-root should be '$container_data_dir'"
+    fi
 }
 
+# Report whether the kubelet service is active.
 function check_kubelet_status() {
     if systemctl is-active kubelet | grep '^active' > /dev/null 2>&1; then
         echo host_kubeletstatus ok
@@ -64,6 +98,7 @@ function check_kubelet_status() {
     fi
 }
 
+# Report an error when firewalld is active; it is expected to be disabled.
 function check_firewall() {
     if systemctl is-active firewalld >/dev/null 2>/dev/null; then
         echo host_firewall error "firewall should be disabled but not"
@@ -91,16 +126,16 @@ function check_chronyd() {
     fi
 }
 
-function check_docker_notify() {
-    if cat /etc/systemd/system/docker.service |grep 'Type=notify' >/dev/null 2>&1; then
-        echo docker_service_notify ok
-    else
-        if cat /etc/systemd/system/multi-user.target.wants/docker.service |grep 'Type=notify' >/dev/null 2>&1; then
-            echo docker_service_notify ok
-        else
-            echo docker_service_notify error "docker service is not Type=notify"
-        fi
-    fi
+# Report whether the runtime's systemd unit file contains Type=notify.
+# Reads /etc/systemd/system/<runtime>.service first, then the multi-user.target.wants entry; a missing file counts as no match.
+function check_container_notify() {
+    if grep -q 'Type=notify' "/etc/systemd/system/$cri_name.service" 2>/dev/null; then
+        echo "$cri_name"_service_notify ok
+    elif grep -q 'Type=notify' "/etc/systemd/system/multi-user.target.wants/$cri_name.service" 2>/dev/null; then
+        echo "$cri_name"_service_notify ok
+    else
+        echo "$cri_name"_service_notify error "$cri_name service is not Type=notify"
+    fi
 }
 
 function check_kubelet_eviction_config() {
@@ -138,14 +173,14 @@ function check_kubelet_eviction_soft_config() {
 }
 
 
-check_docker_status
+check_cri_status
 check_container_number
 check_image_number
-check_docker_dir
+check_data_dir
 check_kubelet_status
 check_firewall
 check_resolved
 check_chronyd
-check_docker_notify
+check_container_notify
 check_kubelet_eviction_config
-check_kubelet_eviction_soft_config
\ No newline at end of file
+check_kubelet_eviction_soft_config