#!/bin/bash # # DESCRIPTION: analyze memory usage from oom-killer memory dumps # @desc analyze memory usage from oom-killer memory dumps # # @author David Winterstein # @version 2.0 # # @depends bc # @depends zcat # # @env $LOG_FILES list of files to analyze (default: /var/log/syslog) # @env $MIN_MEM minimum total memory usage in mib for procs to be listed (default: 64) # @env $SPLIT_USERS split process groups with the same name based on uid (default: false) # #depends=$(grep -Po "^\s*#\s+@depends\s+\K\S+" $(basename $0)) depends="bc zcat" for depend in $depends; do [[ ! -x $(which $depend 2>/dev/null) ]] \ && echo "ERROR: could not find \"$depend\" executable" \ && exit $LINENO done [[ -n $LOG_FILES ]] \ && files=$LOG_FILES \ || files="/var/log/syslog" [[ -n $MIN_MEM ]] \ && minimum_mem_usage_mib=$MIN_MEM \ || minimum_mem_usage_mib=64 minimum_mem_usage_kb=$(( $minimum_mem_usage_mib * 1049 )) for file in $files; do echo -en "\n$(tput bold)$file$(tput sgr 0)" [[ ! $file =~ \.gz$ ]] \ && content=$(cat $file) \ || content=$(zcat $file) oom_kill_start_lines=$(echo "$content" |grep -Pn '\S+\s+kernel:\s+\[\d+\.\d+\]\s+\[\s*pid\s*\]\s+uid\s+tgid\s+total_vm' |cut -d':' -f1) [[ -z $oom_kill_start_lines ]] \ && echo " | no oom-kills found" \ && continue number_of_oom_kills=$(echo "$oom_kill_start_lines" |wc -l) [[ $number_of_oom_kills -eq 1 ]] && s="" || s="s" echo " | $number_of_oom_kills oom-kill$s found" current_kill=0 for line in $oom_kill_start_lines; do let current_kill++ line++ oom_kill_dump=$(echo "$content" | sed -n "$line,\$p" | sed -r '/Out of memory: Kill(ed)? process/q') oom_kill_target_line=$(echo "$oom_kill_dump" | tail -n2) oom_kill_target_pid=$(echo "$oom_kill_target_line" | grep -Po "pid=\K[^,]+") [[ -z $oom_kill_target_pid ]] && oom_kill_target_pid=$(echo "$oom_kill_target_line" | grep -Poi "kill\s+process\s+\K\d+") oom_kill_target_name=$(echo "$oom_kill_target_line" | grep -Po "task=\K[^,]+") [[ -z $oom_kill_target_name ]] && oom_kill_target_name=$(echo "$oom_kill_target_line" | grep -Poi "kill\s+process\s+\d+\s+\(\K[^\)]+") oom_kill_target_cgroup=$(echo "$oom_kill_target_line" | grep -Po "task_memcg=\K[^,]+") [[ ${oom_kill_target_cgroup##*/} =~ ^[0-9]+$ ]] \ && oom_kill_target_cgroup_short=$(echo "$oom_kill_target_cgroup" | grep -Po "[^/]+(?=/[^/]+$)") \ || oom_kill_target_cgroup_short=${oom_kill_target_cgroup##*/} [[ -n $oom_kill_target_cgroup_short ]] \ && oom_kill_name="$(tput setaf 1)$oom_kill_target_name$(tput sgr 0)/$(tput setaf 0)$oom_kill_target_cgroup_short$(tput sgr 0)" \ || oom_kill_name="$(tput setaf 1)$oom_kill_target_name$(tput sgr 0)" oom_kill_timestamp=$(date -d "$(echo "$oom_kill_dump" | head -n1 | grep -Po "^\S+(\s+\S+){2}(?=\s+\S+\s+kernel)")" +%F\ %T) echo " #$current_kill @$(tput setaf 4)$oom_kill_timestamp$(tput sgr 0) (~ line $(tput setaf 5)$line$(tput sgr 0)) | killed pid: $(tput setaf 3)$oom_kill_target_pid$(tput sgr 0) [$oom_kill_name]" echo " procs consuming >= $(tput setaf 6)${minimum_mem_usage_mib}mib$(tput sgr 0) total:" procs=$(echo "$oom_kill_dump" |grep -Po "\S+\s+kernel:\s+\[\d+\.\d+\]\s+\[\s*\d+\s*\](\s+-?\d+){7}\s+\K\S+" |sort -Vu) { if [[ $SPLIT_USERS == "true" ]]; then echo "$(tput smul)proc,user,count,mem_total(mib),mem_total(gib),mem_average(mib)$(tput sgr 0)" else echo "$(tput smul)proc,count,mem_total(mib),mem_total(gib),mem_average(mib)$(tput sgr 0)" fi for proc in $procs; do if [[ $SPLIT_USERS == "true" ]]; then uids_of_these_procs=$(echo "$oom_kill_dump" |grep -Po "\S+\s+kernel:\s+\[\d+\.\d+\]\s+\[\s*\d+\s*\]\K(\s+-?\d+){7}\s+$proc" |awk '{print $1}' |sort -Vu) for uid_of_these_procs in $uids_of_these_procs; do user_of_these_procs=$(grep -Po "^[^:]+(?=:x:$uid_of_these_procs:)" /etc/passwd) number_of_these_procs=$(echo "$oom_kill_dump" |grep -Po "\S+\s+kernel:\s+\[\d+\.\d+\]\s+\[\s*\d+\s*\]\s+$uid_of_these_procs(\s+-?\d+){6}\s+$proc" |wc -l) mem_of_these_procs=$(echo "$oom_kill_dump" |grep -Po "\S+\s+kernel:\s+\[\d+\.\d+\]\s+\[\s*\d+\s*\]\K\s+$uid_of_these_procs(\s+-?\d+){6}\s+$proc" |awk '{print $3}') total_mem_of_these_procs_kb=0 for mem in $mem_of_these_procs; do [[ $mem =~ ^[0-9]+$ && $mem -gt 0 ]] \ && total_mem_of_these_procs_kb=$(( $total_mem_of_these_procs_kb + $mem )) done [[ $total_mem_of_these_procs_kb -lt $minimum_mem_usage_kb ]] \ && continue total_mem_of_these_procs_kib=$(printf "%.2f" $(echo "scale=2;$total_mem_of_these_procs_kb*1000/1024" |bc 2>/dev/null)) total_mem_of_these_procs_mib=$(printf "%.2f" $(echo "scale=2;$total_mem_of_these_procs_kib/1024" |bc 2>/dev/null)) total_mem_of_these_procs_gib=$(printf "%.2f" $(echo "scale=2;$total_mem_of_these_procs_kib/1024/1024" |bc 2>/dev/null)) average_mem_of_these_procs_mib=$(printf "%.2f" $(echo "scale=2;$total_mem_of_these_procs_mib/$number_of_these_procs" |bc)) printf "%s,%s,%5d,%14.2f,%14.2f,%16.2f\n" "$proc" "$user_of_these_procs" "$number_of_these_procs" "$total_mem_of_these_procs_mib" "$total_mem_of_these_procs_gib" "$average_mem_of_these_procs_mib" done else # uid_of_these_procs=$(echo "$oom_kill_dump" |grep -Po "\S+\s+kernel:\s+\[\d+\.\d+\]\s+\[\s*\d+\s*\]\K(\s+-?\d+){7}\s+$proc" |awk '{print $1}' |sort -Vu |head -n1) # user_of_these_procs=$(grep -Po "^[^:]+(?=:x:$uid_of_these_procs:)" /etc/passwd) number_of_these_procs=$(echo "$oom_kill_dump" |grep -Po "\S+\s+kernel:\s+\[\d+\.\d+\]\s+\[\s*\d+\s*\](\s+-?\d+){7}\s+$proc" |wc -l) mem_of_these_procs=$(echo "$oom_kill_dump" |grep -Po "\S+\s+kernel:\s+\[\d+\.\d+\]\s+\[\s*\d+\s*\]\K(\s+-?\d+){7}\s+$proc" |awk '{print $3}') total_mem_of_these_procs_kb=0 for mem in $mem_of_these_procs; do [[ $mem =~ ^[0-9]+$ && $mem -gt 0 ]] \ && total_mem_of_these_procs_kb=$(( $total_mem_of_these_procs_kb + $mem )) done [[ $total_mem_of_these_procs_kb -lt $minimum_mem_usage_kb ]] \ && continue total_mem_of_these_procs_kib=$(printf "%.2f" $(echo "scale=2;$total_mem_of_these_procs_kb*1000/1024" |bc 2>/dev/null)) total_mem_of_these_procs_mib=$(printf "%.2f" $(echo "scale=2;$total_mem_of_these_procs_kib/1024" |bc 2>/dev/null)) total_mem_of_these_procs_gib=$(printf "%.2f" $(echo "scale=2;$total_mem_of_these_procs_kib/1024/1024" |bc 2>/dev/null)) average_mem_of_these_procs_mib=$(printf "%.2f" $(echo "scale=2;$total_mem_of_these_procs_mib/$number_of_these_procs" |bc)) # printf "%s,%s,%5d,%14.2f,%14.2f,%16.2f\n" "$proc" "$user_of_these_procs" "$number_of_these_procs" "$total_mem_of_these_procs_mib" "$total_mem_of_these_procs_gib" "$average_mem_of_these_procs_mib" printf "%s,%5d,%14.2f,%14.2f,%16.2f\n" "$proc" "$number_of_these_procs" "$total_mem_of_these_procs_mib" "$total_mem_of_these_procs_gib" "$average_mem_of_these_procs_mib" fi done } |column -s"," -t |sort -brV -k4 |sed -r "s/^/ /g" [[ $current_kill -ne $number_of_oom_kills ]] && echo done done echo