Skip to content

Commit

Permalink
0.5-64
Browse files Browse the repository at this point in the history
  • Loading branch information
dimafern committed Jul 12, 2018
1 parent 7b4f9f1 commit fe6835d
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 31 deletions.
17 changes: 9 additions & 8 deletions hive/bin/agent
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ gpu_stats=
#nvidia_indexes=echo "$gpu_stats" | jq '.brand | to_entries[] | select(.value == "nvidia") | .key' | jq -sc '.'
#nvidia_indexes=`gpu-detect listjson | jq '. | to_entries[] | select(.value.brand == "nvidia") | .key' | jq -sc '.'`
#gpu_detect_json=`gpu-detect listjson`
if [[ ! -e /run/hive/gpu-detect.json ]]; then #this is needed only when upgrading version. later we can remove this
gpu-detect listjson > /run/hive/gpu-detect.json
fi
gpu_detect_json=`cat /run/hive/gpu-detect.json`
amd_indexes_array=`echo "$gpu_detect_json" | jq -c '[ . | to_entries[] | select(.value.brand == "amd") | .key ]'`
nvidia_indexes_array=`echo "$gpu_detect_json" | jq -c '[ . | to_entries[] | select(.value.brand == "nvidia") | .key ]'`
Expand All @@ -48,10 +51,8 @@ cpu_indexes_array=`echo "$gpu_detect_json" | jq -c '[ . | to_entries[] | select(
# Reset all miner selection variables.
# Globals written:
#   MINER, COIN             - set to the empty string (they stay defined)
#   MINER1..32, COIN1..32   - unset entirely
# Arguments: none
function reset_miner_vars() {
	#reset all miner vars
	MINER=
	COIN=

	# Keep the loop counter local so a caller's own $i is not clobbered.
	local i
	for i in {1..32}; do
		# unset takes the variable name as a plain argument, so no eval is needed
		unset "MINER$i" "COIN$i"
	done
}

Expand Down Expand Up @@ -89,16 +90,14 @@ function loop {
local mindex
for mindex in {32..2}; do
eval minerval=\$MINER$mindex
eval coinval=\$COIN$mindex
if [[ ! -z $minerval ]]; then
miner_stats $minerval $mindex
eval khs$mindex=\$khs
miner_json=$(jq -n -c \
--arg miner$mindex "$minerval" \
--arg coin$mindex "$coinval" \
--arg total_khs$mindex "$khs" \
--argjson miner_stats$mindex "$stats" \
'{"params": {$miner'$mindex', $coin'$mindex', $total_khs'$mindex', $miner_stats'$mindex'}}')
'{"params": {$miner'$mindex', $total_khs'$mindex', $miner_stats'$mindex'}}')
eval miner${mindex}_json="\$miner_json"
fi
done
Expand All @@ -125,24 +124,26 @@ function loop {
#echo $stats
#uptime: load averages for the past 1, 5, and 15 minutes

[[ ! -z $META ]] && meta="$META" || meta='null'
request=$(
jq -n \
--arg rig_id "$RIG_ID" \
--arg passwd "$RIG_PASSWD" \
--arg miner "$MINER" \
--arg coin "$COIN" \
--argjson meta "$meta" \
--argjson miner_stats "$stats" \
--arg total_khs "$khs" \
--argjson temp "$temp" \
--argjson fan "$fan" \
--argjson power "$power" \
--arg df "`df -h / | awk '{ print $4 }' | tail -n 1 | sed 's/%//'`" \
--argjson mem "`free -m | grep 'Mem' | awk '{print "["$2","$4"]"}'`" \
--argjson cpuavg "[`uptime | awk -F': ' '{print $2}'`]" \
'{
"method": "stats", "jsonrpc": "2.0", "id": 0,
"params": {
$rig_id, $passwd, $miner, $coin, $miner_stats, $total_khs,
$temp, $fan, $power, $df, $cpuavg
$rig_id, $passwd, $miner, $meta, $miner_stats, $total_khs,
$temp, $fan, $power, $df, $mem, $cpuavg
}
}'
)
Expand Down
8 changes: 5 additions & 3 deletions hive/bin/agent.miner_stats.sh
Original file line number Diff line number Diff line change
Expand Up @@ -193,12 +193,13 @@ function miner_stats {
khs=`echo $stats_raw | jq '.result[].sol_ps' | awk '{s+=$1} END {print s/1000}'`
local uptime=$(( `date +%s` - $(stat -c%X /proc/`pidof zm`) )) #dont think zm will die so soon after getting stats
# local fan=$(jq -c "[.fan$nvidia_indexes_array]" <<< $gpu_stats)
local temp=$(jq -c "[.temp$nvidia_indexes_array]" <<< $gpu_stats)
# local temp=$(jq -c "[.temp$nvidia_indexes_array]" <<< $gpu_stats)
local temp=$(jq -c '[.result[].temperature]' <<< "$stats_raw")
local ac=`echo $stats_raw | jq '[.result[].accepted_shares] | add'`
local rj=`echo $stats_raw | jq '[.result[].rejected_shares] | add'`

#All fans speed array
local fan=$(jq -r ".fan | .[]" <<< $gpu_stats)
local fan=$(jq -r ".fan | .[]" <<< "$gpu_stats")
#DSTM's busid array
local bus_id_array=$(jq -r '.result[].gpu_pci_bus_id' <<< "$stats_raw")
#All busid array
Expand All @@ -219,7 +220,8 @@ function miner_stats {
done
done

stats=$(jq --argjson temp "$temp" --argjson fan "`echo "${fans_array[@]}" | jq -s . | jq -c .`" --arg uptime "$uptime" --arg ac "$ac" --arg rj "$rj" \
stats=$(jq --argjson temp "$temp" --argjson fan "`echo "${fans_array[@]}" | jq -s . | jq -c .`" \
--arg uptime "$uptime" --arg ac "$ac" --arg rj "$rj" \
'{ hs: [.result[].sol_ps], $temp, $fan, $uptime, ar: [$ac, $rj] }' <<< "$stats_raw")
fi
;;
Expand Down
3 changes: 2 additions & 1 deletion hive/bin/hello
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,11 @@ jq -n \
--arg manufacturer "$mb_manufacturer" --arg product "$mb_product" \
--arg model "$cpu_model" --arg cores "$cpu_cores" --arg aes "$aes" \
--arg kernel "$kernel" \
--arg server_url "$HIVE_HOST_URL" \
'{
"method": "hello", "jsonrpc": "2.0", "id": 0,
"params": {
$rig_id, $passwd, $farm_hash,
$rig_id, $passwd, $farm_hash, $server_url,
$boot_time, $boot_event, $ip, $net_interfaces, $openvpn,
$gpu, $gpu_count_amd, $gpu_count_nvidia,
"mb": {$manufacturer, $product}, "cpu": {$model, $cores, $aes},
Expand Down
63 changes: 44 additions & 19 deletions hive/sbin/autofan
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ DEF_REBOOT_ON_ERROR=0

#we will restart miner only if it was stopped by this script
miner_stopped_by_overheat=0
#flag that the message was sent
unable_to_set_fan_speed=0
#flag if we met 511, 460, etc
temperature_is_unreal=0
#unparsable data
error_in_temp_readings=0

###
# Log write
Expand Down Expand Up @@ -126,12 +132,19 @@ get_fan_speed () {
#checking var is natural number
if [[ -z "${cur_temp##*[!0-9]*}" || -z "${cur_fan_speed##*[!0-9]*}" ]]; then
if [[ $REBOOT_ON_ERROR == 1 ]]; then
local msg="Autofan: GPU error, rebooting"
local msg="Autofan: error in temp readings, rebooting"
local payload=`cat $gpu_stats_json | jq -r -C .`
echo "$payload" | message warning "$msg" payload
nohup bash -c 'sreboot' > /tmp/nohup.log 2>&1 &
else break
else
if [[ $error_in_temp_readings == 0 ]]; then
local msg="Autofan: error in temp readings"
local payload=`cat $gpu_stats_json | jq -r -C .`
echo "$payload" | message warning "$msg" payload
error_in_temp_readings=1
fi
fi
break
fi
#

Expand Down Expand Up @@ -248,21 +261,30 @@ check_overheat() {
local t
for t in ${temperatures_array[@]}; do
#reboot on driver error
if [[ $REBOOT_ON_ERROR == 1 && $t -gt 120 ]]; then
local msg="GPU temperature $t is unreal, driver error"
local payload=`cat $gpu_stats_json | jq -r -C .`
echo "$payload" | message warning "$msg"
nohup bash -c 'sreboot' > /tmp/nohup.log 2>&1 &
if [[ $t -gt 120 ]]; then
if [[ $REBOOT_ON_ERROR == 1 ]]; then
local msg="GPU temperature $t is unreal, driver error, rebooting"
local payload=`cat $gpu_stats_json | jq -r -C .`
echo "$payload" | message warning "$msg" payload
nohup bash -c 'sreboot' > /tmp/nohup.log 2>&1 &
else
if [[ $temperature_is_unreal == 0 ]]; then
local msg="GPU temperature $t is unreal, driver error"
local payload=`cat $gpu_stats_json | jq -r -C .`
echo "$payload" | message warning "$msg" payload
temperature_is_unreal=1
fi
fi
break
fi

#stop on CRITICAL_TEMP
if [[ $t -ge $CRITICAL_TEMP && $t -ne 511 ]]; then
if [[ $t -ge $CRITICAL_TEMP && $t -le 120 ]]; then #do not process temp 511, 460, etc
miner_stopped_by_overheat=1
do_miner_action "miner-stop"
local msg="GPU reached ${CRITICAL_TEMP}°C, mining stopped"
local payload=`cat $gpu_stats_json | jq -r -C .`
echo "$payload" | message warning "$msg"
echo "$payload" | message warning "$msg" payload
break
fi
done
Expand Down Expand Up @@ -302,8 +324,7 @@ nvidia_auto_fan_control () {
args=
#set start index
i=0
for index in ${nvidia_indexes_array[@]}
do
for index in ${nvidia_indexes_array[@]}; do
# TODO These fields may be moved inside `get_fan_speed` and replaced by nvidia_indexes_array[@] as an argument
local gpu_temperature=${temperatures_array[index]}
local gpu_temperature_previous=${temperatures_array_previous[index]}
Expand All @@ -321,14 +342,18 @@ nvidia_auto_fan_control () {
#[[ -n $args ]] && nvidia-settings $args > /dev/null 2>&1
if [[ -n $args ]]; then
timeout 30 nvidia-settings $args > /dev/null 2>&1
if [[ $? -ne 0 && $REBOOT_ON_ERROR == 1 ]]; then
local msg="Autofan: unable to set fan speed, rebooting"
message warning "$msg"
nohup bash -c 'sreboot' > /tmp/nohup.log 2>&1 &
fi
if [[ $? -ne 0 && $REBOOT_ON_ERROR == 0 ]]; then
local msg="Autofan: unable to set fan speed"
message warning "$msg"
if [[ $? -ne 0 ]]; then
if [[ $REBOOT_ON_ERROR == 1 ]]; then
local msg="Autofan: unable to set fan speed, rebooting"
message warning "$msg"
nohup bash -c 'sreboot' > /tmp/nohup.log 2>&1 &
else
if [[ $unable_to_set_fan_speed == 0 ]]; then
local msg="Autofan: unable to set fan speed"
message warning "$msg"
fi
fi
unable_to_set_fan_speed=1
fi
fi
}
Expand Down

0 comments on commit fe6835d

Please sign in to comment.