# Copyright 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

description     "Temporary, quick-hack metrics collection & thermal daemon"
author          "chromium-os-dev@chromium.org"

# This is for quickly adding UMA stats that we may need for
# short-term experiments, when we don't have the time to add
# stuff to metrics_daemon. That's where it should go in the
# long term.
#
# This is also currently doing a userland thermal loop to allow
# for quick experimentation. This thermal loop will eventually
# move to the BIOS once the data from experiments help prove its
# efficacy.

start on started system-services
stop on stopping system-services
respawn

script
  TEMP_OFFSET=273  # difference between K (reported by EC) and C (used in UMA)

  # Thermal loop fields: 1-based column numbers into a row of all_steps.
  CPU_MAX_FREQ_FIELD=1
  CPU_MIN_FREQ_FIELD=2
  GPU_MAX_FREQ_FIELD=3
  CPU_DUTY_CYCLE_FIELD=4
  PKG_POWER_LIMIT_FIELD=5

  # Thermal loop steps, one row per step, step 0 first.
  # The blank first line is significant: get_step() maps step N to row N + 2.
  all_steps="
    1801000 800000 1150 0    0x180aa00dd8088 # no throttling
    1801000 800000 1150 0    0x180aa00dd8080 # cap pkg to 16W
    1801000 800000 1150 0    0x180aa00dd8078 # cap pkg to 15W
    1801000 800000 1150 0    0x180aa00dd8070 # cap pkg to 14W
    1801000 800000 1150 0    0x180aa00dd8068 # cap pkg to 13W
    1800000 800000 900  0    0x180aa00dd8068 # disable turbo
    1600000 800000 800  0    0x180aa00dd8068 # cap CPU & GPU frequency
    1400000 800000 700  0    0x180aa00dd8068 # cap CPU & GPU frequency
    1200000 800000 600  0    0x180aa00dd8068 # cap CPU & GPU frequency
    1000000 800000 500  0    0x180aa00dd8068 # cap CPU & GPU frequency
    800000  800000 400  0    0x180aa00dd8068 # cap CPU & GPU frequency
    800000  800000 350  0    0x180aa00dd8068 # cap CPU & GPU frequency
    800000  800000 350  0x1c 0x180aa00dd8068 # duty cycle CPU
    800000  800000 350  0x18 0x180aa00dd8068 # duty cycle CPU
  "
  # Zero-based index of the last step: the line count minus the blank first
  # line, minus the closing-quote line, minus one.
  # NOTE(review): not referenced anywhere else in this file.
  max_steps=$(($(echo "$all_steps" | wc -l) - 3))

  # Print the all_steps row for a step.
  #   $1 - zero-based step number
  get_step() {
    row=$(($1 + 2))
    # Pass the row number as an awk variable instead of splicing it into the
    # awk program text.
    echo "$all_steps" | awk -v row="$row" 'NR == row'
  }

  # Print one whitespace-separated field of a step row.
  #   $1 - 1-based field number (one of the *_FIELD constants)
  #   $2 - row text as printed by get_step
  get_field() {
    echo "$2" | awk -v field="$1" '{ print $field }'
  }

  # Print the reading of EC sensor 9 converted from K to C.
  # NOTE(review): not called anywhere in this file; get_max_skin_temp reads
  # sensor 9 through get_sensor_temp instead.
  get_peci_temp() {
    tempk=$(ectool temps 9 | sed 's/[^0-9]//g')
    tempc=$((tempk - $TEMP_OFFSET))
    echo "$tempc"
  }

  # Print the reading of one EC sensor in C, or 0 when ectool fails.
  #   $1 - sensor index as understood by "ectool temps"
  get_sensor_temp() {
    s=$1
    tempc=0
    if out=$(ectool temps "$s"); then
      # $out is left unquoted on purpose: the reply is flattened to a single
      # line before every non-digit character is stripped.
      tempk=$(echo $out | sed 's/[^0-9]//g')
      tempc=$((tempk - $TEMP_OFFSET))
    fi
    echo "$tempc"
  }

  # Print the space-separated indices of the sensors used as "skin"
  # temperature sources: the USB C-Object and the Charger D-Object.
  get_sensor_list() {
    # USB C-Object: 1 or 13
    # PCH D-Object: 3
    # Hinge C-Object: 5 or 15
    # Charger D-Object: 7
    if ectool tempsinfo 1 | grep -q "USB C-Object"; then
      usb_c_object=1
    else
      usb_c_object=13
    fi
    charger_d_object=7

    echo "$usb_c_object" "$charger_d_object"
  }

  # Push the calibration coefficients for the four TMP006 sensors to the EC.
  set_calibration_data() {
    B0='-2.94e-5'
    B1='-5.7e-7'
    B2='4.63e-9'
    USB_C_S0='2.712e-14'
    PCH_D_S0='9.301e-14'
    HINGE_C_S0='-11.000e-14'
    CHARGER_D_S0='5.141e-14'

    # Note that the sensor numbering is different between the ectool tmp006
    # and temps/tempsinfo commands.
    USB_C="0 $USB_C_S0 $B0 $B1 $B2"
    PCH_D="1 $PCH_D_S0 $B0 $B1 $B2"
    HINGE_C="2 $HINGE_C_S0 $B0 $B1 $B2"
    CHARGER_D="3 $CHARGER_D_S0 $B0 $B1 $B2"

    for i in "$USB_C" "$PCH_D" "$HINGE_C" "$CHARGER_D"; do
      # Add "--" otherwise ectool will barf when trying to parse negative
      # coefficients.  $i is unquoted on purpose so that it splits into the
      # five separate arguments.
      ectool tmp006cal -- $i
    done
  }

  max_skin_temp=0
  sensor_temperatures=

  # Read every sensor given as an argument and update two globals:
  #   max_skin_temp       - highest reading in C (0 if none is above 0)
  #   sensor_temperatures - "index:temp:" pairs for logging, including
  #                         sensor 9, which does not count towards the max
  get_max_skin_temp() {
    sensor_temperatures=
    max_skin_temp=0
    for i in "$@"; do
      t=$(get_sensor_temp "$i")
      sensor_temperatures="${sensor_temperatures}${i}:${t}:"
      if [ "$t" -gt "$max_skin_temp" ]; then
        max_skin_temp=$t
      fi
    done

    # Record the PECI CPU temperature also.
    i=9
    t=$(get_sensor_temp "$i")
    sensor_temperatures="${sensor_temperatures}${i}:${t}:"
  }

  # Apply a CPU frequency window to every cpu? entry under sysfs.
  #   $1 - maximum frequency
  #   $2 - minimum frequency
  set_cpu_freq() {
    max_freq=$1
    min_freq=$2
    for cpu in /sys/devices/system/cpu/cpu?/cpufreq; do
      # Both limits are written as 800000 before the requested values.
      # NOTE(review): presumably so that min never exceeds max while the
      # window is being moved - confirm.
      echo 800000 > "$cpu/scaling_min_freq"
      echo 800000 > "$cpu/scaling_max_freq"
      echo "$max_freq" > "$cpu/scaling_max_freq"
      echo "$min_freq" > "$cpu/scaling_min_freq"
    done
  }

  # Set the GPU minimum frequency.  Also defines the global GPU_MIN_FREQ that
  # set_gpu_max_freq relies on, so this must run first.
  set_gpu_min_freq() {
    GPU_MIN_FREQ=450
    echo "$GPU_MIN_FREQ" > /sys/kernel/debug/dri/0/i915_min_freq
  }

  # Set the GPU maximum frequency, never going below GPU_MIN_FREQ.
  #   $1 - requested maximum frequency
  set_gpu_max_freq() {
    gpu_max_freq=$1
    if [ "$GPU_MIN_FREQ" -gt "$gpu_max_freq" ]; then
      gpu_max_freq=$GPU_MIN_FREQ
    fi
    echo "$gpu_max_freq" > /sys/kernel/debug/dri/0/i915_max_freq
  }

  # Write the duty cycle value to MSR 0x19a on CPUs 0-3.
  #   $1 - value to write
  set_duty_cycle() {
    duty_cycle=$1
    for i in 0 1 2 3; do
      iotools wrmsr "$i" 0x19a "$duty_cycle"
    done
  }

  # Write the package power limit value to MSR 0x610 on CPU 0.
  #   $1 - value to write
  set_pkg_power_limit() {
    pwr_limit=$1
    iotools wrmsr 0 0x610 "$pwr_limit"
  }

  # Send all arguments, joined into one message, to syslog under the
  # "temp_metrics" tag.
  log_message() {
    logger -t temp_metrics "$*"
  }

  # Skin temperature zones.  A reading above TEMP_THRESHOLD_n selects zone n;
  # inside a zone the step moves up while the reading is above the zone's
  # watermark (_WM) and down while it is below, bounded by the zone's
  # MIN_STEP/MAX_STEP.
  TEMP_THRESHOLD_1=38
  TEMP_THRESHOLD_1_WM=40
  TEMP_THRESHOLD_2=45
  TEMP_THRESHOLD_2_WM=47
  TEMP_THRESHOLD_3=50
  TEMP_THRESHOLD_3_WM=50

  TEMP_THRESHOLD_0_MIN_STEP=0
  TEMP_THRESHOLD_0_MAX_STEP=0
  TEMP_THRESHOLD_1_MIN_STEP=1
  TEMP_THRESHOLD_1_MAX_STEP=5
  TEMP_THRESHOLD_2_MIN_STEP=6
  TEMP_THRESHOLD_2_MAX_STEP=9
  TEMP_THRESHOLD_3_MIN_STEP=10
  TEMP_THRESHOLD_3_MAX_STEP=13

  # NOTE(review): the two start out different, presumably so that the first
  # pass applies a step instead of returning early - confirm.
  current_step=1
  new_step=0

  # Move the throttling step by at most one position, unless the zone bounds
  # force a larger jump, and apply the selected row of all_steps.
  #   $1 - current maximum skin temperature in C
  # Reads and updates the globals current_step and new_step.
  thermal_loop() {
    # Hack to reset turbo activation threshold since BIOS can change it
    # underneath us.
    iotools wrmsr 0 0x64c 0x12

    skin_temp=$1

    # Pick the zone for this reading.
    if [ "$skin_temp" -gt "$TEMP_THRESHOLD_3" ]; then
      temp_watermark=$TEMP_THRESHOLD_3_WM
      min_step=$TEMP_THRESHOLD_3_MIN_STEP
      max_step=$TEMP_THRESHOLD_3_MAX_STEP
    elif [ "$skin_temp" -gt "$TEMP_THRESHOLD_2" ]; then
      temp_watermark=$TEMP_THRESHOLD_2_WM
      min_step=$TEMP_THRESHOLD_2_MIN_STEP
      max_step=$TEMP_THRESHOLD_2_MAX_STEP
    elif [ "$skin_temp" -gt "$TEMP_THRESHOLD_1" ]; then
      temp_watermark=$TEMP_THRESHOLD_1_WM
      min_step=$TEMP_THRESHOLD_1_MIN_STEP
      max_step=$TEMP_THRESHOLD_1_MAX_STEP
    else
      temp_watermark=0
      min_step=$TEMP_THRESHOLD_0_MIN_STEP
      max_step=$TEMP_THRESHOLD_0_MAX_STEP
    fi

    # Step up above the watermark, down below it, hold when equal.  When the
    # step is already at the relevant bound, new_step keeps its previous
    # value.
    if [ "$skin_temp" -gt "$temp_watermark" ]; then
      if [ "$current_step" -ne "$max_step" ]; then
        new_step=$(($current_step + 1))
      fi
    elif [ "$skin_temp" -lt "$temp_watermark" ]; then
      if [ "$current_step" -gt "$min_step" ]; then
        new_step=$(($current_step - 1))
      fi
    else
      new_step=$current_step
    fi

    # Clamp into the zone; this is what produces a multi-step jump when the
    # zone itself has changed.
    if [ "$new_step" -gt "$max_step" ]; then
      new_step=$max_step
    elif [ "$new_step" -lt "$min_step" ]; then
      new_step=$min_step
    fi

    if [ "$new_step" -eq "$current_step" ]; then
      return
    fi
    current_step=$new_step

    step=$(get_step "$new_step")
    # $step is unquoted on purpose so the row's column padding collapses to
    # single spaces in the log line.
    log_message "Throttling (temps: $sensor_temperatures):" $step

    cpu_max_freq=$(get_field "$CPU_MAX_FREQ_FIELD" "$step")
    cpu_min_freq=$(get_field "$CPU_MIN_FREQ_FIELD" "$step")
    gpu_max_freq=$(get_field "$GPU_MAX_FREQ_FIELD" "$step")
    cpu_duty_cycle=$(get_field "$CPU_DUTY_CYCLE_FIELD" "$step")
    pkg_power_limit=$(get_field "$PKG_POWER_LIMIT_FIELD" "$step")

    set_cpu_freq "$cpu_max_freq" "$cpu_min_freq"
    set_gpu_max_freq "$gpu_max_freq"
    set_duty_cycle "$cpu_duty_cycle"
    set_pkg_power_limit "$pkg_power_limit"
  }

  # Print the digits of the "ectool pwmgetfanrpm" output.
  get_fan_rpm() {
    echo $(ectool pwmgetfanrpm | sed 's/[^0-9]//g')
  }

  # Ask the EC for a fan speed.
  #   $1 - target RPM
  set_fan_rpm() {
    ectool pwmsetfanrpm "$1"
  }

  # Put every fan hysteresis threshold back to 105, a value the skin
  # temperature comparisons in fan_loop are not expected to exceed.
  reset_fan_thresholds() {
    temp_low1=105
    temp_low2=105
    temp_low3=105
    temp_low4=105
    temp_low5=105
    temp_low6=105
  }

  last_rpm=10
  temp_low1=105
  temp_low2=105
  temp_low3=105
  temp_low4=105
  temp_low5=105
  temp_low6=105

  # Choose a fan speed for the given skin temperature and apply it when it
  # differs from the last request or when the fan was stopped behind our back.
  #   $1 - current maximum skin temperature in C
  # Each band is entered above its upper bound and, once entered, held until
  # the reading is no longer above the band's temp_lowN value.
  fan_loop() {
    skin_temp=$1

    if [ "$skin_temp" -gt 48 ] || [ "$skin_temp" -gt "$temp_low1" ]; then
      rpm=9300
      reset_fan_thresholds
      temp_low1=46
    elif [ "$skin_temp" -gt 44 ] || [ "$skin_temp" -gt "$temp_low2" ]; then
      rpm=8000
      reset_fan_thresholds
      temp_low2=43
    elif [ "$skin_temp" -gt 42 ] || [ "$skin_temp" -gt "$temp_low3" ]; then
      rpm=7000
      reset_fan_thresholds
      temp_low3=41
    elif [ "$skin_temp" -gt 40 ] || [ "$skin_temp" -gt "$temp_low4" ]; then
      rpm=5500
      reset_fan_thresholds
      temp_low4=39
    elif [ "$skin_temp" -gt 38 ] || [ "$skin_temp" -gt "$temp_low5" ]; then
      rpm=4000
      reset_fan_thresholds
      temp_low5=34
    elif [ "$skin_temp" -gt 33 ] || [ "$skin_temp" -gt "$temp_low6" ]; then
      rpm=3000
      reset_fan_thresholds
      temp_low6=30
    else
      rpm=0
      reset_fan_thresholds
    fi

    # During S0->S3->S0 transitions, the EC sets the fan RPM to 0. This script
    # isn't aware of such transitions. Read the current fan RPM again to see
    # if it got set to 0. Note that comparing the current fan RPM against last
    # requested RPM won't suffice since the actual fan RPM may not be exactly
    # what was requested.
    cur_rpm=$(get_fan_rpm)
    # Nothing to do when the fan is spinning and the request is unchanged, or
    # when it is stopped and should be.  If cur_rpm is not a number both
    # groups evaluate false and the speed is applied again.
    if { [ "$cur_rpm" -ne 0 ] && [ "$last_rpm" -eq "$rpm" ]; } || \
       { [ "$cur_rpm" -eq 0 ] && [ "$rpm" -eq 0 ]; }; then
      last_rpm=$rpm
      return
    fi

    log_message "Setting fan RPM (temps: $sensor_temperatures): $last_rpm -> $rpm"

    last_rpm=$rpm
    set_fan_rpm "$rpm"
  }

  # Thermal zone 1 is for operating systems where a userland thermal loop
  # doesn't exist. Disable it.
  if [ -e /sys/class/thermal/thermal_zone1/mode ]; then
    # printf writes the word without a trailing newline, portably.
    printf '%s' 'disabled' > /sys/class/thermal/thermal_zone1/mode
  fi

  # Enable the fan in case no other code has enabled it.
  ectool fanduty 0

  # Get list of sensors to monitor.
  sensor_list=$(get_sensor_list)

  # Set sensor calibration data.
  set_calibration_data

  # Set minimum GPU frequency.
  set_gpu_min_freq

  loop_count=0
  ec_fan_loop=0
  while true; do
    sleep 10
    loop_count=$(($loop_count + 1))

    # Read the max skin temperature.  $sensor_list is unquoted on purpose so
    # that each index becomes its own argument.
    get_max_skin_temp $sensor_list

    if [ "$max_skin_temp" -eq 0 ]; then
      # No usable reading: hand fan control back to the EC, once.
      if [ "$ec_fan_loop" -eq 0 ]; then
        log_message "Invalid max skin temp. Switching to EC fan loop."
        ectool autofanctrl
        ec_fan_loop=1
        last_rpm=10
      fi
    else
      # Run the fan loop.
      fan_loop "$max_skin_temp"
      ec_fan_loop=0

      # Run the thermal loop.
      thermal_loop "$max_skin_temp"
    fi

    # Report the metrics once every 30 seconds.
    if [ "$loop_count" -lt 3 ]; then
      continue
    fi
    loop_count=0

    ectool temps all | while read -r line; do
      sensor=${line%%:*}
      tempk=${line##* }

      # Only "<index>: <kelvin>" lines are reported.  Anything else is
      # skipped so that it can neither produce a bogus sample nor trip an
      # arithmetic error below.
      case "$sensor" in
        ''|*[!0-9]*) continue ;;
      esac
      case "$tempk" in
        ''|*[!0-9]*) continue ;;
      esac

      index=$(printf "%02d" "$sensor")
      tempc=$(($tempk - $TEMP_OFFSET))

      # ignore values below freezing
      if [ "$tempc" -lt 0 ]; then
        tempc=0
      fi

      # Use a linear histogram with 1 C buckets starting at 0.
      N_SLOTS=180
      metrics_client -e "Platform.Temperature.Sensor$index" "$tempc" "$N_SLOTS"
    done
  done
end script