Based on Earl Jew’s presentation on the 6th of October 2022, “Simplest starting tactic for Power10 AIX exploitation V1.2”. Excellent IBM POWER VUG materials can be found here:
https://ibm.ent.box.com/s/6hn2orvig4r2peu6e39owmp07ewyyf4x
And the meeting presentation replay here:
https://ibm.ent.box.com/s/5atsp26sxup1ob04c3urbkiovj8epbax
This is an analytic way to calculate the virtual processors (vCPU) and entitlement (eCPU) on IBM POWER LPARs based on actual load during busy periods of the system. It is a starting tactic, so rinse-and-repeat.
The script below crunches the numbers for Earl’s eCPU/vCPU calculations. To gather data, as it says at the beginning of the script, set up a cron job to run:
uptime | awk '{ print $(NF-2),$(NF-1),$NF }' | tr -d ',' >>/tmp/uptime.out
every twenty minutes or so, for three weeks (give or take), then run this script.
The script finds the highest 5-minute load average, which gives the peak number of threads requesting CPU time, and divides it by the SMT# (rounding down, then adding one) so that the resulting number of vCPUs, multiplied by the SMT#, is above that peak.
It also does a running calculation of the busiest 5 runs, equaling 1h40m, and calculates the eCPU needed based on the average of all 1-, 5- & 15-minute loads divided by the SMT# for the LPAR.
#!/bin/ksh93
###############################################
#
# eCPU/vCPU planning script
#
# 2022-10-26 Henrik Morsing 1.0 Initial
#
# Reads load-average samples from /tmp/uptime.out (one sample per line:
# 1-minute, 5-minute and 15-minute load) and derives a suggested
# entitlement (eCPU) and virtual processor count (vCPU) from them.
#
ver=1.0

# Run:
# uptime | awk '{ print $(NF-2),$(NF-1),$NF }' | tr -d ',' >>/tmp/uptime.out
# every twenty minutes for three weeks

# Running state used by the sample loop further down.
sum5=0          # sum of the last five 5-minute load values
sum5_max=0      # not referenced anywhere else in this script
sum5_avg_max=0  # highest sum5 seen so far (marks the busiest period)
max5=0          # highest single 5-minute load value seen so far

# DBG=1 # Uncomment for running calculation output

# Find SMT#?
if [[ "$(uname)" == "AIX" ]]
then
  # NOTE(review): this takes field 6 of the smtctl line containing
  # "SMT threads per processor"; that line may describe the maximum the
  # system supports rather than the mode currently active - confirm.
  smt="$(smtctl | grep "SMT threads per processor" | awk '{ print $6 }')"
else
  # Field 7 of the kernel's "CPU maps initialized for ..." boot message;
  # fall back to 8 when the message cannot be found or dmesg is unreadable.
  smt_temp="$(dmesg | grep "CPU maps initialized for" | awk '{ print $7 }')"
  smt=${smt_temp:-8}
fi

# Every later calculation divides by ${smt}; stop now with a clear message
# rather than feeding bc an empty or non-numeric divisor.
case ${smt} in
  ''|*[!0-9]*|0)
    echo "Could not determine the SMT mode (got '${smt}')." >&2
    exit 1
    ;;
esac

echo
echo "Running on $(uname -n), ${smt} SMT system. Current load:"
uptime
echo

# Without the sample file there is nothing to analyse.
if [[ ! -r /tmp/uptime.out ]]
then
  echo "Cannot read /tmp/uptime.out - collect samples first (see the cron command in this script's header)." >&2
  exit 1
fi

# Read the count from stdin so wc prints no file name, and pass it through
# arithmetic expansion to drop the leading blanks some wc implementations
# add (those blanks made 'cut -f 1 -d " "' return an empty string).
entries=$(( $(wc -l </tmp/uptime.out) ))
echo "Analysing ${entries} load entries."
if [[ ${entries} -lt 1000 ]]
then
  echo "(Is that enough?)"
fi
echo
# Rolling history of the five most recent samples per load window:
# last[W][1] is the newest W-minute load value, last[W][5] the oldest kept.
typeset -A last

# Give eCPU a numeric starting value so the first "-gt" comparison below
# has a number on both sides.
eCPU=0

# Each input line holds the 1-, 5- and 15-minute load averages of one sample.
while read -r m1 m5 m15
do
  # Skip blank or truncated lines; an empty field would break the
  # arithmetic below.
  if [[ -z ${m1} || -z ${m5} || -z ${m15} ]]
  then
    continue
  fi

  # Firstly store last five values (shift the history by one, newest in [1]).
  # Slots that have not been filled yet count as 0.
  last[1][5]=${last[1][4]:-0}
  last[1][4]=${last[1][3]:-0}
  last[1][3]=${last[1][2]:-0}
  last[1][2]=${last[1][1]:-0}
  last[1][1]=${m1}

  last[5][5]=${last[5][4]:-0}
  last[5][4]=${last[5][3]:-0}
  last[5][3]=${last[5][2]:-0}
  last[5][2]=${last[5][1]:-0}
  last[5][1]=${m5}

  last[15][5]=${last[15][4]:-0}
  last[15][4]=${last[15][3]:-0}
  last[15][3]=${last[15][2]:-0}
  last[15][2]=${last[15][1]:-0}
  last[15][1]=${m15}

  ########
  # vCPU #
  ########

  # Store max for vCPU calculation and calculate new vCPU when new max is found
  if [[ ${m5} -gt ${max5} ]]
  then
    max5=${m5}
    # Highest 5 minute load divided by SMT#, truncated by bc (scale=0),
    # plus one: the resulting vCPUs * SMT# is always above the peak load.
    # Sadly, AIX and Linux ksh93 does arithmetic differently, so bc is used
    # for the division.
    vCPU=$(echo "scale=0;${max5} / ${smt} + 1" | bc -l)
    [[ -n ${DBG} ]] && echo "vCPU: ${vCPU} based on max5: ${max5}"
  fi

  ########
  # eCPU #
  ########

  # Sum up the last five values of each window. sum5 alone is used to
  # determine the busiest period.
  sum1=$(( ${last[1][1]}+${last[1][2]}+${last[1][3]}+${last[1][4]}+${last[1][5]} ))
  sum5=$(( ${last[5][1]}+${last[5][2]}+${last[5][3]}+${last[5][4]}+${last[5][5]} ))
  sum15=$(( ${last[15][1]}+${last[15][2]}+${last[15][3]}+${last[15][4]}+${last[15][5]} ))

  # And sum up all fifteen values, this is used for the actual calculation.
  sum_all=$(( ${sum1}+${sum5}+${sum15} ))

  # Keep track of max. When we hit a new max, calculate new eCPU/vCPU pair
  if [[ ${sum5} -gt ${sum5_avg_max} ]]
  then
    # Store max value
    sum5_avg_max=${sum5}

    # Rather than storing all values, just calculate eCPU straight away:
    # average of the fifteen load values, divided by SMT#.
    eCPU_temp=$(echo "scale=2;${sum_all} / 15 / ${smt}" | bc -l)

    # We don't want to discard previous higher values of eCPU
    if [[ "${eCPU_temp}" -gt "${eCPU}" ]]
    then
      eCPU=${eCPU_temp}
    else
      [[ -n ${DBG} ]] && echo "eCPU calculation discarded"
    fi
    [[ -n ${DBG} ]] && echo "eCPU: ${eCPU}"
  fi
done </tmp/uptime.out
# Print the resulting eCPU/vCPU pair and the vCPU-to-eCPU ratio.
echo "Final calculation:"

# If no sample produced a value, vCPU and/or eCPU are empty, and an eCPU of
# zero cannot be divided by; the ratio would then be an arithmetic error,
# so report the situation instead.
if [[ -z ${vCPU} || -z ${eCPU} ]] || (( ${eCPU} == 0 ))
then
  echo "eCPU is zero or no load samples were processed - cannot calculate a ratio." >&2
  exit 1
fi

echo "eCPU / vCPU: ${eCPU} / ${vCPU} - Ratio: $(( ${vCPU} / ${eCPU} ))"
exit 0