A Launcher Script
#!/bin/bash
# Launcher: invokes the `sep` collector a fixed number of times in a row,
# passing the same generated output name to every invocation.

loops_to_do=10 # how many times sep is invoked
loop=0

# mktemp is used only to obtain a unique name in the current directory; the
# file it creates is deleted straight away. Presumably sep creates its own
# output under that name (the message below suggests a ".txt" suffix is
# added) -- confirm against sep's documentation.
my_file=$(mktemp -p . seprun.XXXXX) || exit 1
rm -- "$my_file"
echo "writing to file ${my_file}.txt"

while ((loop < loops_to_do)); do
  echo "$loop"
  # NOTE(review): because of the space after the comma, the shell passes the
  # two event specs as separate arguments (the first one keeps its trailing
  # comma). Kept exactly as in the original -- confirm this is the form sep
  # expects.
  sep -start -c -d 1 -ec "CPU_CLK_UNHALTED.CORE_P:OS=no", "INST_RETIRED.ANY:OS=no" -out "$my_file"
  loop=$((loop + 1))
done
taskset
For testing, taskset is a handy way to force CPU affinity for a job. For example,
the command `taskset -c 2 openssl speed rsa`
launches the test only on processor '2'.
Script to parse SEP output
#!/usr/bin/python
"""Report cycles-per-instruction (CPI) figures from a SEP text output file.

Usage: <script> (filename)

How the input is interpreted, line by line:
  * a line starting with '#' ends the current time step (if it has samples);
  * a line starting with CPU_CLK_UNHALTED or INST_RETIRED contributes the
    number found in its third comma-separated field;
  * a line starting with whitespace (blank lines included) is ignored;
  * any other line aborts the run with an error message.

Within one time step the n-th clocktick sample is paired with the n-th
instruction sample and the pair is treated as one CPU.  Presumably SEP writes
one line per CPU in a stable order -- confirm against real SEP output.
"""
import sys
import re

# Value substituted for an instruction count of zero so that the CPI
# divisions below can never divide by zero.
ZERO_INSTR_STANDIN = .0000001

# Line classifiers, compiled once rather than once per input line.
COMMENT_RE = re.compile("^#")
INSTR_RE = re.compile("^INST_RETIRED")
CLOCK_RE = re.compile("^CPU_CLK_UNHALTED")


def parse_samples(lines):
    """Group the event counts found in `lines` into time steps.

    lines -- iterable of text lines (an open file works).

    Returns (all_clockticks, all_instr_retired): two lists with one entry per
    time step, each entry being the list of float counts seen in that step,
    in input order.  Instruction counts equal to zero are replaced by
    ZERO_INSTR_STANDIN.

    Prints a message and exits with status 1 on a line that is neither a
    comment, a known event, nor whitespace.  A malformed count raises
    ValueError (from float) and a line with fewer than three fields raises
    IndexError.
    """
    all_clockticks = []
    all_instr_retired = []
    this_clockticks = []
    this_instr_retired = []

    for this_line in lines:
        if COMMENT_RE.match(this_line):
            # A comment closes the running step, but only when it actually
            # holds samples; header/trailer comments must not create empty
            # steps.
            if this_clockticks or this_instr_retired:
                all_clockticks.append(this_clockticks)
                all_instr_retired.append(this_instr_retired)
                this_clockticks = []
                this_instr_retired = []
        elif INSTR_RE.match(this_line):
            # Compare numerically: the field may read "0", "0\n", "0.0", ...
            count = float(this_line.split(",")[2])
            if count == 0:
                count = ZERO_INSTR_STANDIN
            this_instr_retired.append(count)
        elif CLOCK_RE.match(this_line):
            this_clockticks.append(float(this_line.split(",")[2]))
        elif not this_line or this_line[0].isspace():
            # Blank line or line starting with whitespace: nothing to record.
            continue
        else:
            print("Encountered a non-clocktick/instruction retired event")
            print(this_line)
            sys.exit(1)

    # Input that does not end with a comment line leaves one step open.
    if this_clockticks or this_instr_retired:
        all_clockticks.append(this_clockticks)
        all_instr_retired.append(this_instr_retired)
    return all_clockticks, all_instr_retired


def compute_cpi(all_clockticks, all_instr_retired):
    """Derive the CPI figures from the per-step sample lists.

    Returns a 4-tuple:
      cpi_per_cpu_per_step -- for each step, the list of each CPU's own CPI
      cpi_per_step         -- for each step, summed clockticks / summed
                              instructions over all CPUs
      cpi_per_cpu          -- for each CPU, CPI over the whole run
      overall_cpi          -- total clockticks / total instructions

    Raises ValueError when there are no steps, or when a step does not hold
    the same number of clocktick and instruction samples as the first step
    has clocktick samples (that number is taken as the CPU count).
    """
    if not all_clockticks:
        raise ValueError("no CPU_CLK_UNHALTED/INST_RETIRED samples found")

    # 'Detect' the number of processors from the first step and insist that
    # every step agrees, instead of failing later with an IndexError.
    cpu_count = len(all_clockticks[0])
    for step, (clk, ins) in enumerate(zip(all_clockticks, all_instr_retired)):
        if len(clk) != cpu_count or len(ins) != cpu_count:
            raise ValueError(
                "time step %d has %d clocktick and %d instruction samples; "
                "expected %d of each" % (step, len(clk), len(ins), cpu_count))

    cpi_per_cpu_per_step = []
    cpi_per_step = []
    for clk, ins in zip(all_clockticks, all_instr_retired):
        # Each CPU's CPI comes from its own two counters only.
        cpi_per_cpu_per_step.append([c / i for c, i in zip(clk, ins)])
        cpi_per_step.append(sum(clk) / sum(ins))

    clockticks_per_cpu = [sum(step[j] for step in all_clockticks)
                          for j in range(cpu_count)]
    instr_retired_per_cpu = [sum(step[j] for step in all_instr_retired)
                             for j in range(cpu_count)]
    cpi_per_cpu = [c / i
                   for c, i in zip(clockticks_per_cpu, instr_retired_per_cpu)]

    clockticks_total = sum(c for step in all_clockticks for c in step)
    instr_retired_total = sum(i for step in all_instr_retired for i in step)
    overall_cpi = clockticks_total / instr_retired_total

    return cpi_per_cpu_per_step, cpi_per_step, cpi_per_cpu, overall_cpi


def main(argv):
    """Parse the file named by argv[1] and print the CPI report."""
    if len(argv) < 2:
        print("usage: %s (filename)" % argv[0])
        sys.exit(1)

    # The with-block closes the file even if parsing exits early.
    with open(argv[1]) as the_file:
        all_clockticks, all_instr_retired = parse_samples(the_file)

    try:
        (cpi_per_cpu_per_step, cpi_per_step,
         cpi_per_cpu, overall_cpi) = compute_cpi(all_clockticks,
                                                 all_instr_retired)
    except ValueError as err:
        print(err)
        sys.exit(1)

    print("cpi per cpu for each time step")
    print(cpi_per_cpu_per_step)
    print("\ncpi per time step, all CPUs")
    print(cpi_per_step)
    print("\noverall cpi per cpu")
    print(cpi_per_cpu)
    print("\noverall cpi, all CPUs = %f" % overall_cpi)


if __name__ == "__main__":
    main(sys.argv)
--
MattWalsh - 20 Jul 2006