Skip to content

Commit 2e81304

Browse files
committed
Switch the fio benchmark to use libaio and multiple fio jobs to measure IOPS
and throughput. This increases the disk space required to run the benchmark from 5 GB to 16 GB.
1 parent e9572e3 commit 2e81304

File tree

3 files changed

+208
-61
lines changed

3 files changed

+208
-61
lines changed

internal/report/table_defs.go

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -669,7 +669,7 @@ var tableDefinitions = map[string]TableDefinition{
669669
StorageBenchmarkTableName: {
670670
Name: StorageBenchmarkTableName,
671671
MenuLabel: StorageBenchmarkTableName,
672-
HasRows: false,
672+
HasRows: true,
673673
ScriptNames: []string{
674674
script.StorageBenchmarkScriptName,
675675
},
@@ -2440,15 +2440,50 @@ func numaBenchmarkTableValues(outputs map[string]script.ScriptOutput) []Field {
24402440
return fields
24412441
}
24422442

2443+
// formatOrEmpty formats a value and returns an empty string if the formatted value is "0".
2444+
func formatOrEmpty(format string, value any) string {
2445+
s := fmt.Sprintf(format, value)
2446+
if s == "0" {
2447+
return ""
2448+
}
2449+
return s
2450+
}
2451+
24432452
func storageBenchmarkTableValues(outputs map[string]script.ScriptOutput) []Field {
2444-
readBW, writeBW := storagePerfFromOutput(outputs)
2445-
if readBW == "" && writeBW == "" {
2453+
fioData, err := storagePerfFromOutput(outputs)
2454+
if err != nil {
2455+
slog.Error("failed to get storage benchmark data", slog.String("error", err.Error()))
24462456
return []Field{}
24472457
}
2448-
return []Field{
2449-
{Name: "Single-Thread Read Bandwidth", Values: []string{readBW}},
2450-
{Name: "Single-Thread Write Bandwidth", Values: []string{writeBW}},
2458+
2459+
if len(fioData.Jobs) == 0 {
2460+
return []Field{}
24512461
}
2462+
2463+
// Initialize the fields for metrics (column headers)
2464+
fields := []Field{
2465+
{Name: "Job"},
2466+
{Name: "Read Latency (us)"},
2467+
{Name: "Read IOPs"},
2468+
{Name: "Read Bandwidth (MiB/s)"},
2469+
{Name: "Write Latency (us)"},
2470+
{Name: "Write IOPs"},
2471+
{Name: "Write Bandwidth (MiB/s)"},
2472+
}
2473+
2474+
// For each FIO job, create a new row and populate its values
2475+
slog.Debug("fioData", slog.Any("jobs", fioData.Jobs))
2476+
for _, job := range fioData.Jobs {
2477+
fields[0].Values = append(fields[0].Values, job.Jobname)
2478+
fields[1].Values = append(fields[1].Values, formatOrEmpty("%.0f", job.Read.LatNs.Mean/1000))
2479+
fields[2].Values = append(fields[2].Values, formatOrEmpty("%.0f", job.Read.IopsMean))
2480+
fields[3].Values = append(fields[3].Values, formatOrEmpty("%d", job.Read.Bw/1024))
2481+
fields[4].Values = append(fields[4].Values, formatOrEmpty("%.0f", job.Write.LatNs.Mean/1000))
2482+
fields[5].Values = append(fields[5].Values, formatOrEmpty("%.0f", job.Write.IopsMean))
2483+
fields[6].Values = append(fields[6].Values, formatOrEmpty("%d", job.Write.Bw/1024))
2484+
}
2485+
2486+
return fields
24522487
}
24532488

24542489
// telemetry

internal/report/table_helpers_benchmarking.go

Lines changed: 109 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,100 @@ package report
44
// SPDX-License-Identifier: BSD-3-Clause
55

66
import (
7+
"encoding/json"
78
"fmt"
89
"log/slog"
910
"perfspect/internal/script"
1011
"perfspect/internal/util"
11-
"regexp"
1212
"strconv"
1313
"strings"
1414
)
1515

16+
// fioOutput is the top-level struct for the FIO JSON report.
17+
// ref: https://fio.readthedocs.io/en/latest/fio_doc.html#json-output
18+
type fioOutput struct {
19+
FioVersion string `json:"fio version"`
20+
Timestamp int64 `json:"timestamp"`
21+
TimestampMs int64 `json:"timestamp_ms"`
22+
Time string `json:"time"`
23+
Jobs []fioJob `json:"jobs"`
24+
}
25+
26+
// Job represents a single job's results within the FIO report.
27+
type fioJob struct {
28+
Jobname string `json:"jobname"`
29+
Groupid int `json:"groupid"`
30+
JobStart int64 `json:"job_start"`
31+
Error int `json:"error"`
32+
Eta int `json:"eta"`
33+
Elapsed int `json:"elapsed"`
34+
Read fioIOStats `json:"read"`
35+
Write fioIOStats `json:"write"`
36+
Trim fioIOStats `json:"trim"`
37+
JobRuntime int `json:"job_runtime"`
38+
UsrCPU float64 `json:"usr_cpu"`
39+
SysCPU float64 `json:"sys_cpu"`
40+
Ctx int `json:"ctx"`
41+
Majf int `json:"majf"`
42+
Minf int `json:"minf"`
43+
IodepthLevel map[string]float64 `json:"iodepth_level"`
44+
IodepthSubmit map[string]float64 `json:"iodepth_submit"`
45+
IodepthComplete map[string]float64 `json:"iodepth_complete"`
46+
LatencyNs map[string]float64 `json:"latency_ns"`
47+
LatencyUs map[string]float64 `json:"latency_us"`
48+
LatencyMs map[string]float64 `json:"latency_ms"`
49+
LatencyDepth int `json:"latency_depth"`
50+
LatencyTarget int `json:"latency_target"`
51+
LatencyPercentile float64 `json:"latency_percentile"`
52+
LatencyWindow int `json:"latency_window"`
53+
}
54+
55+
// IOStats holds the detailed I/O statistics for read, write, or trim operations.
56+
type fioIOStats struct {
57+
IoBytes int64 `json:"io_bytes"`
58+
IoKbytes int64 `json:"io_kbytes"`
59+
BwBytes int64 `json:"bw_bytes"`
60+
Bw int64 `json:"bw"`
61+
Iops float64 `json:"iops"`
62+
Runtime int `json:"runtime"`
63+
TotalIos int `json:"total_ios"`
64+
ShortIos int `json:"short_ios"`
65+
DropIos int `json:"drop_ios"`
66+
SlatNs fioLatencyStats `json:"slat_ns"`
67+
ClatNs fioLatencyStatsPercentiles `json:"clat_ns"`
68+
LatNs fioLatencyStats `json:"lat_ns"`
69+
BwMin int `json:"bw_min"`
70+
BwMax int `json:"bw_max"`
71+
BwAgg float64 `json:"bw_agg"`
72+
BwMean float64 `json:"bw_mean"`
73+
BwDev float64 `json:"bw_dev"`
74+
BwSamples int `json:"bw_samples"`
75+
IopsMin int `json:"iops_min"`
76+
IopsMax int `json:"iops_max"`
77+
IopsMean float64 `json:"iops_mean"`
78+
IopsStddev float64 `json:"iops_stddev"`
79+
IopsSamples int `json:"iops_samples"`
80+
}
81+
82+
// fioLatencyStats holds basic latency metrics.
83+
type fioLatencyStats struct {
84+
Min int64 `json:"min"`
85+
Max int64 `json:"max"`
86+
Mean float64 `json:"mean"`
87+
Stddev float64 `json:"stddev"`
88+
N int `json:"N"`
89+
}
90+
91+
// LatencyStatsPercentiles holds latency metrics including percentiles.
92+
type fioLatencyStatsPercentiles struct {
93+
Min int64 `json:"min"`
94+
Max int64 `json:"max"`
95+
Mean float64 `json:"mean"`
96+
Stddev float64 `json:"stddev"`
97+
N int `json:"N"`
98+
Percentile map[string]int64 `json:"percentile"`
99+
}
100+
16101
func cpuSpeedFromOutput(outputs map[string]script.ScriptOutput) string {
17102
var vals []float64
18103
for line := range strings.SplitSeq(strings.TrimSpace(outputs[script.SpeedBenchmarkScriptName].Stdout), "\n") {
@@ -35,27 +120,30 @@ func cpuSpeedFromOutput(outputs map[string]script.ScriptOutput) string {
35120
return fmt.Sprintf("%.0f", util.GeoMean(vals))
36121
}
37122

38-
func storagePerfFromOutput(outputs map[string]script.ScriptOutput) (readBW, writeBW string) {
39-
// fio output format:
40-
// READ: bw=140MiB/s (146MB/s), 140MiB/s-140MiB/s (146MB/s-146MB/s), io=16.4GiB (17.6GB), run=120004-120004msec
41-
// WRITE: bw=139MiB/s (146MB/s), 139MiB/s-139MiB/s (146MB/s-146MB/s), io=16.3GiB (17.5GB), run=120004-120004msec
42-
re := regexp.MustCompile(` bw=(\d+[.]?[\d]*\w+\/s)`)
43-
for line := range strings.SplitSeq(strings.TrimSpace(outputs[script.StorageBenchmarkScriptName].Stdout), "\n") {
44-
if strings.Contains(line, "READ: bw=") {
45-
matches := re.FindStringSubmatch(line)
46-
if len(matches) != 0 {
47-
readBW = matches[1]
48-
}
49-
} else if strings.Contains(line, "WRITE: bw=") {
50-
matches := re.FindStringSubmatch(line)
51-
if len(matches) != 0 {
52-
writeBW = matches[1]
53-
}
54-
} else if strings.Contains(line, "ERROR: ") {
55-
slog.Error("failed to run storage benchmark", slog.String("line", line))
56-
}
123+
func storagePerfFromOutput(outputs map[string]script.ScriptOutput) (fioOutput, error) {
124+
output := outputs[script.StorageBenchmarkScriptName].Stdout
125+
slog.Debug("storage benchmark output", slog.String("output", output))
126+
127+
i := strings.Index(output, "{\n \"fio version\"")
128+
if i >= 0 {
129+
output = output[i:]
130+
} else {
131+
return fioOutput{}, fmt.Errorf("unable to find fio output")
57132
}
58-
return
133+
if strings.Contains(output, "ERROR:") {
134+
return fioOutput{}, fmt.Errorf("failed to run storage benchmark: %s", output)
135+
}
136+
137+
slog.Debug("parsing storage benchmark output")
138+
var fioData fioOutput
139+
if err := json.Unmarshal([]byte(output), &fioData); err != nil {
140+
return fioOutput{}, fmt.Errorf("error unmarshalling JSON: %w", err)
141+
}
142+
if len(fioData.Jobs) == 0 {
143+
return fioOutput{}, fmt.Errorf("no jobs found in storage benchmark output")
144+
}
145+
146+
return fioData, nil
59147
}
60148

61149
// avxTurboFrequenciesFromOutput parses the output of avx-turbo and returns the turbo frequencies as a map of instruction type to frequencies

internal/script/script_defs.go

Lines changed: 58 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,41 +1159,65 @@ avx-turbo --min-threads=1 --max-threads=$num_cores_per_socket --test scalar_iadd
11591159
StorageBenchmarkScriptName: {
11601160
Name: StorageBenchmarkScriptName,
11611161
ScriptTemplate: `
1162-
numjobs=1
1163-
file_size_g=5
1164-
space_needed_k=$(( (file_size_g + 1) * 1024 * 1024 * numjobs )) # space needed in kilobytes: (file_size_g + 1) GB per job
1165-
ramp_time=5s
1166-
runtime=120s
1167-
ioengine=sync
1168-
# check if .StorageDir is a directory
1169-
if [[ ! -d "{{.StorageDir}}" ]]; then
1170-
echo "ERROR: {{.StorageDir}} does not exist"
1171-
exit 1
1172-
fi
1173-
# check if .StorageDir is writeable
1174-
if [[ ! -w "{{.StorageDir}}" ]]; then
1175-
echo "ERROR: {{.StorageDir}} is not writeable"
1176-
exit 1
1177-
fi
1178-
# check if .StorageDir has enough space
1179-
# example output for df -P /tmp:
1180-
# Filesystem 1024-blocks Used Available Capacity Mounted on
1181-
# /dev/sdd 1055762868 196668944 805390452 20% /
1182-
available_space=$(df -P "{{.StorageDir}}" | awk 'NR==2 {print $4}')
1183-
if [[ $available_space -lt $space_needed_k ]]; then
1184-
echo "ERROR: {{.StorageDir}} has ${available_space}K available space. A minimum of ${space_needed_k}K is required to run this benchmark."
1185-
exit 1
1186-
fi
1187-
# create temporary directory for fio test
11881162
test_dir=$(mktemp -d --tmpdir="{{.StorageDir}}")
1189-
sync
1190-
/sbin/sysctl -w vm.drop_caches=3 || true
1191-
# single-threaded read & write bandwidth test
1192-
fio --name=bandwidth --directory=$test_dir --numjobs=$numjobs \
1193-
--size="$file_size_g"G --time_based --runtime=$runtime --ramp_time=$ramp_time --ioengine=$ioengine \
1194-
--direct=1 --verify=0 --bs=1M --iodepth=64 --rw=rw \
1195-
--group_reporting=1 --iodepth_batch_submit=64 \
1196-
--iodepth_batch_complete_max=64
1163+
FIO_JOBFILE=$(mktemp $test_dir/fio-job-XXXXXX.fio)
1164+
1165+
cat > $FIO_JOBFILE <<EOF
1166+
[global]
1167+
ioengine=libaio
1168+
direct=1
1169+
size=5G
1170+
ramp_time=5s
1171+
time_based
1172+
create_on_open=1
1173+
unlink=1
1174+
directory=$test_dir
1175+
1176+
[iodepth_1_bs_4k_rand]
1177+
wait_for_previous
1178+
runtime=30s
1179+
rw=randrw
1180+
iodepth=1
1181+
blocksize=4k
1182+
iodepth_batch_submit=1
1183+
iodepth_batch_complete_max=1
1184+
1185+
[iodepth_256_bs_4k_rand]
1186+
wait_for_previous
1187+
runtime=30s
1188+
rw=randrw
1189+
iodepth=256
1190+
blocksize=4k
1191+
iodepth_batch_submit=256
1192+
iodepth_batch_complete_max=256
1193+
1194+
[iodepth_1_bs_1M_numjobs_16]
1195+
wait_for_previous
1196+
size=1G
1197+
runtime=30s
1198+
rw=readwrite
1199+
iodepth=1
1200+
iodepth_batch_submit=1
1201+
iodepth_batch_complete_max=1
1202+
blocksize=1M
1203+
numjobs=16
1204+
group_reporting=1
1205+
1206+
[iodepth_64_bs_1M_numjobs_16]
1207+
wait_for_previous
1208+
size=1G
1209+
runtime=30s
1210+
rw=readwrite
1211+
iodepth=64
1212+
iodepth_batch_submit=64
1213+
iodepth_batch_complete_max=64
1214+
blocksize=1M
1215+
numjobs=16
1216+
group_reporting=1
1217+
EOF
1218+
1219+
fio --output-format=json $FIO_JOBFILE
1220+
11971221
rm -rf $test_dir
11981222
`,
11991223
Superuser: true,

0 commit comments

Comments
 (0)