47 changes: 41 additions & 6 deletions internal/report/table_defs.go
@@ -669,7 +669,7 @@ var tableDefinitions = map[string]TableDefinition{
StorageBenchmarkTableName: {
Name: StorageBenchmarkTableName,
MenuLabel: StorageBenchmarkTableName,
HasRows: false,
HasRows: true,
ScriptNames: []string{
script.StorageBenchmarkScriptName,
},
@@ -2440,15 +2440,50 @@ func numaBenchmarkTableValues(outputs map[string]script.ScriptOutput) []Field {
return fields
}

// formatOrEmpty formats a value and returns an empty string if the formatted value is "0".
func formatOrEmpty(format string, value any) string {
s := fmt.Sprintf(format, value)
if s == "0" {
return ""
}
return s
}
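// For example:
//
//	formatOrEmpty("%.0f", 0.0)   // -> ""
//	formatOrEmpty("%.0f", 187.4) // -> "187"
//	formatOrEmpty("%d", 512)     // -> "512"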

func storageBenchmarkTableValues(outputs map[string]script.ScriptOutput) []Field {
readBW, writeBW := storagePerfFromOutput(outputs)
if readBW == "" && writeBW == "" {
fioData, err := storagePerfFromOutput(outputs)
if err != nil {
slog.Error("failed to get storage benchmark data", slog.String("error", err.Error()))
return []Field{}
}
return []Field{
{Name: "Single-Thread Read Bandwidth", Values: []string{readBW}},
{Name: "Single-Thread Write Bandwidth", Values: []string{writeBW}},

if len(fioData.Jobs) == 0 {
return []Field{}
}

// Initialize the fields for metrics (column headers)
fields := []Field{
{Name: "Job"},
{Name: "Read Latency (us)"},
{Name: "Read IOPs"},
{Name: "Read Bandwidth (MiB/s)"},
{Name: "Write Latency (us)"},
{Name: "Write IOPs"},
{Name: "Write Bandwidth (MiB/s)"},
}

// For each FIO job, create a new row and populate its values
slog.Debug("fioData", slog.Any("jobs", fioData.Jobs))
for _, job := range fioData.Jobs {
fields[0].Values = append(fields[0].Values, job.Jobname)
fields[1].Values = append(fields[1].Values, formatOrEmpty("%.0f", job.Read.LatNs.Mean/1000))
fields[2].Values = append(fields[2].Values, formatOrEmpty("%.0f", job.Read.IopsMean))
fields[3].Values = append(fields[3].Values, formatOrEmpty("%d", job.Read.Bw/1024))
fields[4].Values = append(fields[4].Values, formatOrEmpty("%.0f", job.Write.LatNs.Mean/1000))
fields[5].Values = append(fields[5].Values, formatOrEmpty("%.0f", job.Write.IopsMean))
fields[6].Values = append(fields[6].Values, formatOrEmpty("%d", job.Write.Bw/1024))
}
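// One table row is produced per fio job, e.g. (illustrative values; job
// names match the job file defined in script_defs.go):
//
//	Job                     Read Latency (us)  Read IOPs  Read Bandwidth (MiB/s)
//	iodepth_1_bs_4k_rand    85                 11763      45
//	iodepth_256_bs_4k_rand  412                78334      306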

return fields
}

// telemetry
131 changes: 110 additions & 21 deletions internal/report/table_helpers_benchmarking.go
@@ -4,15 +4,100 @@ package report
// SPDX-License-Identifier: BSD-3-Clause

import (
"encoding/json"
"fmt"
"log/slog"
"perfspect/internal/script"
"perfspect/internal/util"
"regexp"
"strconv"
"strings"
)

// fioOutput is the top-level struct for the FIO JSON report.
// ref: https://fio.readthedocs.io/en/latest/fio_doc.html#json-output
type fioOutput struct {
FioVersion string `json:"fio version"`
Timestamp int64 `json:"timestamp"`
TimestampMs int64 `json:"timestamp_ms"`
Time string `json:"time"`
Jobs []fioJob `json:"jobs"`
}

// fioJob represents a single job's results within the FIO report.
type fioJob struct {
Jobname string `json:"jobname"`
Groupid int `json:"groupid"`
JobStart int64 `json:"job_start"`
Error int `json:"error"`
Eta int `json:"eta"`
Elapsed int `json:"elapsed"`
Read fioIOStats `json:"read"`
Write fioIOStats `json:"write"`
Trim fioIOStats `json:"trim"`
JobRuntime int `json:"job_runtime"`
UsrCPU float64 `json:"usr_cpu"`
SysCPU float64 `json:"sys_cpu"`
Ctx int `json:"ctx"`
Majf int `json:"majf"`
Minf int `json:"minf"`
IodepthLevel map[string]float64 `json:"iodepth_level"`
IodepthSubmit map[string]float64 `json:"iodepth_submit"`
IodepthComplete map[string]float64 `json:"iodepth_complete"`
LatencyNs map[string]float64 `json:"latency_ns"`
LatencyUs map[string]float64 `json:"latency_us"`
LatencyMs map[string]float64 `json:"latency_ms"`
LatencyDepth int `json:"latency_depth"`
LatencyTarget int `json:"latency_target"`
LatencyPercentile float64 `json:"latency_percentile"`
LatencyWindow int `json:"latency_window"`
}

// fioIOStats holds the detailed I/O statistics for read, write, or trim operations.
type fioIOStats struct {
IoBytes int64 `json:"io_bytes"`
IoKbytes int64 `json:"io_kbytes"`
BwBytes int64 `json:"bw_bytes"`
Bw int64 `json:"bw"`
Iops float64 `json:"iops"`
Runtime int `json:"runtime"`
TotalIos int `json:"total_ios"`
ShortIos int `json:"short_ios"`
DropIos int `json:"drop_ios"`
SlatNs fioLatencyStats `json:"slat_ns"`
ClatNs fioLatencyStatsPercentiles `json:"clat_ns"`
LatNs fioLatencyStats `json:"lat_ns"`
BwMin int `json:"bw_min"`
BwMax int `json:"bw_max"`
BwAgg float64 `json:"bw_agg"`
BwMean float64 `json:"bw_mean"`
BwDev float64 `json:"bw_dev"`
BwSamples int `json:"bw_samples"`
IopsMin int `json:"iops_min"`
IopsMax int `json:"iops_max"`
IopsMean float64 `json:"iops_mean"`
IopsStddev float64 `json:"iops_stddev"`
IopsSamples int `json:"iops_samples"`
}

// fioLatencyStats holds basic latency metrics.
type fioLatencyStats struct {
Min int64 `json:"min"`
Max int64 `json:"max"`
Mean float64 `json:"mean"`
Stddev float64 `json:"stddev"`
N int `json:"N"`
}

// fioLatencyStatsPercentiles holds latency metrics including percentiles.
type fioLatencyStatsPercentiles struct {
Min int64 `json:"min"`
Max int64 `json:"max"`
Mean float64 `json:"mean"`
Stddev float64 `json:"stddev"`
N int `json:"N"`
Percentile map[string]int64 `json:"percentile"`
}
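// A minimal sketch of how fio's JSON report maps onto these structs
// (illustrative values; only a handful of the fields above are shown):
//
//	data := []byte(`{
//	  "fio version": "fio-3.38",
//	  "jobs": [{
//	    "jobname": "iodepth_1_bs_4k_rand",
//	    "read": {"bw": 204800, "iops_mean": 51200.0, "lat_ns": {"mean": 18500.0}}
//	  }]
//	}`)
//	var out fioOutput
//	if err := json.Unmarshal(data, &out); err == nil {
//	    fmt.Printf("%s: %d MiB/s read\n", out.Jobs[0].Jobname, out.Jobs[0].Read.Bw/1024)
//	    // iodepth_1_bs_4k_rand: 200 MiB/s read
//	}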

func cpuSpeedFromOutput(outputs map[string]script.ScriptOutput) string {
var vals []float64
for line := range strings.SplitSeq(strings.TrimSpace(outputs[script.SpeedBenchmarkScriptName].Stdout), "\n") {
@@ -35,27 +120,31 @@ func cpuSpeedFromOutput(outputs map[string]script.ScriptOutput) string {
return fmt.Sprintf("%.0f", util.GeoMean(vals))
}

func storagePerfFromOutput(outputs map[string]script.ScriptOutput) (readBW, writeBW string) {
// fio output format:
// READ: bw=140MiB/s (146MB/s), 140MiB/s-140MiB/s (146MB/s-146MB/s), io=16.4GiB (17.6GB), run=120004-120004msec
// WRITE: bw=139MiB/s (146MB/s), 139MiB/s-139MiB/s (146MB/s-146MB/s), io=16.3GiB (17.5GB), run=120004-120004msec
re := regexp.MustCompile(` bw=(\d+[.]?[\d]*\w+\/s)`)
for line := range strings.SplitSeq(strings.TrimSpace(outputs[script.StorageBenchmarkScriptName].Stdout), "\n") {
if strings.Contains(line, "READ: bw=") {
matches := re.FindStringSubmatch(line)
if len(matches) != 0 {
readBW = matches[1]
}
} else if strings.Contains(line, "WRITE: bw=") {
matches := re.FindStringSubmatch(line)
if len(matches) != 0 {
writeBW = matches[1]
}
} else if strings.Contains(line, "ERROR: ") {
slog.Error("failed to run storage benchmark", slog.String("line", line))
}
func storagePerfFromOutput(outputs map[string]script.ScriptOutput) (fioOutput, error) {
output := outputs[script.StorageBenchmarkScriptName].Stdout

if strings.Contains(output, "ERROR:") {
return fioOutput{}, fmt.Errorf("failed to run storage benchmark: %s", output)
}
return
i := strings.Index(output, "{\n \"fio version\"")
if i >= 0 {
output = output[i:]
} else {
outputLen := min(len(output), 100)
slog.Info("fio output snip", "output", output[:outputLen], "stderr", outputs[script.StorageBenchmarkScriptName].Stderr)
return fioOutput{}, fmt.Errorf("unable to find fio output")
}
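// For example (assumed noise format — fio can emit warnings ahead of the
// JSON report, which the scan above skips):
//
//	stdout: "fio: cache invalidation not supported...\n{\n \"fio version\": \"fio-3.38\", ...}"
//	                                                  ^ parsing starts at this index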

slog.Debug("parsing storage benchmark output")
var fioData fioOutput
if err := json.Unmarshal([]byte(output), &fioData); err != nil {
return fioOutput{}, fmt.Errorf("error unmarshalling JSON: %w", err)
}
if len(fioData.Jobs) == 0 {
return fioOutput{}, fmt.Errorf("no jobs found in storage benchmark output")
}

return fioData, nil
}

// avxTurboFrequenciesFromOutput parses the output of avx-turbo and returns the turbo frequencies as a map of instruction type to frequencies
93 changes: 72 additions & 21 deletions internal/script/script_defs.go
@@ -1159,41 +1159,92 @@ avx-turbo --min-threads=1 --max-threads=$num_cores_per_socket --test scalar_iadd
StorageBenchmarkScriptName: {
Name: StorageBenchmarkScriptName,
ScriptTemplate: `
numjobs=1
file_size_g=5
space_needed_k=$(( (file_size_g + 1) * 1024 * 1024 * numjobs )) # space needed in kilobytes: (file_size_g + 1) GB per job
ramp_time=5s
runtime=120s
ioengine=sync
test_dir=$(mktemp -d --tmpdir="{{.StorageDir}}")
numjobs_bw=16
file_size_bw_g=1
space_needed_bw_k=$(( (file_size_bw_g + 1) * 1024 * 1024 * numjobs_bw )) # space needed in kilobytes: (file_size_bw_g + 1) GB per job
runtime=30s
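# e.g. with file_size_bw_g=1 and numjobs_bw=16:
#   space_needed_bw_k = (1 + 1) * 1024 * 1024 * 16 = 33554432 KiB (32 GiB)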

# check if .StorageDir is a directory
Contributor:

The checks for an existing, writeable directory with enough space to run the benchmark were added because users ran into these issues in the past. Without these checks, diagnosing a failure to run was difficult.

Contributor Author:

Fixed in eace668

if [[ ! -d "{{.StorageDir}}" ]]; then
echo "ERROR: {{.StorageDir}} does not exist"
exit 1
echo "ERROR: {{.StorageDir}} does not exist"
exit 1
fi
# check if .StorageDir is writeable
if [[ ! -w "{{.StorageDir}}" ]]; then
echo "ERROR: {{.StorageDir}} is not writeable"
exit 1
echo "ERROR: {{.StorageDir}} is not writeable"
exit 1
fi
# check if .StorageDir has enough space
# example output for df -P /tmp:
# Filesystem 1024-blocks Used Available Capacity Mounted on
# /dev/sdd 1055762868 196668944 805390452 20% /
available_space=$(df -P "{{.StorageDir}}" | awk 'NR==2 {print $4}')
if [[ $available_space -lt $space_needed_k ]]; then
echo "ERROR: {{.StorageDir}} has ${available_space}K available space. A minimum of ${space_needed_k}K is required to run this benchmark."
exit 1
if [[ $available_space -lt $space_needed_bw_k ]]; then
echo "ERROR: {{.StorageDir}} has ${available_space}K available space. A minimum of ${space_needed_bw_k}K is required to run the IO bandwidth benchmark job."
exit 1
fi
# create temporary directory for fio test
test_dir=$(mktemp -d --tmpdir="{{.StorageDir}}")

sync
Contributor:

sync and dropping caches aren't strictly necessary, but they are good practice when running storage benchmarks

Contributor Author:

Put back in eace668

/sbin/sysctl -w vm.drop_caches=3 || true
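# vm.drop_caches=3 frees the page cache plus dentries and inodes, so benchmark
# reads hit the device rather than memory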
# single-threaded read & write bandwidth test
fio --name=bandwidth --directory=$test_dir --numjobs=$numjobs \
--size="$file_size_g"G --time_based --runtime=$runtime --ramp_time=$ramp_time --ioengine=$ioengine \
--direct=1 --verify=0 --bs=1M --iodepth=64 --rw=rw \
--group_reporting=1 --iodepth_batch_submit=64 \
--iodepth_batch_complete_max=64

FIO_JOBFILE=$(mktemp $test_dir/fio-job-XXXXXX.fio)
cat > $FIO_JOBFILE <<EOF
[global]
ioengine=libaio
direct=1
size=5G
ramp_time=5s
time_based
create_on_open=1
unlink=1
directory=$test_dir

[iodepth_1_bs_4k_rand]
wait_for_previous
runtime=${runtime}
rw=randrw
iodepth=1
blocksize=4k
iodepth_batch_submit=1
iodepth_batch_complete_max=1

[iodepth_256_bs_4k_rand]
wait_for_previous
runtime=${runtime}
rw=randrw
iodepth=256
blocksize=4k
iodepth_batch_submit=256
iodepth_batch_complete_max=256

[iodepth_1_bs_1M_numjobs_${numjobs_bw}]
wait_for_previous
size=${file_size_bw_g}G
runtime=${runtime}
rw=readwrite
iodepth=1
iodepth_batch_submit=1
iodepth_batch_complete_max=1
blocksize=1M
numjobs=$numjobs_bw
group_reporting=1

[iodepth_64_bs_1M_numjobs_${numjobs_bw}]
wait_for_previous
size=${file_size_bw_g}G
runtime=${runtime}
rw=readwrite
iodepth=64
iodepth_batch_submit=64
iodepth_batch_complete_max=64
blocksize=1M
numjobs=$numjobs_bw
group_reporting=1
EOF

fio --output-format=json $FIO_JOBFILE
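# the JSON report on stdout is parsed by storagePerfFromOutput in
# internal/report/table_helpers_benchmarking.go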

rm -rf $test_dir
`,
Superuser: true,
17 changes: 14 additions & 3 deletions tools/Makefile
@@ -151,6 +151,16 @@ ifeq ("$(wildcard ethtool-aarch64/Makefile)","")
endif
cd ethtool-aarch64 && make -j$(NPROC)

LIBAIO_VERSION := libaio-0.3.113
libaio-aarch64:
ifeq ("$(wildcard libaio-aarch64)","")
git clone https://pagure.io/libaio libaio-aarch64
else
cd libaio-aarch64 && git checkout master && git pull
endif
cd libaio-aarch64 && git checkout $(LIBAIO_VERSION)
cd libaio-aarch64 && CC=aarch64-linux-gnu-gcc make -j$(NPROC)
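# the cross-built libaio above supplies the headers and static library that the
# aarch64 fio build links against via --extra-cflags/--extra-ldflags below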

FIO_VERSION := "fio-3.38"
fio:
ifeq ("$(wildcard fio)","")
@@ -164,15 +174,16 @@ ifeq ("$(wildcard fio/config.log)","")
endif
cd fio && make -j$(NPROC)

fio-aarch64:
# fio-aarch64 needs de3d5e68dd017a6d6099913b7831bb94f46e49cc or newer for "--extra-ldflags" support (added in commit 6fbe3284d)
fio-aarch64: libaio-aarch64
ifeq ("$(wildcard fio-aarch64)","")
git clone https://github.com/axboe/fio.git fio-aarch64
else
cd fio-aarch64 && git checkout master && git pull
endif
cd fio-aarch64 && git checkout $(FIO_VERSION)
cd fio-aarch64 && git checkout de3d5e68dd017a6d6099913b7831bb94f46e49cc
ifeq ("$(wildcard fio-aarch64/config.log)","")
cd fio-aarch64 && ./configure --build-static --disable-native --cc=aarch64-linux-gnu-gcc
cd fio-aarch64 && ./configure --build-static --disable-native --cc=aarch64-linux-gnu-gcc --extra-cflags="-I../libaio-aarch64/src" --extra-ldflags="-L../libaio-aarch64/src"
endif
cd fio-aarch64 && make -j$(NPROC)
