From 2e813044a55dae8e5227d04a955725380ad10059 Mon Sep 17 00:00:00 2001 From: Edwin Chiu Date: Mon, 24 Nov 2025 17:32:08 -0500 Subject: [PATCH 1/6] Switch fio benchmark to use libaio and multiple fiojobs to measure IOPs and throughput. There is an increase in disk space needed to run now, from 5G to 16G. --- internal/report/table_defs.go | 47 ++++++- internal/report/table_helpers_benchmarking.go | 130 +++++++++++++++--- internal/script/script_defs.go | 92 ++++++++----- 3 files changed, 208 insertions(+), 61 deletions(-) diff --git a/internal/report/table_defs.go b/internal/report/table_defs.go index eace7fe4..853096b7 100644 --- a/internal/report/table_defs.go +++ b/internal/report/table_defs.go @@ -669,7 +669,7 @@ var tableDefinitions = map[string]TableDefinition{ StorageBenchmarkTableName: { Name: StorageBenchmarkTableName, MenuLabel: StorageBenchmarkTableName, - HasRows: false, + HasRows: true, ScriptNames: []string{ script.StorageBenchmarkScriptName, }, @@ -2440,15 +2440,50 @@ func numaBenchmarkTableValues(outputs map[string]script.ScriptOutput) []Field { return fields } +// formatOrEmpty formats a value and returns an empty string if the formatted value is "0". 
+func formatOrEmpty(format string, value any) string { + s := fmt.Sprintf(format, value) + if s == "0" { + return "" + } + return s +} + func storageBenchmarkTableValues(outputs map[string]script.ScriptOutput) []Field { - readBW, writeBW := storagePerfFromOutput(outputs) - if readBW == "" && writeBW == "" { + fioData, err := storagePerfFromOutput(outputs) + if err != nil { + slog.Error("failed to get storage benchmark data", slog.String("error", err.Error())) return []Field{} } - return []Field{ - {Name: "Single-Thread Read Bandwidth", Values: []string{readBW}}, - {Name: "Single-Thread Write Bandwidth", Values: []string{writeBW}}, + + if len(fioData.Jobs) == 0 { + return []Field{} } + + // Initialize the fields for metrics (column headers) + fields := []Field{ + {Name: "Job"}, + {Name: "Read Latency (us)"}, + {Name: "Read IOPs"}, + {Name: "Read Bandwidth (MiB/s)"}, + {Name: "Write Latency (us)"}, + {Name: "Write IOPs"}, + {Name: "Write Bandwidth (MiB/s)"}, + } + + // For each FIO job, create a new row and populate its values + slog.Debug("fioData", slog.Any("jobs", fioData.Jobs)) + for _, job := range fioData.Jobs { + fields[0].Values = append(fields[0].Values, job.Jobname) + fields[1].Values = append(fields[1].Values, formatOrEmpty("%.0f", job.Read.LatNs.Mean/1000)) + fields[2].Values = append(fields[2].Values, formatOrEmpty("%.0f", job.Read.IopsMean)) + fields[3].Values = append(fields[3].Values, formatOrEmpty("%d", job.Read.Bw/1024)) + fields[4].Values = append(fields[4].Values, formatOrEmpty("%.0f", job.Write.LatNs.Mean/1000)) + fields[5].Values = append(fields[5].Values, formatOrEmpty("%.0f", job.Write.IopsMean)) + fields[6].Values = append(fields[6].Values, formatOrEmpty("%d", job.Write.Bw/1024)) + } + + return fields } // telemetry diff --git a/internal/report/table_helpers_benchmarking.go b/internal/report/table_helpers_benchmarking.go index 01fe9c22..2a17fc35 100644 --- a/internal/report/table_helpers_benchmarking.go +++ 
b/internal/report/table_helpers_benchmarking.go @@ -4,15 +4,100 @@ package report // SPDX-License-Identifier: BSD-3-Clause import ( + "encoding/json" "fmt" "log/slog" "perfspect/internal/script" "perfspect/internal/util" - "regexp" "strconv" "strings" ) +// fioOutput is the top-level struct for the FIO JSON report. +// ref: https://fio.readthedocs.io/en/latest/fio_doc.html#json-output +type fioOutput struct { + FioVersion string `json:"fio version"` + Timestamp int64 `json:"timestamp"` + TimestampMs int64 `json:"timestamp_ms"` + Time string `json:"time"` + Jobs []fioJob `json:"jobs"` +} + +// fioJob represents a single job's results within the FIO report. +type fioJob struct { + Jobname string `json:"jobname"` + Groupid int `json:"groupid"` + JobStart int64 `json:"job_start"` + Error int `json:"error"` + Eta int `json:"eta"` + Elapsed int `json:"elapsed"` + Read fioIOStats `json:"read"` + Write fioIOStats `json:"write"` + Trim fioIOStats `json:"trim"` + JobRuntime int `json:"job_runtime"` + UsrCPU float64 `json:"usr_cpu"` + SysCPU float64 `json:"sys_cpu"` + Ctx int `json:"ctx"` + Majf int `json:"majf"` + Minf int `json:"minf"` + IodepthLevel map[string]float64 `json:"iodepth_level"` + IodepthSubmit map[string]float64 `json:"iodepth_submit"` + IodepthComplete map[string]float64 `json:"iodepth_complete"` + LatencyNs map[string]float64 `json:"latency_ns"` + LatencyUs map[string]float64 `json:"latency_us"` + LatencyMs map[string]float64 `json:"latency_ms"` + LatencyDepth int `json:"latency_depth"` + LatencyTarget int `json:"latency_target"` + LatencyPercentile float64 `json:"latency_percentile"` + LatencyWindow int `json:"latency_window"` +} + +// fioIOStats holds the detailed I/O statistics for read, write, or trim operations. 
+type fioIOStats struct { + IoBytes int64 `json:"io_bytes"` + IoKbytes int64 `json:"io_kbytes"` + BwBytes int64 `json:"bw_bytes"` + Bw int64 `json:"bw"` + Iops float64 `json:"iops"` + Runtime int `json:"runtime"` + TotalIos int `json:"total_ios"` + ShortIos int `json:"short_ios"` + DropIos int `json:"drop_ios"` + SlatNs fioLatencyStats `json:"slat_ns"` + ClatNs fioLatencyStatsPercentiles `json:"clat_ns"` + LatNs fioLatencyStats `json:"lat_ns"` + BwMin int `json:"bw_min"` + BwMax int `json:"bw_max"` + BwAgg float64 `json:"bw_agg"` + BwMean float64 `json:"bw_mean"` + BwDev float64 `json:"bw_dev"` + BwSamples int `json:"bw_samples"` + IopsMin int `json:"iops_min"` + IopsMax int `json:"iops_max"` + IopsMean float64 `json:"iops_mean"` + IopsStddev float64 `json:"iops_stddev"` + IopsSamples int `json:"iops_samples"` +} + +// fioLatencyStats holds basic latency metrics. +type fioLatencyStats struct { + Min int64 `json:"min"` + Max int64 `json:"max"` + Mean float64 `json:"mean"` + Stddev float64 `json:"stddev"` + N int `json:"N"` +} + +// fioLatencyStatsPercentiles holds latency metrics including percentiles. 
+type fioLatencyStatsPercentiles struct { + Min int64 `json:"min"` + Max int64 `json:"max"` + Mean float64 `json:"mean"` + Stddev float64 `json:"stddev"` + N int `json:"N"` + Percentile map[string]int64 `json:"percentile"` +} + func cpuSpeedFromOutput(outputs map[string]script.ScriptOutput) string { var vals []float64 for line := range strings.SplitSeq(strings.TrimSpace(outputs[script.SpeedBenchmarkScriptName].Stdout), "\n") { @@ -35,27 +120,30 @@ func cpuSpeedFromOutput(outputs map[string]script.ScriptOutput) string { return fmt.Sprintf("%.0f", util.GeoMean(vals)) } -func storagePerfFromOutput(outputs map[string]script.ScriptOutput) (readBW, writeBW string) { - // fio output format: - // READ: bw=140MiB/s (146MB/s), 140MiB/s-140MiB/s (146MB/s-146MB/s), io=16.4GiB (17.6GB), run=120004-120004msec - // WRITE: bw=139MiB/s (146MB/s), 139MiB/s-139MiB/s (146MB/s-146MB/s), io=16.3GiB (17.5GB), run=120004-120004msec - re := regexp.MustCompile(` bw=(\d+[.]?[\d]*\w+\/s)`) - for line := range strings.SplitSeq(strings.TrimSpace(outputs[script.StorageBenchmarkScriptName].Stdout), "\n") { - if strings.Contains(line, "READ: bw=") { - matches := re.FindStringSubmatch(line) - if len(matches) != 0 { - readBW = matches[1] - } - } else if strings.Contains(line, "WRITE: bw=") { - matches := re.FindStringSubmatch(line) - if len(matches) != 0 { - writeBW = matches[1] - } - } else if strings.Contains(line, "ERROR: ") { - slog.Error("failed to run storage benchmark", slog.String("line", line)) - } +func storagePerfFromOutput(outputs map[string]script.ScriptOutput) (fioOutput, error) { + output := outputs[script.StorageBenchmarkScriptName].Stdout + slog.Debug("storage benchmark output", slog.String("output", output)) + + i := strings.Index(output, "{\n \"fio version\"") + if i >= 0 { + output = output[i:] + } else { + return fioOutput{}, fmt.Errorf("unable to find fio output") } - return + if strings.Contains(output, "ERROR:") { + return fioOutput{}, fmt.Errorf("failed to run storage 
benchmark: %s", output) + } + + slog.Debug("parsing storage benchmark output") + var fioData fioOutput + if err := json.Unmarshal([]byte(output), &fioData); err != nil { + return fioOutput{}, fmt.Errorf("error unmarshalling JSON: %w", err) + } + if len(fioData.Jobs) == 0 { + return fioOutput{}, fmt.Errorf("no jobs found in storage benchmark output") + } + + return fioData, nil } // avxTurboFrequenciesFromOutput parses the output of avx-turbo and returns the turbo frequencies as a map of instruction type to frequencies diff --git a/internal/script/script_defs.go b/internal/script/script_defs.go index 21b0c94f..343aa789 100644 --- a/internal/script/script_defs.go +++ b/internal/script/script_defs.go @@ -1159,41 +1159,65 @@ avx-turbo --min-threads=1 --max-threads=$num_cores_per_socket --test scalar_iadd StorageBenchmarkScriptName: { Name: StorageBenchmarkScriptName, ScriptTemplate: ` -numjobs=1 -file_size_g=5 -space_needed_k=$(( (file_size_g + 1) * 1024 * 1024 * numjobs )) # space needed in kilobytes: (file_size_g + 1) GB per job -ramp_time=5s -runtime=120s -ioengine=sync -# check if .StorageDir is a directory -if [[ ! -d "{{.StorageDir}}" ]]; then - echo "ERROR: {{.StorageDir}} does not exist" - exit 1 -fi -# check if .StorageDir is writeable -if [[ ! -w "{{.StorageDir}}" ]]; then - echo "ERROR: {{.StorageDir}} is not writeable" - exit 1 -fi -# check if .StorageDir has enough space -# example output for df -P /tmp: -# Filesystem 1024-blocks Used Available Capacity Mounted on -# /dev/sdd 1055762868 196668944 805390452 20% / -available_space=$(df -P "{{.StorageDir}}" | awk 'NR==2 {print $4}') -if [[ $available_space -lt $space_needed_k ]]; then - echo "ERROR: {{.StorageDir}} has ${available_space}K available space. A minimum of ${space_needed_k}K is required to run this benchmark." 
- exit 1 -fi -# create temporary directory for fio test test_dir=$(mktemp -d --tmpdir="{{.StorageDir}}") -sync -/sbin/sysctl -w vm.drop_caches=3 || true -# single-threaded read & write bandwidth test -fio --name=bandwidth --directory=$test_dir --numjobs=$numjobs \ ---size="$file_size_g"G --time_based --runtime=$runtime --ramp_time=$ramp_time --ioengine=$ioengine \ ---direct=1 --verify=0 --bs=1M --iodepth=64 --rw=rw \ ---group_reporting=1 --iodepth_batch_submit=64 \ ---iodepth_batch_complete_max=64 +FIO_JOBFILE=$(mktemp $test_dir/fio-job-XXXXXX.fio) + +cat > $FIO_JOBFILE < Date: Tue, 25 Nov 2025 15:11:21 -0500 Subject: [PATCH 2/6] Add in libaio-aarch64 and update fio-aarch64 to support libaio on arm --- tools/Makefile | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/Makefile b/tools/Makefile index 731928e9..d93ac34e 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -151,6 +151,16 @@ ifeq ("$(wildcard ethtool-aarch64/Makefile)","") endif cd ethtool-aarch64 && make -j$(NPROC) +LIBAIO_VERSION := libaio-0.3.113 +libaio-aarch64: +ifeq ("$(wildcard libaio-aarch64)","") + git clone https://pagure.io/libaio libaio-aarch64 +else + cd libaio-aarch64 && git checkout master && git pull +endif + cd libaio-aarch64 && git checkout $(LIBAIO_VERSION) + cd libaio-aarch64 && CC=aarch64-linux-gnu-gcc make -j$(NPROC) + FIO_VERSION := "fio-3.38" fio: ifeq ("$(wildcard fio)","") @@ -164,15 +174,16 @@ ifeq ("$(wildcard fio/config.log)","") endif cd fio && make -j$(NPROC) -fio-aarch64: +# fio-aarch64 needs de3d5e68dd017a6d6099913b7831bb94f46e49cc or newer to support "--extra-ldflags" from commit 6fbe3284d +fio-aarch64: libaio-aarch64 ifeq ("$(wildcard fio-aarch64)","") git clone https://github.com/axboe/fio.git fio-aarch64 else cd fio-aarch64 && git checkout master && git pull endif - cd fio-aarch64 && git checkout $(FIO_VERSION) + cd fio-aarch64 && git checkout de3d5e68dd017a6d6099913b7831bb94f46e49cc ifeq ("$(wildcard 
fio-aarch64/config.log)","") - cd fio-aarch64 && ./configure --build-static --disable-native --cc=aarch64-linux-gnu-gcc + cd fio-aarch64 && ./configure --build-static --disable-native --cc=aarch64-linux-gnu-gcc --extra-cflags="-I../libaio-aarch64/src" --extra-ldflags="-L../libaio-aarch64/src" endif cd fio-aarch64 && make -j$(NPROC) From 5923923355f56344b63e99f0563e3dcbab5a06a0 Mon Sep 17 00:00:00 2001 From: Edwin Chiu Date: Wed, 26 Nov 2025 11:30:05 -0500 Subject: [PATCH 3/6] remove overly verbose/long debug of fio raw output --- internal/report/table_helpers_benchmarking.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/report/table_helpers_benchmarking.go b/internal/report/table_helpers_benchmarking.go index 2a17fc35..fe706e7b 100644 --- a/internal/report/table_helpers_benchmarking.go +++ b/internal/report/table_helpers_benchmarking.go @@ -122,7 +122,6 @@ func cpuSpeedFromOutput(outputs map[string]script.ScriptOutput) string { func storagePerfFromOutput(outputs map[string]script.ScriptOutput) (fioOutput, error) { output := outputs[script.StorageBenchmarkScriptName].Stdout - slog.Debug("storage benchmark output", slog.String("output", output)) i := strings.Index(output, "{\n \"fio version\"") if i >= 0 { From e094f2c7a5822c43be17d34affbedc8707c60b5a Mon Sep 17 00:00:00 2001 From: Edwin Chiu Date: Wed, 26 Nov 2025 15:29:19 -0500 Subject: [PATCH 4/6] check for storage benchmark errors earlier to catch conditions like insufficient diskspace log partial storage benchmark output if unable to find fio output --- internal/report/table_helpers_benchmarking.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/internal/report/table_helpers_benchmarking.go b/internal/report/table_helpers_benchmarking.go index fe706e7b..3e4b54d6 100644 --- a/internal/report/table_helpers_benchmarking.go +++ b/internal/report/table_helpers_benchmarking.go @@ -123,15 +123,20 @@ func cpuSpeedFromOutput(outputs map[string]script.ScriptOutput) string { func 
storagePerfFromOutput(outputs map[string]script.ScriptOutput) (fioOutput, error) { output := outputs[script.StorageBenchmarkScriptName].Stdout + if strings.Contains(output, "ERROR:") { + return fioOutput{}, fmt.Errorf("failed to run storage benchmark: %s", output) + } i := strings.Index(output, "{\n \"fio version\"") if i >= 0 { output = output[i:] } else { + outputLen := len(output) + if outputLen > 100 { + outputLen = 100 + } + slog.Info("fio output snip", "output", output[:outputLen], "stderr", outputs[script.StorageBenchmarkScriptName].Stderr) return fioOutput{}, fmt.Errorf("unable to find fio output") } - if strings.Contains(output, "ERROR:") { - return fioOutput{}, fmt.Errorf("failed to run storage benchmark: %s", output) - } slog.Debug("parsing storage benchmark output") var fioData fioOutput From eace6685048ae41628df845df8ed9d6952cf2404 Mon Sep 17 00:00:00 2001 From: Edwin Chiu Date: Wed, 26 Nov 2025 15:31:45 -0500 Subject: [PATCH 5/6] Add back in diskspace check in storage benchmark, space based on bandwidth test which uses the most diskspace Add back in sync and dropping vm cache --- internal/script/script_defs.go | 49 ++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/internal/script/script_defs.go b/internal/script/script_defs.go index 343aa789..73a49f7d 100644 --- a/internal/script/script_defs.go +++ b/internal/script/script_defs.go @@ -1160,8 +1160,35 @@ avx-turbo --min-threads=1 --max-threads=$num_cores_per_socket --test scalar_iadd Name: StorageBenchmarkScriptName, ScriptTemplate: ` test_dir=$(mktemp -d --tmpdir="{{.StorageDir}}") -FIO_JOBFILE=$(mktemp $test_dir/fio-job-XXXXXX.fio) +numjobs_bw=16 +file_size_bw_g=1 +space_needed_bw_k=$(( (file_size_bw_g + 1) * 1024 * 1024 * numjobs_bw )) # space needed in kilobytes: (file_size_bw_g + 1) GB per job +runtime=30s +# check if .StorageDir is a directory +if [[ ! 
-d "{{.StorageDir}}" ]]; then + echo "ERROR: {{.StorageDir}} does not exist" + exit 1 +fi +# check if .StorageDir is writeable +if [[ ! -w "{{.StorageDir}}" ]]; then + echo "ERROR: {{.StorageDir}} is not writeable" + exit 1 +fi +# check if .StorageDir has enough space +# example output for df -P /tmp: +# Filesystem 1024-blocks Used Available Capacity Mounted on +# /dev/sdd 1055762868 196668944 805390452 20% / +available_space=$(df -P "{{.StorageDir}}" | awk 'NR==2 {print $4}') +if [[ $available_space -lt $space_needed_bw_k ]]; then + echo "ERROR: {{.StorageDir}} has ${available_space}K available space. A minimum of ${space_needed_bw_k}K is required to run the IO bandwidth benchmark job." + exit 1 +fi + +sync +/sbin/sysctl -w vm.drop_caches=3 || true + +FIO_JOBFILE=$(mktemp $test_dir/fio-job-XXXXXX.fio) cat > $FIO_JOBFILE < Date: Thu, 27 Nov 2025 07:48:37 -0800 Subject: [PATCH 6/6] modernize --- internal/report/table_helpers_benchmarking.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/internal/report/table_helpers_benchmarking.go b/internal/report/table_helpers_benchmarking.go index 3e4b54d6..41fa303a 100644 --- a/internal/report/table_helpers_benchmarking.go +++ b/internal/report/table_helpers_benchmarking.go @@ -130,10 +130,7 @@ func storagePerfFromOutput(outputs map[string]script.ScriptOutput) (fioOutput, e if i >= 0 { output = output[i:] } else { - outputLen := len(output) - if outputLen > 100 { - outputLen = 100 - } + outputLen := min(len(output), 100) slog.Info("fio output snip", "output", output[:outputLen], "stderr", outputs[script.StorageBenchmarkScriptName].Stderr) return fioOutput{}, fmt.Errorf("unable to find fio output") }