Merge branch 'main' into fiojob

harp-intel · harp-intel · commit f179c95d8fa1 · 2025-11-27T07:47:52.000-08:00
diff --git a/cmd/metrics/metadata.go b/cmd/metrics/metadata.go
@@ -72,6 +72,7 @@ type Metadata struct {
 	// below are not loaded by LoadMetadata, but are set by the caller (should these be here at all?)
 	CollectionStartTime time.Time
 	PerfSpectVersion    string
+	WithWorkload        bool // true if metrics were collected with a user-provided workload application
 }
 
 // LoadMetadata - populates and returns a Metadata structure containing state of the
diff --git a/cmd/metrics/metrics.go b/cmd/metrics/metrics.go
@@ -833,13 +833,29 @@ func processRawData(localOutputDir string) error {
 	printOutputFileNames([][]string{filesWritten})
 	return nil
 }
+
+// needsOutputDir reports whether runCmd should create the local output
+// directory for this invocation. It returns false as soon as one of the
+// following holds: the live flag is set, the prometheus-server flag is set,
+// the "prometheus-server-addr" flag was explicitly changed on cmd, or the
+// show-metric-names flag is set.
+func needsOutputDir(cmd *cobra.Command) bool {
+	if flagLive || flagPrometheusServer {
+		return false
+	}
+	if cmd.Flags().Changed("prometheus-server-addr") {
+		return false
+	}
+	return !flagShowMetricNames
+}
+
 func runCmd(cmd *cobra.Command, args []string) error {
 	// appContext is the application context that holds common data and resources.
 	appContext := cmd.Parent().Context().Value(common.AppContext{}).(common.AppContext)
 	localTempDir := appContext.LocalTempDir
 	localOutputDir := appContext.OutputDir
 	// Setup signal manager for coordinated shutdown
 	signalMgr := newSignalManager()
+	// create output directory if needed
+	if needsOutputDir(cmd) {
+		err := util.CreateDirectoryIfNotExists(localOutputDir, 0755) // #nosec G301
+		if err != nil {
+			err = fmt.Errorf("failed to create output directory: %w", err)
+			fmt.Fprintf(os.Stderr, "Error: %v\n", err)
+			slog.Error(err.Error())
+			cmd.SilenceUsage = true
+			return err
+		}
+	}
 	// short circuit when --input flag is set
 	if flagInput != "" {
 		// skip data collection and use raw data for reports
@@ -1320,6 +1336,7 @@ func collectOnTarget(targetContext *targetContext, localTempDir string, localOut
 	printCompleteChannel := make(chan []string)
 	// get current time for use in setting timestamps on output
 	targetContext.metadata.CollectionStartTime = time.Now() // save the start time in the metadata for use when using the --input option to process raw data
+	targetContext.metadata.WithWorkload = len(argsApplication) > 0
 	go printMetricsAsync(targetContext, localOutputDir, frameChannel, printCompleteChannel)
 	var err error
 	for !signalMgr.shouldStop() {
diff --git a/cmd/metrics/summary.go b/cmd/metrics/summary.go
@@ -33,6 +33,10 @@ func summarizeMetrics(localOutputDir string, targetName string, metadata Metadat
 	if err != nil {
 		return filesCreated, fmt.Errorf("failed to read metrics from %s: %w", csvMetricsFile, err)
 	}
+	// exclude the final sample if metrics were collected with a workload
+	if metadata.WithWorkload {
+		metrics.excludeFinalSample()
+	}
 	// csv summary
 	out, err := metrics.getCSV()
 	if err != nil {
@@ -214,6 +218,40 @@ func newMetricCollection(csvPath string) (metrics MetricCollection, err error) {
 	return
 }
 
+// excludeFinalSample removes the final timestamp's rows from all metric groups.
+// This is used when collecting metrics with a workload to avoid including
+// post-workload data that can skew the summary statistics.
+func (mc MetricCollection) excludeFinalSample() {
+	if len(mc) == 0 {
+		return
+	}
+	// All metric groups should have the same number of rows since they come from the same CSV
+	// Check the first group to avoid redundant checking
+	if len(mc[0].rows) <= 1 {
+		// Don't exclude if there's only one sample or no samples
+		slog.Warn("metric collection has only one sample, not excluding final sample")
+		return
+	}
+	for i := range mc {
+		// Find the maximum timestamp in this group
+		maxTimestamp := mc[i].rows[0].timestamp
+		for _, row := range mc[i].rows {
+			if row.timestamp > maxTimestamp {
+				maxTimestamp = row.timestamp
+			}
+		}
+		// Remove all rows with the maximum timestamp
+		var filteredRows []row
+		for _, row := range mc[i].rows {
+			if row.timestamp != maxTimestamp {
+				filteredRows = append(filteredRows, row)
+			}
+		}
+		mc[i].rows = filteredRows
+	}
+	slog.Debug("excluded final sample from metric collection", slog.Int("num_groups", len(mc)))
+}
+
 // getStats - calculate summary stats (min, max, mean, stddev) for each metric
 func (mg *MetricGroup) getStats() (stats map[string]metricStats, err error) {
 	stats = make(map[string]metricStats)
diff --git a/cmd/metrics/summary_test.go b/cmd/metrics/summary_test.go
@@ -0,0 +1,143 @@
+package metrics
+
+// Copyright (C) 2021-2025 Intel Corporation
+// SPDX-License-Identifier: BSD-3-Clause
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestExcludeFinalSample(t *testing.T) {
+	tests := []struct {
+		name          string
+		inputRows     []row
+		expectedCount int
+		expectedMaxTS float64
+	}{
+		{
+			name: "exclude single final timestamp",
+			inputRows: []row{
+				{timestamp: 5.0, metrics: map[string]float64{"metric1": 100.0}},
+				{timestamp: 10.0, metrics: map[string]float64{"metric1": 200.0}},
+				{timestamp: 15.0, metrics: map[string]float64{"metric1": 150.0}},
+				{timestamp: 20.0, metrics: map[string]float64{"metric1": 50.0}}, // this should be excluded
+			},
+			expectedCount: 3,
+			expectedMaxTS: 15.0,
+		},
+		{
+			name: "exclude multiple rows with same final timestamp",
+			inputRows: []row{
+				{timestamp: 5.0, socket: "0", metrics: map[string]float64{"metric1": 100.0}},
+				{timestamp: 10.0, socket: "0", metrics: map[string]float64{"metric1": 200.0}},
+				{timestamp: 15.0, socket: "0", metrics: map[string]float64{"metric1": 150.0}},
+				{timestamp: 15.0, socket: "1", metrics: map[string]float64{"metric1": 160.0}}, // same timestamp, different socket
+			},
+			expectedCount: 2,
+			expectedMaxTS: 10.0,
+		},
+		{
+			name: "single sample - should not exclude",
+			inputRows: []row{
+				{timestamp: 5.0, metrics: map[string]float64{"metric1": 100.0}},
+			},
+			expectedCount: 1,
+			expectedMaxTS: 5.0,
+		},
+		{
+			name: "two samples - exclude last one",
+			inputRows: []row{
+				{timestamp: 5.0, metrics: map[string]float64{"metric1": 100.0}},
+				{timestamp: 10.0, metrics: map[string]float64{"metric1": 50.0}},
+			},
+			expectedCount: 1,
+			expectedMaxTS: 5.0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Create a MetricCollection with a single MetricGroup
+			mc := MetricCollection{
+				MetricGroup{
+					names:        []string{"metric1"},
+					rows:         tt.inputRows,
+					groupByField: "",
+					groupByValue: "",
+				},
+			}
+
+			// Call excludeFinalSample
+			mc.excludeFinalSample()
+
+			// Verify the number of remaining rows
+			assert.Equal(t, tt.expectedCount, len(mc[0].rows), "unexpected number of rows after exclusion")
+
+			// Verify that no row has a timestamp greater than expectedMaxTS
+			if len(mc[0].rows) > 0 {
+				for _, row := range mc[0].rows {
+					assert.LessOrEqual(t, row.timestamp, tt.expectedMaxTS, "found row with timestamp greater than expected maximum")
+				}
+			}
+		})
+	}
+}
+
+// TestExcludeFinalSampleMultipleGroups checks that the final sample is removed
+// independently from each metric group (here, one group per socket).
+func TestExcludeFinalSampleMultipleGroups(t *testing.T) {
+	mc := MetricCollection{
+		MetricGroup{
+			names:        []string{"metric1"},
+			groupByField: "SKT",
+			groupByValue: "0",
+			rows: []row{
+				{timestamp: 5.0, socket: "0", metrics: map[string]float64{"metric1": 100.0}},
+				{timestamp: 10.0, socket: "0", metrics: map[string]float64{"metric1": 200.0}},
+				{timestamp: 15.0, socket: "0", metrics: map[string]float64{"metric1": 50.0}}, // should be excluded
+			},
+		},
+		MetricGroup{
+			names:        []string{"metric1"},
+			groupByField: "SKT",
+			groupByValue: "1",
+			rows: []row{
+				{timestamp: 5.0, socket: "1", metrics: map[string]float64{"metric1": 110.0}},
+				{timestamp: 10.0, socket: "1", metrics: map[string]float64{"metric1": 210.0}},
+				{timestamp: 15.0, socket: "1", metrics: map[string]float64{"metric1": 60.0}}, // should be excluded
+			},
+		},
+	}
+
+	mc.excludeFinalSample()
+
+	for i, label := range []string{"socket 0", "socket 1"} {
+		// assert does not stop the test on failure, so only index into the
+		// rows once the length check has passed; otherwise a wrong length
+		// would surface as an index-out-of-range panic instead of a failure.
+		if !assert.Equal(t, 2, len(mc[i].rows), label+" should have 2 rows") {
+			continue
+		}
+		// The last remaining row must be the 10.0 sample.
+		assert.Equal(t, 10.0, mc[i].rows[1].timestamp, label+" max timestamp should be 10.0")
+	}
+}
+
+// TestExcludeFinalSampleEmptyCollection checks that a collection without any
+// metric groups is accepted and still has no groups afterwards.
+func TestExcludeFinalSampleEmptyCollection(t *testing.T) {
+	collection := MetricCollection{}
+
+	// A panic inside this call would fail the test.
+	collection.excludeFinalSample()
+
+	groupCount := len(collection)
+	assert.Equal(t, 0, groupCount, "empty collection should remain empty")
+}
+
+// TestExcludeFinalSampleEmptyRows checks that a metric group holding zero rows
+// is accepted and still holds zero rows afterwards.
+func TestExcludeFinalSampleEmptyRows(t *testing.T) {
+	emptyGroup := MetricGroup{
+		names:        []string{"metric1"},
+		groupByField: "",
+		groupByValue: "",
+		rows:         []row{},
+	}
+	collection := MetricCollection{emptyGroup}
+
+	// A panic inside this call would fail the test.
+	collection.excludeFinalSample()
+
+	remaining := len(collection[0].rows)
+	assert.Equal(t, 0, remaining, "empty rows should remain empty")
+}
diff --git a/cmd/root.go b/cmd/root.go
@@ -211,43 +211,6 @@ func initializeApplication(cmd *cobra.Command, args []string) error {
 	if gLogFile != nil {
 		logFilePath = gLogFile.Name()
 	}
-	// create the output directory now to fail fast if there are permission or disk space issues
-	// this validates write access before any data collection begins
-	// skip creating output directory for config command since it doesn't generate output files
-	// also skip for metrics command with --live, --prometheus-server, or --list flags since they don't write files
-	// also skip for update command since it doesn't generate output files
-	needsOutputDir := true
-	if cmd.Name() == "config" || cmd.Name() == "update" {
-		needsOutputDir = false
-	} else if cmd.Name() == "metrics" {
-		// check if --live flag is set
-		if liveFlag, err := cmd.Flags().GetBool("live"); err == nil && liveFlag {
-			needsOutputDir = false
-		}
-		// check if --prometheus-server flag is set
-		if prometheusFlag, err := cmd.Flags().GetBool("prometheus-server"); err == nil && prometheusFlag {
-			needsOutputDir = false
-		}
-		// check if --prometheus-server-addr flag has been changed (which implies prometheus server mode)
-		if cmd.Flags().Changed("prometheus-server-addr") {
-			needsOutputDir = false
-		}
-		// check if --list flag is set (just lists metrics and exits)
-		if listFlag, err := cmd.Flags().GetBool("list"); err == nil && listFlag {
-			needsOutputDir = false
-		}
-	}
-	if needsOutputDir {
-		created, err := createOutputDir(outputDir)
-		if err != nil {
-			slog.Error("failed to create output directory", slog.String("path", outputDir), slog.String("error", err.Error()))
-			fmt.Printf("Error: failed to create output directory: %v\n", err)
-			os.Exit(1)
-		}
-		if created {
-			slog.Debug("output directory created", slog.String("path", outputDir))
-		}
-	}
 	// set app context
 	cmd.Parent().SetContext(
 		context.WithValue(
@@ -350,30 +313,6 @@ func onIntelNetwork() bool {
 	return true
 }
 
-// createOutputDir creates the output directory if it does not exist.
-// Returns true if the directory was created, false if it already existed.
-func createOutputDir(outputDir string) (bool, error) {
-	// Check if directory already exists
-	info, err := os.Stat(outputDir)
-	if err == nil {
-		// Path exists, verify it's a directory
-		if !info.IsDir() {
-			return false, fmt.Errorf("output path exists but is not a directory: %s", outputDir)
-		}
-		return false, nil // Already exists
-	}
-	// If error is not "not exists", something else is wrong
-	if !os.IsNotExist(err) {
-		return false, fmt.Errorf("failed to check output directory: %w", err)
-	}
-	// Directory doesn't exist, create it
-	err = os.MkdirAll(outputDir, 0755) // #nosec G301
-	if err != nil {
-		return false, fmt.Errorf("failed to create output directory: %w", err)
-	}
-	return true, nil // Created successfully
-}
-
 func checkForUpdates(version string) (bool, manifest, error) {
 	latestManifest, err := getLatestManifest()
 	if err != nil {
diff --git a/internal/common/common.go b/internal/common/common.go
@@ -125,6 +125,15 @@ func (rc *ReportingCommand) Run() error {
 			slog.Error("error sending signal to children", slog.String("error", err.Error()))
 		}
 	}()
+	// create output directory
+	err := util.CreateDirectoryIfNotExists(outputDir, 0755) // #nosec G301
+	if err != nil {
+		err = fmt.Errorf("failed to create output directory: %w", err)
+		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
+		slog.Error(err.Error())
+		rc.Cmd.SilenceUsage = true
+		return err
+	}
 
 	var orderedTargetScriptOutputs []TargetScriptOutputs
 	var myTargets []target.Target
@@ -206,7 +215,7 @@ func (rc *ReportingCommand) Run() error {
 	}
 	// create the raw report before processing the data, so that we can save the raw data even if there is an error while processing
 	var rawReports []string
-	rawReports, err := rc.createRawReports(appContext, orderedTargetScriptOutputs)
+	rawReports, err = rc.createRawReports(appContext, orderedTargetScriptOutputs)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
 		slog.Error(err.Error())
diff --git a/tools/build.Dockerfile b/tools/build.Dockerfile
@@ -75,7 +75,7 @@ RUN git clone https://github.com/madler/zlib.git zlib-aarch64 \
 RUN mkdir workdir
 ADD . /workdir
 WORKDIR /workdir
-RUN make tools
+RUN make tools -j$(nproc)
 RUN make oss-source
 
 FROM ubuntu:22.04 AS perf-builder

Original file line number	Diff line number	Diff line change
`@@ -72,6 +72,7 @@ type Metadata struct {`
`72`	`72`	`// below are not loaded by LoadMetadata, but are set by the caller (should these be here at all?)`
`73`	`73`	`CollectionStartTime time.Time`
`74`	`74`	`PerfSpectVersion string`
	`75`	`+ WithWorkload bool // true if metrics were collected with a user-provided workload application`
`75`	`76`	`}`
`76`	`77`
`77`	`78`	`// LoadMetadata - populates and returns a Metadata structure containing state of the`