Skip to content

Commit dc3c3f2

Browse files
committed
fix: exclude final metric sample when running with workload
When collecting metrics with a workload (e.g., `perfspect metrics -- stress-ng`), the final sample may be collected after or during workload completion, capturing system transition from loaded to idle state. With short collections, this single anomalous sample can significantly skew summary statistics (mean, min, max, stddev). This change automatically excludes the final timestamp's samples from summary statistics when metrics are collected with a workload. The full CSV with all samples is still preserved for advanced analysis. Implementation: - Added `WithWorkload` field to `Metadata` struct to track workload context - Set `WithWorkload = true` when workload application arguments are provided - Added `excludeFinalSample()` method to remove final timestamp rows before computing summary statistics - Optimized to check first group only and log once per collection Fixes #569 Signed-off-by: Harper, Jason M <[email protected]>
1 parent bfdae41 commit dc3c3f2

File tree

4 files changed

+183
-0
lines changed

4 files changed

+183
-0
lines changed

cmd/metrics/metadata.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ type Metadata struct {
7272
// below are not loaded by LoadMetadata, but are set by the caller (should these be here at all?)
7373
CollectionStartTime time.Time
7474
PerfSpectVersion string
75+
WithWorkload bool // true if metrics were collected with a user-provided workload application
7576
}
7677

7778
// LoadMetadata - populates and returns a Metadata structure containing state of the

cmd/metrics/metrics.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1320,6 +1320,7 @@ func collectOnTarget(targetContext *targetContext, localTempDir string, localOut
13201320
printCompleteChannel := make(chan []string)
13211321
// get current time for use in setting timestamps on output
13221322
targetContext.metadata.CollectionStartTime = time.Now() // save the start time in the metadata for use when using the --input option to process raw data
1323+
targetContext.metadata.WithWorkload = len(argsApplication) > 0
13231324
go printMetricsAsync(targetContext, localOutputDir, frameChannel, printCompleteChannel)
13241325
var err error
13251326
for !signalMgr.shouldStop() {

cmd/metrics/summary.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ func summarizeMetrics(localOutputDir string, targetName string, metadata Metadat
3333
if err != nil {
3434
return filesCreated, fmt.Errorf("failed to read metrics from %s: %w", csvMetricsFile, err)
3535
}
36+
// exclude the final sample if metrics were collected with a workload
37+
if metadata.WithWorkload {
38+
metrics.excludeFinalSample()
39+
}
3640
// csv summary
3741
out, err := metrics.getCSV()
3842
if err != nil {
@@ -214,6 +218,40 @@ func newMetricCollection(csvPath string) (metrics MetricCollection, err error) {
214218
return
215219
}
216220

221+
// excludeFinalSample removes the final timestamp's rows from all metric groups.
222+
// This is used when collecting metrics with a workload to avoid including
223+
// post-workload data that can skew the summary statistics.
224+
func (mc MetricCollection) excludeFinalSample() {
225+
if len(mc) == 0 {
226+
return
227+
}
228+
// All metric groups should have the same number of rows since they come from the same CSV
229+
// Check the first group to avoid redundant checking
230+
if len(mc[0].rows) <= 1 {
231+
// Don't exclude if there's only one sample or no samples
232+
slog.Warn("metric collection has only one sample, not excluding final sample")
233+
return
234+
}
235+
for i := range mc {
236+
// Find the maximum timestamp in this group
237+
maxTimestamp := mc[i].rows[0].timestamp
238+
for _, row := range mc[i].rows {
239+
if row.timestamp > maxTimestamp {
240+
maxTimestamp = row.timestamp
241+
}
242+
}
243+
// Remove all rows with the maximum timestamp
244+
var filteredRows []row
245+
for _, row := range mc[i].rows {
246+
if row.timestamp != maxTimestamp {
247+
filteredRows = append(filteredRows, row)
248+
}
249+
}
250+
mc[i].rows = filteredRows
251+
}
252+
slog.Debug("excluded final sample from metric collection", slog.Int("num_groups", len(mc)))
253+
}
254+
217255
// getStats - calculate summary stats (min, max, mean, stddev) for each metric
218256
func (mg *MetricGroup) getStats() (stats map[string]metricStats, err error) {
219257
stats = make(map[string]metricStats)

cmd/metrics/summary_test.go

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
package metrics
2+
3+
// Copyright (C) 2021-2025 Intel Corporation
4+
// SPDX-License-Identifier: BSD-3-Clause
5+
6+
import (
7+
"testing"
8+
9+
"github.com/stretchr/testify/assert"
10+
)
11+
12+
func TestExcludeFinalSample(t *testing.T) {
13+
tests := []struct {
14+
name string
15+
inputRows []row
16+
expectedCount int
17+
expectedMaxTS float64
18+
}{
19+
{
20+
name: "exclude single final timestamp",
21+
inputRows: []row{
22+
{timestamp: 5.0, metrics: map[string]float64{"metric1": 100.0}},
23+
{timestamp: 10.0, metrics: map[string]float64{"metric1": 200.0}},
24+
{timestamp: 15.0, metrics: map[string]float64{"metric1": 150.0}},
25+
{timestamp: 20.0, metrics: map[string]float64{"metric1": 50.0}}, // this should be excluded
26+
},
27+
expectedCount: 3,
28+
expectedMaxTS: 15.0,
29+
},
30+
{
31+
name: "exclude multiple rows with same final timestamp",
32+
inputRows: []row{
33+
{timestamp: 5.0, socket: "0", metrics: map[string]float64{"metric1": 100.0}},
34+
{timestamp: 10.0, socket: "0", metrics: map[string]float64{"metric1": 200.0}},
35+
{timestamp: 15.0, socket: "0", metrics: map[string]float64{"metric1": 150.0}},
36+
{timestamp: 15.0, socket: "1", metrics: map[string]float64{"metric1": 160.0}}, // same timestamp, different socket
37+
},
38+
expectedCount: 2,
39+
expectedMaxTS: 10.0,
40+
},
41+
{
42+
name: "single sample - should not exclude",
43+
inputRows: []row{
44+
{timestamp: 5.0, metrics: map[string]float64{"metric1": 100.0}},
45+
},
46+
expectedCount: 1,
47+
expectedMaxTS: 5.0,
48+
},
49+
{
50+
name: "two samples - exclude last one",
51+
inputRows: []row{
52+
{timestamp: 5.0, metrics: map[string]float64{"metric1": 100.0}},
53+
{timestamp: 10.0, metrics: map[string]float64{"metric1": 50.0}},
54+
},
55+
expectedCount: 1,
56+
expectedMaxTS: 5.0,
57+
},
58+
}
59+
60+
for _, tt := range tests {
61+
t.Run(tt.name, func(t *testing.T) {
62+
// Create a MetricCollection with a single MetricGroup
63+
mc := MetricCollection{
64+
MetricGroup{
65+
names: []string{"metric1"},
66+
rows: tt.inputRows,
67+
groupByField: "",
68+
groupByValue: "",
69+
},
70+
}
71+
72+
// Call excludeFinalSample
73+
mc.excludeFinalSample()
74+
75+
// Verify the number of remaining rows
76+
assert.Equal(t, tt.expectedCount, len(mc[0].rows), "unexpected number of rows after exclusion")
77+
78+
// Verify that no row has a timestamp greater than expectedMaxTS
79+
if len(mc[0].rows) > 0 {
80+
for _, row := range mc[0].rows {
81+
assert.LessOrEqual(t, row.timestamp, tt.expectedMaxTS, "found row with timestamp greater than expected maximum")
82+
}
83+
}
84+
})
85+
}
86+
}
87+
88+
func TestExcludeFinalSampleMultipleGroups(t *testing.T) {
89+
// Test with multiple metric groups (e.g., multiple sockets)
90+
mc := MetricCollection{
91+
MetricGroup{
92+
names: []string{"metric1"},
93+
groupByField: "SKT",
94+
groupByValue: "0",
95+
rows: []row{
96+
{timestamp: 5.0, socket: "0", metrics: map[string]float64{"metric1": 100.0}},
97+
{timestamp: 10.0, socket: "0", metrics: map[string]float64{"metric1": 200.0}},
98+
{timestamp: 15.0, socket: "0", metrics: map[string]float64{"metric1": 50.0}}, // should be excluded
99+
},
100+
},
101+
MetricGroup{
102+
names: []string{"metric1"},
103+
groupByField: "SKT",
104+
groupByValue: "1",
105+
rows: []row{
106+
{timestamp: 5.0, socket: "1", metrics: map[string]float64{"metric1": 110.0}},
107+
{timestamp: 10.0, socket: "1", metrics: map[string]float64{"metric1": 210.0}},
108+
{timestamp: 15.0, socket: "1", metrics: map[string]float64{"metric1": 60.0}}, // should be excluded
109+
},
110+
},
111+
}
112+
113+
mc.excludeFinalSample()
114+
115+
// Both groups should have 2 rows remaining
116+
assert.Equal(t, 2, len(mc[0].rows), "socket 0 should have 2 rows")
117+
assert.Equal(t, 2, len(mc[1].rows), "socket 1 should have 2 rows")
118+
119+
// Verify max timestamps
120+
assert.Equal(t, 10.0, mc[0].rows[1].timestamp, "socket 0 max timestamp should be 10.0")
121+
assert.Equal(t, 10.0, mc[1].rows[1].timestamp, "socket 1 max timestamp should be 10.0")
122+
}
123+
124+
func TestExcludeFinalSampleEmptyCollection(t *testing.T) {
125+
// Test with empty MetricCollection
126+
mc := MetricCollection{}
127+
mc.excludeFinalSample() // should not panic
128+
assert.Equal(t, 0, len(mc), "empty collection should remain empty")
129+
}
130+
131+
func TestExcludeFinalSampleEmptyRows(t *testing.T) {
132+
// Test with MetricGroup that has no rows
133+
mc := MetricCollection{
134+
MetricGroup{
135+
names: []string{"metric1"},
136+
groupByField: "",
137+
groupByValue: "",
138+
rows: []row{},
139+
},
140+
}
141+
mc.excludeFinalSample() // should not panic
142+
assert.Equal(t, 0, len(mc[0].rows), "empty rows should remain empty")
143+
}

0 commit comments

Comments
 (0)