Skip to content

Commit eb7f0bd

Browse files
committed
Add detailed failure attributes to exporter send_failed metrics
Signed-off-by: Israel Blancas <[email protected]>
1 parent a0cbea7 commit eb7f0bd

File tree

15 files changed

+877
-61
lines changed

15 files changed

+877
-61
lines changed

.chloggen/13956.yaml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: enhancement
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. receiver/otlp)
7+
component: all
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Add detailed failure attributes to exporter send_failed metrics at detailed telemetry level.
11+
12+
# One or more tracking issues or pull requests related to the change
13+
issues: [13956]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext: |-
19+
The `otelcol_exporter_send_failed_{spans,metric_points,log_records}` metrics now include
20+
failure attributes when telemetry level is Detailed: `error.type` (OpenTelemetry semantic convention
21+
describing the error class) and `failure.permanent` (indicates whether the error is permanent, i.e. non-retryable).
22+
The `error.type` attribute captures gRPC status codes (e.g., "Unavailable", "ResourceExhausted"),
23+
HTTP status codes (e.g., "404", "503"), standard Go errors (e.g., "Canceled", "DeadlineExceeded"),
24+
and collector-specific errors (e.g., "RetryExhausted", "Shutdown").
25+
This enables better alerting and debugging by providing standardized error classification.
26+
27+
# Optional: The change log or logs in which this entry should be included.
28+
# e.g. '[user]' or '[user, api]'
29+
# Include 'user' if the change is relevant to end users.
30+
# Include 'api' if there is a change to a library API.
31+
# Default: '[user]'
32+
change_logs: [user]

exporter/exporterhelper/documentation.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,23 +64,23 @@ Current size of the retry queue (in batches). [Alpha]
6464

6565
### otelcol_exporter_send_failed_log_records
6666

67-
Number of log records in failed attempts to send to destination. [Alpha]
67+
Number of log records in failed attempts to send to destination. At detailed telemetry level, includes attributes: error.type (semantic convention), failure.permanent. [Alpha]
6868

6969
| Unit | Metric Type | Value Type | Monotonic | Stability |
7070
| ---- | ----------- | ---------- | --------- | --------- |
7171
| {records} | Sum | Int | true | Alpha |
7272

7373
### otelcol_exporter_send_failed_metric_points
7474

75-
Number of metric points in failed attempts to send to destination. [Alpha]
75+
Number of metric points in failed attempts to send to destination. At detailed telemetry level, includes attributes: error.type (semantic convention), failure.permanent. [Alpha]
7676

7777
| Unit | Metric Type | Value Type | Monotonic | Stability |
7878
| ---- | ----------- | ---------- | --------- | --------- |
7979
| {datapoints} | Sum | Int | true | Alpha |
8080

8181
### otelcol_exporter_send_failed_spans
8282

83-
Number of spans in failed attempts to send to destination. [Alpha]
83+
Number of spans in failed attempts to send to destination. At detailed telemetry level, includes attributes: error.type (semantic convention), failure.permanent. [Alpha]
8484

8585
| Unit | Metric Type | Value Type | Monotonic | Stability |
8686
| ---- | ----------- | ---------- | --------- | --------- |

exporter/exporterhelper/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ require (
3333
go.uber.org/goleak v1.3.0
3434
go.uber.org/multierr v1.11.0
3535
go.uber.org/zap v1.27.1
36+
google.golang.org/grpc v1.77.0
3637
google.golang.org/protobuf v1.36.10
3738
)
3839

@@ -63,7 +64,6 @@ require (
6364
go.yaml.in/yaml/v3 v3.0.4 // indirect
6465
golang.org/x/sys v0.37.0 // indirect
6566
google.golang.org/genproto/googleapis/rpc v0.0.0-20251022142026-3a174f9686a8 // indirect
66-
google.golang.org/grpc v1.77.0 // indirect
6767
gopkg.in/yaml.v3 v3.0.1 // indirect
6868
)
6969

exporter/exporterhelper/internal/metadata/generated_telemetry.go

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

exporter/exporterhelper/internal/metadatatest/generated_telemetrytest.go

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

exporter/exporterhelper/internal/obs_report_sender.go

Lines changed: 81 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,21 @@ package internal // import "go.opentelemetry.io/collector/exporter/exporterhelpe
55

66
import (
77
"context"
8+
"errors"
9+
"strconv"
810

911
"go.opentelemetry.io/otel/attribute"
10-
"go.opentelemetry.io/otel/codes"
12+
otelcodes "go.opentelemetry.io/otel/codes"
1113
"go.opentelemetry.io/otel/metric"
14+
semconv "go.opentelemetry.io/otel/semconv/v1.37.0"
1215
"go.opentelemetry.io/otel/trace"
16+
"google.golang.org/grpc/codes"
17+
"google.golang.org/grpc/status"
1318

1419
"go.opentelemetry.io/collector/component"
20+
"go.opentelemetry.io/collector/consumer/consumererror"
1521
"go.opentelemetry.io/collector/exporter"
22+
"go.opentelemetry.io/collector/exporter/exporterhelper/internal/experr"
1623
"go.opentelemetry.io/collector/exporter/exporterhelper/internal/metadata"
1724
"go.opentelemetry.io/collector/exporter/exporterhelper/internal/queuebatch"
1825
"go.opentelemetry.io/collector/exporter/exporterhelper/internal/request"
@@ -21,7 +28,6 @@ import (
2128
)
2229

2330
const (
24-
// spanNameSep is duplicate between receiver and exporter.
2531
spanNameSep = "/"
2632

2733
// ExporterKey used to identify exporters in metrics and traces.
@@ -34,6 +40,9 @@ const (
3440
ItemsSent = "items.sent"
3541
// ItemsFailed used to track number of items that failed to be sent by exporters.
3642
ItemsFailed = "items.failed"
43+
44+
// FailurePermanentKey indicates whether the error is permanent (non-retryable).
45+
FailurePermanentKey = "failure.permanent"
3746
)
3847

3948
type obsReportSender[K request.Request] struct {
@@ -46,6 +55,7 @@ type obsReportSender[K request.Request] struct {
4655
metricAttr metric.MeasurementOption
4756
itemsSentInst metric.Int64Counter
4857
itemsFailedInst metric.Int64Counter
58+
exporterID string
4959
next sender.Sender[K]
5060
}
5161

@@ -63,6 +73,7 @@ func newObsReportSender[K request.Request](set exporter.Settings, signal pipelin
6373
tracer: metadata.Tracer(set.TelemetrySettings),
6474
spanAttrs: trace.WithAttributes(expAttr, attribute.String(DataTypeKey, signal.String())),
6575
metricAttr: metric.WithAttributeSet(attribute.NewSet(expAttr)),
76+
exporterID: idStr,
6677
next: next,
6778
}
6879

@@ -88,7 +99,6 @@ func (ors *obsReportSender[K]) Send(ctx context.Context, req K) error {
8899
// be modified by the downstream components like the batcher.
89100
c := ors.startOp(ctx)
90101
items := req.ItemsCount()
91-
// Forward the data to the next consumer (this pusher is the next).
92102
err := ors.next.Send(c, req)
93103
ors.endOp(c, items, err)
94104
return err
@@ -112,21 +122,22 @@ func (ors *obsReportSender[K]) endOp(ctx context.Context, numLogRecords int, err
112122
if ors.itemsSentInst != nil {
113123
ors.itemsSentInst.Add(ctx, numSent, ors.metricAttr)
114124
}
115-
// No metrics recorded for profiles.
116-
if ors.itemsFailedInst != nil {
117-
ors.itemsFailedInst.Add(ctx, numFailedToSend, ors.metricAttr)
125+
if ors.itemsFailedInst != nil && numFailedToSend > 0 {
126+
failedAttrs := extractFailureAttributes(err)
127+
baseAttrs := attribute.NewSet(attribute.String(ExporterKey, ors.exporterID))
128+
combinedAttrs := attribute.NewSet(append(baseAttrs.ToSlice(), failedAttrs.ToSlice()...)...)
129+
ors.itemsFailedInst.Add(ctx, numFailedToSend, metric.WithAttributeSet(combinedAttrs))
118130
}
119131

120132
span := trace.SpanFromContext(ctx)
121133
defer span.End()
122-
// End the span according to errors.
123134
if span.IsRecording() {
124135
span.SetAttributes(
125136
attribute.Int64(ItemsSent, numSent),
126137
attribute.Int64(ItemsFailed, numFailedToSend),
127138
)
128139
if err != nil {
129-
span.SetStatus(codes.Error, err.Error())
140+
span.SetStatus(otelcodes.Error, err.Error())
130141
}
131142
}
132143
}
@@ -137,3 +148,65 @@ func toNumItems(numExportedItems int, err error) (int64, int64) {
137148
}
138149
return int64(numExportedItems), 0
139150
}
151+
152+
func extractFailureAttributes(err error) attribute.Set {
153+
if err == nil {
154+
return attribute.NewSet()
155+
}
156+
157+
attrs := []attribute.KeyValue{}
158+
159+
errorType := determineErrorType(err)
160+
attrs = append(attrs, attribute.String(string(semconv.ErrorTypeKey), errorType))
161+
162+
isPermanent := consumererror.IsPermanent(err)
163+
attrs = append(attrs, attribute.Bool(FailurePermanentKey, isPermanent))
164+
165+
return attribute.NewSet(attrs...)
166+
}
167+
168+
func determineErrorType(err error) string {
169+
if err == nil {
170+
return ""
171+
}
172+
173+
if IsRetryExhaustedErr(err) {
174+
return "RetryExhausted"
175+
}
176+
177+
if experr.IsShutdownErr(err) {
178+
return "Shutdown"
179+
}
180+
181+
if errors.Is(err, context.Canceled) {
182+
return "Canceled"
183+
}
184+
185+
if errors.Is(err, context.DeadlineExceeded) {
186+
return "DeadlineExceeded"
187+
}
188+
189+
if st, ok := status.FromError(err); ok && st.Code() != codes.OK {
190+
return st.Code().String()
191+
}
192+
193+
if httpCode := extractHTTPStatusCode(err); httpCode > 0 {
194+
return strconv.Itoa(httpCode)
195+
}
196+
197+
return "Unknown"
198+
}
199+
200+
// extractHTTPStatusCode looks through err's chain (via errors.As) for the
// first error that exposes an HTTPStatusCode() int method and returns that
// method's result. It returns 0 when no error in the chain provides one,
// including when err is nil.
func extractHTTPStatusCode(err error) int {
	var coder interface{ HTTPStatusCode() int }
	if !errors.As(err, &coder) {
		return 0
	}
	return coder.HTTPStatusCode()
}

0 commit comments

Comments
 (0)