Skip to content

Commit 5655113

Browse files
committed
[Instrumentation.Hangfire] Add metrics support
Implements OpenTelemetry workflow semantic conventions for Hangfire metrics. Fixes #2075
1 parent 9d8ba21 commit 5655113

23 files changed

+1931
-39
lines changed

src/OpenTelemetry.Instrumentation.Hangfire/.publicApi/PublicAPI.Unshipped.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
OpenTelemetry.Metrics.MeterProviderBuilderExtensions
12
OpenTelemetry.Trace.HangfireInstrumentationOptions
23
OpenTelemetry.Trace.HangfireInstrumentationOptions.DisplayNameFunc.get -> System.Func<Hangfire.BackgroundJob!, string!>!
34
OpenTelemetry.Trace.HangfireInstrumentationOptions.DisplayNameFunc.set -> void
@@ -6,7 +7,12 @@ OpenTelemetry.Trace.HangfireInstrumentationOptions.Filter.set -> void
67
OpenTelemetry.Trace.HangfireInstrumentationOptions.HangfireInstrumentationOptions() -> void
78
OpenTelemetry.Trace.HangfireInstrumentationOptions.RecordException.get -> bool
89
OpenTelemetry.Trace.HangfireInstrumentationOptions.RecordException.set -> void
10+
OpenTelemetry.Trace.HangfireInstrumentationOptions.RecordQueueLatency.get -> bool
11+
OpenTelemetry.Trace.HangfireInstrumentationOptions.RecordQueueLatency.set -> void
912
OpenTelemetry.Trace.TracerProviderBuilderExtensions
13+
static OpenTelemetry.Metrics.MeterProviderBuilderExtensions.AddHangfireInstrumentation(this OpenTelemetry.Metrics.MeterProviderBuilder! builder) -> OpenTelemetry.Metrics.MeterProviderBuilder!
14+
static OpenTelemetry.Metrics.MeterProviderBuilderExtensions.AddHangfireInstrumentation(this OpenTelemetry.Metrics.MeterProviderBuilder! builder, string? name, System.Action<OpenTelemetry.Trace.HangfireInstrumentationOptions!>? configure) -> OpenTelemetry.Metrics.MeterProviderBuilder!
15+
static OpenTelemetry.Metrics.MeterProviderBuilderExtensions.AddHangfireInstrumentation(this OpenTelemetry.Metrics.MeterProviderBuilder! builder, System.Action<OpenTelemetry.Trace.HangfireInstrumentationOptions!>? configure) -> OpenTelemetry.Metrics.MeterProviderBuilder!
1016
static OpenTelemetry.Trace.TracerProviderBuilderExtensions.AddHangfireInstrumentation(this OpenTelemetry.Trace.TracerProviderBuilder! builder) -> OpenTelemetry.Trace.TracerProviderBuilder!
1117
static OpenTelemetry.Trace.TracerProviderBuilderExtensions.AddHangfireInstrumentation(this OpenTelemetry.Trace.TracerProviderBuilder! builder, string? name, System.Action<OpenTelemetry.Trace.HangfireInstrumentationOptions!>? configure) -> OpenTelemetry.Trace.TracerProviderBuilder!
1218
static OpenTelemetry.Trace.TracerProviderBuilderExtensions.AddHangfireInstrumentation(this OpenTelemetry.Trace.TracerProviderBuilder! builder, System.Action<OpenTelemetry.Trace.HangfireInstrumentationOptions!>? configure) -> OpenTelemetry.Trace.TracerProviderBuilder!

src/OpenTelemetry.Instrumentation.Hangfire/HangfireInstrumentationOptions.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,14 @@ public class HangfireInstrumentationOptions
4545
/// </list>
4646
/// </remarks>
4747
public Func<BackgroundJob, bool>? Filter { get; set; }
48+
49+
/// <summary>
50+
/// Gets or sets a value indicating whether to record queue latency metrics.
51+
/// </summary>
52+
/// <remarks>
53+
/// When enabled, records the time jobs spend waiting in the queue before execution.
54+
/// This requires an additional database call per job execution to retrieve the enqueue timestamp.
55+
/// Default is <see langword="false"/> to avoid performance impact in high-throughput scenarios.
56+
/// </remarks>
57+
public bool RecordQueueLatency { get; set; }
4858
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
using System.Diagnostics.Metrics;
5+
using System.Reflection;
6+
using OpenTelemetry.Internal;
7+
8+
namespace OpenTelemetry.Instrumentation.Hangfire.Implementation;
9+
10+
/// <summary>
11+
/// Centralized metrics definitions for Hangfire instrumentation.
12+
/// </summary>
13+
internal static class HangfireMetrics
14+
{
15+
private static readonly Assembly Assembly = typeof(HangfireMetrics).Assembly;
16+
private static readonly AssemblyName AssemblyName = Assembly.GetName();
17+
internal static readonly string MeterName = AssemblyName.Name!;
18+
private static readonly string InstrumentationVersion = Assembly.GetPackageVersion();
19+
20+
// Metric name constants
21+
internal const string ExecutionCountMetricName = "workflow.execution.count";
22+
internal const string ExecutionDurationMetricName = "workflow.execution.duration";
23+
internal const string ExecutionStatusMetricName = "workflow.execution.status";
24+
internal const string ExecutionErrorsMetricName = "workflow.execution.errors";
25+
26+
internal const string QueueLatencyMetricName = "hangfire.queue.latency";
27+
28+
/// <summary>
29+
/// The meter instance for all Hangfire metrics.
30+
/// </summary>
31+
public static readonly Meter Meter = new(MeterName, InstrumentationVersion);
32+
33+
/// <summary>
34+
/// Counter for the number of task executions which have been initiated.
35+
/// Follows OpenTelemetry workflow semantic conventions.
36+
/// </summary>
37+
public static readonly Counter<long> ExecutionCount =
38+
Meter.CreateCounter<long>(ExecutionCountMetricName, unit: "{executions}",
39+
description: "The number of task executions which have been initiated.");
40+
41+
/// <summary>
42+
/// Histogram for duration of an execution grouped by task, type and result.
43+
/// Follows OpenTelemetry workflow semantic conventions.
44+
/// </summary>
45+
public static readonly Histogram<double> ExecutionDuration =
46+
Meter.CreateHistogram<double>(ExecutionDurationMetricName, unit: "s",
47+
description: "Duration of an execution grouped by task, type and result.");
48+
49+
/// <summary>
50+
/// UpDownCounter for the number of actively running tasks grouped by task and state.
51+
/// Follows OpenTelemetry workflow semantic conventions.
52+
/// </summary>
53+
public static readonly UpDownCounter<long> ExecutionStatus =
54+
Meter.CreateUpDownCounter<long>(ExecutionStatusMetricName, unit: "{executions}",
55+
description: "The number of actively running tasks grouped by task, type and the current state.");
56+
57+
/// <summary>
58+
/// Counter for the number of errors encountered in task runs (eg. compile, test failures).
59+
/// Follows OpenTelemetry workflow semantic conventions.
60+
/// </summary>
61+
public static readonly Counter<long> ExecutionErrors =
62+
Meter.CreateCounter<long>(ExecutionErrorsMetricName, unit: "{error}",
63+
description: "The number of errors encountered in task runs (eg. compile, test failures).");
64+
65+
/// <summary>
66+
/// Histogram for time tasks spend waiting in queue before execution.
67+
/// Hangfire-specific metric (not part of standard workflow conventions).
68+
/// </summary>
69+
public static readonly Histogram<double> QueueLatency =
70+
Meter.CreateHistogram<double>(QueueLatencyMetricName, unit: "s",
71+
description: "Time tasks spend waiting in queue before execution starts.");
72+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
using Hangfire.Common;
5+
using Hangfire.Server;
6+
7+
namespace OpenTelemetry.Instrumentation.Hangfire.Implementation;
8+
9+
/// <summary>
10+
/// Hangfire filter that records OpenTelemetry error metrics for job execution failures.
11+
/// Follows OpenTelemetry workflow semantic conventions for workflow.execution.errors metric.
12+
/// </summary>
13+
internal sealed class HangfireMetricsErrorFilterAttribute : JobFilterAttribute, IServerFilter
14+
{
15+
public void OnPerforming(PerformingContext performingContext)
16+
{
17+
}
18+
19+
public void OnPerformed(PerformedContext performedContext)
20+
{
21+
if (performedContext.Exception != null)
22+
{
23+
var errorTags = HangfireTagBuilder.BuildErrorTags(
24+
performedContext.BackgroundJob.Job,
25+
performedContext.Exception);
26+
27+
HangfireMetrics.ExecutionErrors.Add(1, errorTags);
28+
}
29+
}
30+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
using Hangfire;
5+
using OpenTelemetry.Trace;
6+
7+
namespace OpenTelemetry.Instrumentation.Hangfire.Implementation;
8+
9+
/// <summary>
10+
/// Hangfire metrics instrumentation following OpenTelemetry workflow semantic conventions.
11+
/// </summary>
12+
internal sealed class HangfireMetricsInstrumentation : IDisposable
13+
{
14+
private readonly List<object> filters = new();
15+
16+
public HangfireMetricsInstrumentation(HangfireInstrumentationOptions options)
17+
{
18+
this.AddFilter(new HangfireMetricsJobFilterAttribute());
19+
this.AddFilter(new HangfireMetricsStateFilter());
20+
this.AddFilter(new HangfireMetricsErrorFilterAttribute());
21+
22+
// Only register queue latency filter if enabled (requires DB call per job)
23+
if (options.RecordQueueLatency)
24+
{
25+
this.AddFilter(new HangfireQueueLatencyFilterAttribute());
26+
}
27+
}
28+
29+
public void Dispose()
30+
{
31+
foreach (var filter in this.filters)
32+
{
33+
GlobalJobFilters.Filters.Remove(filter);
34+
}
35+
}
36+
37+
private void AddFilter(object filter)
38+
{
39+
this.filters.Add(filter);
40+
GlobalJobFilters.Filters.Add(filter);
41+
}
42+
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
using System.Diagnostics;
5+
using Hangfire.Common;
6+
using Hangfire.Server;
7+
8+
namespace OpenTelemetry.Instrumentation.Hangfire.Implementation;
9+
10+
/// <summary>
11+
/// Hangfire filter that records OpenTelemetry metrics for job execution.
12+
/// </summary>
13+
internal sealed class HangfireMetricsJobFilterAttribute : JobFilterAttribute, IServerFilter
14+
{
15+
private const string StopwatchKey = "OpenTelemetry.Metrics.Stopwatch";
16+
17+
public void OnPerforming(PerformingContext performingContext)
18+
{
19+
performingContext.Items[StopwatchKey] = Stopwatch.StartNew();
20+
}
21+
22+
public void OnPerformed(PerformedContext performedContext)
23+
{
24+
var executionTags = HangfireTagBuilder.BuildExecutionTags(performedContext.BackgroundJob.Job, performedContext.Exception);
25+
26+
HangfireMetrics.ExecutionCount.Add(1, executionTags);
27+
28+
if (performedContext.Items.TryGetValue(StopwatchKey, out var stopwatchObj) && stopwatchObj is Stopwatch stopwatch)
29+
{
30+
stopwatch.Stop();
31+
var duration = stopwatch.Elapsed.TotalSeconds;
32+
33+
HangfireMetrics.ExecutionDuration.Record(duration, executionTags);
34+
}
35+
}
36+
}
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
using Hangfire.Common;
5+
using Hangfire.States;
6+
using Hangfire.Storage;
7+
8+
namespace OpenTelemetry.Instrumentation.Hangfire.Implementation;
9+
10+
/// <summary>
11+
/// Hangfire state change filter responsible for emitting workflow execution status metrics.
12+
/// </summary>
13+
internal sealed class HangfireMetricsStateFilter : JobFilterAttribute, IApplyStateFilter
14+
{
15+
public void OnStateApplied(ApplyStateContext context, IWriteOnlyTransaction transaction)
16+
{
17+
var workflowState = HangfireTagBuilder.MapWorkflowState(context.NewState.Name);
18+
if (workflowState == null)
19+
{
20+
return;
21+
}
22+
23+
var errorType = GetErrorTypeFromNewState(context.NewState);
24+
var recurringJobId = GetRecurringJobId(context);
25+
var tags = HangfireTagBuilder.BuildStateTags(
26+
context.BackgroundJob.Job,
27+
workflowState,
28+
errorType,
29+
recurringJobId);
30+
31+
HangfireMetrics.ExecutionStatus.Add(1, tags);
32+
}
33+
34+
public void OnStateUnapplied(ApplyStateContext context, IWriteOnlyTransaction transaction)
35+
{
36+
var workflowState = HangfireTagBuilder.MapWorkflowState(context.OldStateName);
37+
if (workflowState == null)
38+
{
39+
return;
40+
}
41+
42+
var errorType = GetErrorTypeFromOldState(context);
43+
var recurringJobId = GetRecurringJobId(context);
44+
var tags = HangfireTagBuilder.BuildStateTags(
45+
context.BackgroundJob.Job,
46+
workflowState,
47+
errorType,
48+
recurringJobId);
49+
50+
HangfireMetrics.ExecutionStatus.Add(-1, tags);
51+
}
52+
53+
private static string? GetErrorTypeFromNewState(IState state)
54+
{
55+
if (!string.Equals(state.Name, FailedState.StateName, StringComparison.Ordinal))
56+
{
57+
return null;
58+
}
59+
60+
if (state is FailedState failedState && failedState.Exception != null)
61+
{
62+
var exceptionType = failedState.Exception.GetType();
63+
return exceptionType.FullName ?? exceptionType.Name;
64+
}
65+
66+
return TryGetExceptionTypeFromSerializedData(state.SerializeData());
67+
}
68+
69+
private static string? GetErrorTypeFromOldState(ApplyStateContext context)
70+
{
71+
if (!string.Equals(context.OldStateName, FailedState.StateName, StringComparison.Ordinal))
72+
{
73+
return null;
74+
}
75+
76+
StateData? stateData = context.Connection.GetStateData(context.BackgroundJob.Id);
77+
return stateData != null ? TryGetExceptionTypeFromSerializedData(stateData.Data) : null;
78+
}
79+
80+
private static string? TryGetExceptionTypeFromSerializedData(IDictionary<string, string>? data)
81+
{
82+
if (data == null)
83+
{
84+
return null;
85+
}
86+
87+
if (data.TryGetValue("ExceptionType", out var exceptionType) && !string.IsNullOrWhiteSpace(exceptionType))
88+
{
89+
return exceptionType;
90+
}
91+
92+
return null;
93+
}
94+
95+
private static string? GetRecurringJobId(ApplyStateContext context)
96+
{
97+
try
98+
{
99+
return context.Connection.GetJobParameter(context.BackgroundJob.Id, "RecurringJobId");
100+
}
101+
catch
102+
{
103+
// Parameter doesn't exist or couldn't be retrieved
104+
return null;
105+
}
106+
}
107+
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
using Hangfire.Common;
5+
using Hangfire.Server;
6+
using Hangfire.States;
7+
using DateTime = System.DateTime;
8+
9+
namespace OpenTelemetry.Instrumentation.Hangfire.Implementation;
10+
11+
/// <summary>
12+
/// Hangfire filter that records queue latency metrics.
13+
/// </summary>
14+
/// <remarks>
15+
/// This filter captures the EnqueuedAt timestamp when a job enters the Enqueued state
16+
/// and calculates queue latency when the job starts executing.
17+
/// </remarks>
18+
internal sealed class HangfireQueueLatencyFilterAttribute : JobFilterAttribute, IServerFilter, IElectStateFilter
19+
{
20+
private const string EnqueuedAtParameter = "OpenTelemetry.EnqueuedAt";
21+
22+
public void OnStateElection(ElectStateContext context)
23+
{
24+
// When a job transitions to Enqueued state, capture the EnqueuedAt timestamp
25+
if (context.CandidateState is EnqueuedState enqueuedState)
26+
{
27+
try
28+
{
29+
var enqueuedAt = enqueuedState.EnqueuedAt;
30+
context.Connection.SetJobParameter(
31+
context.BackgroundJob.Id,
32+
EnqueuedAtParameter,
33+
JobHelper.SerializeDateTime(enqueuedAt));
34+
}
35+
catch
36+
{
37+
// Skip storing timestamp if parameter write fails
38+
// Instrumentation must never break Hangfire's scheduling pipeline
39+
}
40+
}
41+
}
42+
43+
public void OnPerforming(PerformingContext performingContext)
44+
{
45+
try
46+
{
47+
// Retrieve the EnqueuedAt timestamp that was stored when the job was enqueued
48+
var enqueuedAtStr = performingContext.Connection.GetJobParameter(
49+
performingContext.BackgroundJob.Id,
50+
EnqueuedAtParameter);
51+
52+
if (!string.IsNullOrEmpty(enqueuedAtStr))
53+
{
54+
var enqueuedAt = JobHelper.DeserializeDateTime(enqueuedAtStr);
55+
var queueLatency = (DateTime.UtcNow - enqueuedAt).TotalSeconds;
56+
57+
var tags = HangfireTagBuilder.BuildCommonTags(performingContext.BackgroundJob.Job);
58+
HangfireMetrics.QueueLatency.Record(queueLatency, tags);
59+
}
60+
}
61+
catch
62+
{
63+
// Skip recording if parameter retrieval fails
64+
}
65+
}
66+
67+
public void OnPerformed(PerformedContext performedContext)
68+
{
69+
// No-op: This filter only handles queue latency in OnPerforming
70+
}
71+
}

0 commit comments

Comments
 (0)