|
18 | 18 | import com.google.cloud.teleport.coders.FailsafeElementCoder; |
19 | 19 | import com.google.cloud.teleport.metadata.Template; |
20 | 20 | import com.google.cloud.teleport.metadata.TemplateCategory; |
| 21 | +import com.google.cloud.teleport.metadata.TemplateParameter; |
21 | 22 | import com.google.cloud.teleport.splunk.SplunkEvent; |
22 | 23 | import com.google.cloud.teleport.splunk.SplunkEventCoder; |
23 | 24 | import com.google.cloud.teleport.splunk.SplunkIO; |
|
60 | 61 | import org.apache.beam.sdk.values.TupleTag; |
61 | 62 | import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.MoreObjects; |
62 | 63 | import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; |
| 64 | +import org.json.JSONException; |
| 65 | +import org.json.JSONObject; |
63 | 66 | import org.slf4j.Logger; |
64 | 67 | import org.slf4j.LoggerFactory; |
65 | 68 |
|
@@ -254,18 +257,38 @@ public static PipelineResult run(PubSubToSplunkOptions options) { |
254 | 257 | .withEnableGzipHttpCompression(options.getEnableGzipHttpCompression()) |
255 | 258 | .build()); |
256 | 259 |
|
| 260 | + final ValueProvider<Boolean> unwrapHecProvider = options.getUnwrapHecForDeadletter(); |
| 261 | + |
257 | 262 | // 5a) Wrap write failures into a FailsafeElement. |
258 | 263 | PCollection<FailsafeElement<String, String>> wrappedSplunkWriteErrors = |
259 | 264 | writeErrors.apply( |
260 | 265 | "WrapSplunkWriteErrors", |
261 | 266 | ParDo.of( |
262 | 267 | new DoFn<SplunkWriteError, FailsafeElement<String, String>>() { |
263 | 268 |
|
| 269 | + private boolean unwrapHecForDeadletter = false; |
| 270 | + |
| 271 | + @Setup |
| 272 | + public void setup() { |
| 273 | + if (unwrapHecProvider != null) { |
| 274 | + unwrapHecForDeadletter = |
| 275 | + MoreObjects.firstNonNull(unwrapHecProvider.get(), false); |
| 276 | + } |
| 277 | + LOG.info("unwrapHecForDeadletter set to: {}", unwrapHecForDeadletter); |
| 278 | + } |
| 279 | + |
264 | 280 | @ProcessElement |
265 | 281 | public void processElement(ProcessContext context) { |
266 | 282 | SplunkWriteError error = context.element(); |
| 283 | + |
| 284 | + // Extract original payload if unwrap is enabled |
| 285 | + String payload = error.payload(); |
| 286 | + if (unwrapHecForDeadletter) { |
| 287 | + payload = extractOriginalPayloadFromHec(payload); |
| 288 | + } |
| 289 | + |
267 | 290 | FailsafeElement<String, String> failsafeElement = |
268 | | - FailsafeElement.of(error.payload(), error.payload()); |
| 291 | + FailsafeElement.of(payload, payload); |
269 | 292 |
|
270 | 293 | if (error.statusMessage() != null) { |
271 | 294 | failsafeElement.setErrorMessage(error.statusMessage()); |
@@ -304,7 +327,20 @@ public interface PubSubToSplunkOptions |
304 | 327 | extends SplunkOptions, |
305 | 328 | PubsubReadSubscriptionOptions, |
306 | 329 | PubsubWriteDeadletterTopicOptions, |
307 | | - JavascriptTextTransformerOptions {} |
| 330 | + JavascriptTextTransformerOptions { |
| 331 | + |
| 332 | + @TemplateParameter.Boolean( |
| 333 | + optional = true, |
| 334 | + description = "Unwrap Splunk HEC format from deadletter messages.", |
| 335 | + helpText = |
| 336 | + "When enabled, if a message fails to write to Splunk and is sent to the deadletter " |
| 337 | + + "queue, the original payload will be extracted from the Splunk HEC format before " |
| 338 | + + "writing to the deadletter topic. This prevents event nesting when messages are " |
| 339 | + + "replayed. Default: `false`.") |
| 340 | + ValueProvider<Boolean> getUnwrapHecForDeadletter(); |
| 341 | + |
| 342 | + void setUnwrapHecForDeadletter(ValueProvider<Boolean> unwrapHecForDeadletter); |
| 343 | + } |
308 | 344 |
|
309 | 345 | /** |
310 | 346 | * A {@link PTransform} that reads messages from a Pub/Sub subscription, increments a counter and |
@@ -411,4 +447,43 @@ private static JsonObject getAttributesJson(Map<String, String> attributesMap) { |
411 | 447 |
|
412 | 448 | return attributesJson; |
413 | 449 | } |
| 450 | + |
| 451 | + /** |
| 452 | + * Extracts the original payload from a Splunk HEC-formatted JSON string. |
| 453 | + * |
| 454 | + * <p>When a SplunkEvent fails to write to Splunk, it's stored in HEC format like: {"event": |
| 455 | + * "original payload", "time": ..., "host": ..., etc.} |
| 456 | + * |
| 457 | + * <p>This method extracts just the "event" field to prevent nesting when replaying messages from |
| 458 | + * the deadletter queue. |
| 459 | + * |
| 460 | + * @param hecPayload The HEC-formatted JSON string |
| 461 | + * @return The original event payload, or the input if extraction fails |
| 462 | + */ |
| 463 | + @VisibleForTesting |
| 464 | + protected static String extractOriginalPayloadFromHec(String hecPayload) { |
| 465 | + try { |
| 466 | + JSONObject json = new JSONObject(hecPayload); |
| 467 | + |
| 468 | + // Check if this looks like HEC format (has "event" field) |
| 469 | + if (json.has("event")) { |
| 470 | + Object eventObj = json.get("event"); |
| 471 | + |
| 472 | + // The event field could be a JSON object/array or a string |
| 473 | + if (eventObj instanceof String) { |
| 474 | + return (String) eventObj; |
| 475 | + } else { |
| 476 | + // If it's a JSONObject or JSONArray, return its string |
| 477 | + // representation |
| 478 | + return eventObj.toString(); |
| 479 | + } |
| 480 | + } |
| 481 | + } catch (JSONException e) { |
| 482 | + // If parsing fails, log and return the original |
| 483 | + LOG.debug("Failed to parse payload as HEC JSON, returning as-is. Error: {}", e.getMessage()); |
| 484 | + } |
| 485 | + |
| 486 | + // If no "event" field found or parsing failed, return original |
| 487 | + return hecPayload; |
| 488 | + } |
414 | 489 | } |
0 commit comments