feat(stt): add new sad_module param to recognize functions

apaparazzi0329 · apaparazzi0329 · commit 7b0ab86ed50b · 2025-11-10T12:44:21.000-06:00
diff --git a/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/SpeechToText.java b/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/SpeechToText.java
@@ -1,5 +1,5 @@
 /*
- * (C) Copyright IBM Corp. 2016, 2024.
+ * (C) Copyright IBM Corp. 2016, 2025.
  *
  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
@@ -12,7 +12,7 @@
  */
 
 /*
- * IBM OpenAPI SDK Code Generator Version: 3.97.0-0e90eab1-20241120-170029
+ * IBM OpenAPI SDK Code Generator Version: 3.105.0-3c13b041-20250605-193116
  */
 
 package com.ibm.watson.speech_to_text.v1;
@@ -520,6 +520,9 @@ public ServiceCall<SpeechRecognitionResults> recognize(RecognizeOptions recogniz
           "speech_detector_sensitivity",
           String.valueOf(recognizeOptions.speechDetectorSensitivity()));
     }
+    if (recognizeOptions.sadModule() != null) {
+      builder.query("sad_module", String.valueOf(recognizeOptions.sadModule()));
+    }
     if (recognizeOptions.backgroundAudioSuppression() != null) {
       builder.query(
           "background_audio_suppression",
@@ -854,6 +857,9 @@ public ServiceCall<RecognitionJob> createJob(CreateJobOptions createJobOptions)
           "speech_detector_sensitivity",
           String.valueOf(createJobOptions.speechDetectorSensitivity()));
     }
+    if (createJobOptions.sadModule() != null) {
+      builder.query("sad_module", String.valueOf(createJobOptions.sadModule()));
+    }
     if (createJobOptions.backgroundAudioSuppression() != null) {
       builder.query(
           "background_audio_suppression",
diff --git a/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/CreateJobOptions.java b/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/CreateJobOptions.java
@@ -270,6 +270,7 @@ public interface Events {
   protected Double endOfPhraseSilenceTime;
   protected Boolean splitTranscriptAtPhraseEnd;
   protected Float speechDetectorSensitivity;
+  protected Long sadModule;
   protected Float backgroundAudioSuppression;
   protected Boolean lowLatency;
   protected Float characterInsertionBias;
@@ -306,6 +307,7 @@ public static class Builder {
     private Double endOfPhraseSilenceTime;
     private Boolean splitTranscriptAtPhraseEnd;
     private Float speechDetectorSensitivity;
+    private Long sadModule;
     private Float backgroundAudioSuppression;
     private Boolean lowLatency;
     private Float characterInsertionBias;
@@ -346,6 +348,7 @@ private Builder(CreateJobOptions createJobOptions) {
       this.endOfPhraseSilenceTime = createJobOptions.endOfPhraseSilenceTime;
       this.splitTranscriptAtPhraseEnd = createJobOptions.splitTranscriptAtPhraseEnd;
       this.speechDetectorSensitivity = createJobOptions.speechDetectorSensitivity;
+      this.sadModule = createJobOptions.sadModule;
       this.backgroundAudioSuppression = createJobOptions.backgroundAudioSuppression;
       this.lowLatency = createJobOptions.lowLatency;
       this.characterInsertionBias = createJobOptions.characterInsertionBias;
@@ -717,6 +720,17 @@ public Builder speechDetectorSensitivity(Float speechDetectorSensitivity) {
       return this;
     }
 
+    /**
+     * Set the sadModule, which createJob sends as the `sad_module` query parameter.
+     *
+     * @param sadModule the sadModule; the primitive is boxed into the builder's Long field
+     * @return the CreateJobOptions builder
+     */
+    public Builder sadModule(long sadModule) {
+      this.sadModule = sadModule;
+      return this;
+    }
+
     /**
      * Set the backgroundAudioSuppression.
      *
@@ -797,6 +811,7 @@ protected CreateJobOptions(Builder builder) {
     endOfPhraseSilenceTime = builder.endOfPhraseSilenceTime;
     splitTranscriptAtPhraseEnd = builder.splitTranscriptAtPhraseEnd;
     speechDetectorSensitivity = builder.speechDetectorSensitivity;
+    sadModule = builder.sadModule;
     backgroundAudioSuppression = builder.backgroundAudioSuppression;
     lowLatency = builder.lowLatency;
     characterInsertionBias = builder.characterInsertionBias;
@@ -1353,6 +1368,23 @@ public Float speechDetectorSensitivity() {
     return speechDetectorSensitivity;
   }
 
+  /**
+   * Gets the sadModule.
+   *
+   * <p>Specify `sad_module: 2` to detect speech boundaries within the audio stream with better
+   * performance, improved noise suppression, faster responsiveness, and increased accuracy.
+   *
+   * <p>A null value means createJob does not send the `sad_module` query parameter.
+   *
+   * <p>See [Speech Activity Detection
+   * (SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
+   *
+   * @return the sadModule (may be null)
+   */
+  public Long sadModule() {
+    return sadModule;
+  }
+
   /**
    * Gets the backgroundAudioSuppression.
    *
diff --git a/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/RecognizeOptions.java b/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/RecognizeOptions.java
@@ -237,6 +237,7 @@ public interface Model {
   protected Double endOfPhraseSilenceTime;
   protected Boolean splitTranscriptAtPhraseEnd;
   protected Float speechDetectorSensitivity;
+  protected Long sadModule;
   protected Float backgroundAudioSuppression;
   protected Boolean lowLatency;
   protected Float characterInsertionBias;
@@ -268,6 +269,7 @@ public static class Builder {
     private Double endOfPhraseSilenceTime;
     private Boolean splitTranscriptAtPhraseEnd;
     private Float speechDetectorSensitivity;
+    private Long sadModule;
     private Float backgroundAudioSuppression;
     private Boolean lowLatency;
     private Float characterInsertionBias;
@@ -303,6 +305,7 @@ private Builder(RecognizeOptions recognizeOptions) {
       this.endOfPhraseSilenceTime = recognizeOptions.endOfPhraseSilenceTime;
       this.splitTranscriptAtPhraseEnd = recognizeOptions.splitTranscriptAtPhraseEnd;
       this.speechDetectorSensitivity = recognizeOptions.speechDetectorSensitivity;
+      this.sadModule = recognizeOptions.sadModule;
       this.backgroundAudioSuppression = recognizeOptions.backgroundAudioSuppression;
       this.lowLatency = recognizeOptions.lowLatency;
       this.characterInsertionBias = recognizeOptions.characterInsertionBias;
@@ -619,6 +622,17 @@ public Builder speechDetectorSensitivity(Float speechDetectorSensitivity) {
       return this;
     }
 
+    /**
+     * Set the sadModule, which recognize sends as the `sad_module` query parameter.
+     *
+     * @param sadModule the sadModule; the primitive is boxed into the builder's Long field
+     * @return the RecognizeOptions builder
+     */
+    public Builder sadModule(long sadModule) {
+      this.sadModule = sadModule;
+      return this;
+    }
+
     /**
      * Set the backgroundAudioSuppression.
      *
@@ -694,6 +708,7 @@ protected RecognizeOptions(Builder builder) {
     endOfPhraseSilenceTime = builder.endOfPhraseSilenceTime;
     splitTranscriptAtPhraseEnd = builder.splitTranscriptAtPhraseEnd;
     speechDetectorSensitivity = builder.speechDetectorSensitivity;
+    sadModule = builder.sadModule;
     backgroundAudioSuppression = builder.backgroundAudioSuppression;
     lowLatency = builder.lowLatency;
     characterInsertionBias = builder.characterInsertionBias;
@@ -759,7 +774,8 @@ public String model() {
    * when a speech activity is detected in the stream. This can be used both in standard and low
    * latency mode. This feature enables client applications to know that some words/speech has been
    * detected and the service is in the process of decoding. This can be used in lieu of interim
-   * results in standard mode. See [Using speech recognition
+   * results in standard mode. Use `sad_module: 2` to increase accuracy and performance in detecting
+   * speech boundaries within the audio stream. See [Using speech recognition
    * parameters](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-service-features#features-parameters).
    *
    * @return the speechBeginEvent
@@ -1154,6 +1170,23 @@ public Float speechDetectorSensitivity() {
     return speechDetectorSensitivity;
   }
 
+  /**
+   * Gets the sadModule.
+   *
+   * <p>Specify `sad_module: 2` to detect speech boundaries within the audio stream with better
+   * performance, improved noise suppression, faster responsiveness, and increased accuracy.
+   *
+   * <p>A null value means recognize does not send the `sad_module` query parameter.
+   *
+   * <p>See [Speech Activity Detection
+   * (SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
+   *
+   * @return the sadModule (may be null)
+   */
+  public Long sadModule() {
+    return sadModule;
+  }
+
   /**
    * Gets the backgroundAudioSuppression.
    *
diff --git a/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/RecognizeWithWebsocketsOptions.java b/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/RecognizeWithWebsocketsOptions.java
@@ -203,6 +203,7 @@ public interface Model {
   protected Float backgroundAudioSuppression;
   protected Boolean lowLatency;
   protected Float characterInsertionBias;
+  protected Long sadModule;
   private Boolean interimResults;
   private Boolean processingMetrics;
   private Float processingMetricsInterval;
@@ -236,6 +237,7 @@ public static class Builder {
     private Float backgroundAudioSuppression;
     private Boolean lowLatency;
     private Float characterInsertionBias;
+    private Long sadModule;
     private Boolean interimResults;
     private Boolean processingMetrics;
     private Float processingMetricsInterval;
@@ -268,6 +270,7 @@ private Builder(RecognizeWithWebsocketsOptions recognizeWithWebsocketsOptions) {
       this.backgroundAudioSuppression = recognizeWithWebsocketsOptions.backgroundAudioSuppression;
       this.lowLatency = recognizeWithWebsocketsOptions.lowLatency;
       this.characterInsertionBias = recognizeWithWebsocketsOptions.characterInsertionBias;
+      this.sadModule = recognizeWithWebsocketsOptions.sadModule;
       this.interimResults = recognizeWithWebsocketsOptions.interimResults;
       this.processingMetrics = recognizeWithWebsocketsOptions.processingMetrics;
       this.processingMetricsInterval = recognizeWithWebsocketsOptions.processingMetricsInterval;
@@ -606,6 +609,17 @@ public Builder characterInsertionBias(Float characterInsertionBias) {
       return this;
     }
 
+    /**
+     * Set the sadModule.
+     *
+     * @param sadModule the sadModule (may be null)
+     * @return the RecognizeWithWebsocketsOptions builder
+     */
+    public Builder sadModule(Long sadModule) {
+      this.sadModule = sadModule;
+      return this;
+    }
+
     /**
      * Set the interimResults.
      *
@@ -687,6 +701,7 @@ protected RecognizeWithWebsocketsOptions(Builder builder) {
     backgroundAudioSuppression = builder.backgroundAudioSuppression;
     lowLatency = builder.lowLatency;
     characterInsertionBias = builder.characterInsertionBias;
+    sadModule = builder.sadModule;
     interimResults = builder.interimResults;
     processingMetrics = builder.processingMetrics;
     processingMetricsInterval = builder.processingMetricsInterval;
@@ -1176,6 +1191,23 @@ public Float characterInsertionBias() {
     return characterInsertionBias;
   }
 
+  /**
+   * Gets the sadModule.
+   *
+   * <p>Specify `sad_module: 2` to detect speech boundaries within the audio stream with better
+   * performance, improved noise suppression, faster responsiveness, and increased accuracy.
+   *
+   * <p>The value may be null; the builder setter accepts a boxed Long.
+   *
+   * <p>See [Speech Activity Detection
+   * (SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
+   *
+   * @return the sadModule (may be null)
+   */
+  public Long sadModule() {
+    return sadModule;
+  }
+
   /**
    * Gets the interimResults.
    *
diff --git a/speech-to-text/src/test/java/com/ibm/watson/speech_to_text/v1/SpeechToTextTest.java b/speech-to-text/src/test/java/com/ibm/watson/speech_to_text/v1/SpeechToTextTest.java
@@ -1,5 +1,5 @@
 /*
- * (C) Copyright IBM Corp. 2019, 2024.
+ * (C) Copyright IBM Corp. 2019, 2025.
  *
  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
@@ -249,6 +249,7 @@ public void testRecognizeWOptions() throws Throwable {
             .endOfPhraseSilenceTime(Double.valueOf("0.8"))
             .splitTranscriptAtPhraseEnd(false)
             .speechDetectorSensitivity(Float.valueOf("0.5"))
+            .sadModule(Long.valueOf("1"))
             .backgroundAudioSuppression(Float.valueOf("0.0"))
             .lowLatency(false)
             .characterInsertionBias(Float.valueOf("0.0"))
@@ -296,6 +297,7 @@ public void testRecognizeWOptions() throws Throwable {
     assertEquals(
         Boolean.valueOf(query.get("split_transcript_at_phrase_end")), Boolean.valueOf(false));
     assertEquals(Float.valueOf(query.get("speech_detector_sensitivity")), Float.valueOf("0.5"));
+    assertEquals(Long.valueOf(query.get("sad_module")), Long.valueOf("1"));
     assertEquals(Float.valueOf(query.get("background_audio_suppression")), Float.valueOf("0.0"));
     assertEquals(Boolean.valueOf(query.get("low_latency")), Boolean.valueOf(false));
     assertEquals(Float.valueOf(query.get("character_insertion_bias")), Float.valueOf("0.0"));
@@ -470,6 +472,7 @@ public void testCreateJobWOptions() throws Throwable {
             .endOfPhraseSilenceTime(Double.valueOf("0.8"))
             .splitTranscriptAtPhraseEnd(false)
             .speechDetectorSensitivity(Float.valueOf("0.5"))
+            .sadModule(Long.valueOf("1"))
             .backgroundAudioSuppression(Float.valueOf("0.0"))
             .lowLatency(false)
             .characterInsertionBias(Float.valueOf("0.0"))
@@ -522,6 +525,7 @@ public void testCreateJobWOptions() throws Throwable {
     assertEquals(
         Boolean.valueOf(query.get("split_transcript_at_phrase_end")), Boolean.valueOf(false));
     assertEquals(Float.valueOf(query.get("speech_detector_sensitivity")), Float.valueOf("0.5"));
+    assertEquals(Long.valueOf(query.get("sad_module")), Long.valueOf("1"));
     assertEquals(Float.valueOf(query.get("background_audio_suppression")), Float.valueOf("0.0"));
     assertEquals(Boolean.valueOf(query.get("low_latency")), Boolean.valueOf(false));
     assertEquals(Float.valueOf(query.get("character_insertion_bias")), Float.valueOf("0.0"));
diff --git a/speech-to-text/src/test/java/com/ibm/watson/speech_to_text/v1/model/CreateJobOptionsTest.java b/speech-to-text/src/test/java/com/ibm/watson/speech_to_text/v1/model/CreateJobOptionsTest.java
@@ -1,5 +1,5 @@
 /*
- * (C) Copyright IBM Corp. 2020, 2024.
+ * (C) Copyright IBM Corp. 2020, 2025.
  *
  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
@@ -63,6 +63,7 @@ public void testCreateJobOptions() throws Throwable {
             .endOfPhraseSilenceTime(Double.valueOf("0.8"))
             .splitTranscriptAtPhraseEnd(false)
             .speechDetectorSensitivity(Float.valueOf("0.5"))
+            .sadModule(Long.valueOf("1"))
             .backgroundAudioSuppression(Float.valueOf("0.0"))
             .lowLatency(false)
             .characterInsertionBias(Float.valueOf("0.0"))
@@ -99,6 +100,7 @@ public void testCreateJobOptions() throws Throwable {
     assertEquals(createJobOptionsModel.endOfPhraseSilenceTime(), Double.valueOf("0.8"));
     assertEquals(createJobOptionsModel.splitTranscriptAtPhraseEnd(), Boolean.valueOf(false));
     assertEquals(createJobOptionsModel.speechDetectorSensitivity(), Float.valueOf("0.5"));
+    assertEquals(createJobOptionsModel.sadModule(), Long.valueOf("1"));
     assertEquals(createJobOptionsModel.backgroundAudioSuppression(), Float.valueOf("0.0"));
     assertEquals(createJobOptionsModel.lowLatency(), Boolean.valueOf(false));
     assertEquals(createJobOptionsModel.characterInsertionBias(), Float.valueOf("0.0"));
diff --git a/speech-to-text/src/test/java/com/ibm/watson/speech_to_text/v1/model/RecognizeOptionsTest.java b/speech-to-text/src/test/java/com/ibm/watson/speech_to_text/v1/model/RecognizeOptionsTest.java
@@ -1,5 +1,5 @@
 /*
- * (C) Copyright IBM Corp. 2020, 2024.
+ * (C) Copyright IBM Corp. 2020, 2025.
  *
  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
@@ -58,6 +58,7 @@ public void testRecognizeOptions() throws Throwable {
             .endOfPhraseSilenceTime(Double.valueOf("0.8"))
             .splitTranscriptAtPhraseEnd(false)
             .speechDetectorSensitivity(Float.valueOf("0.5"))
+            .sadModule(Long.valueOf("1"))
             .backgroundAudioSuppression(Float.valueOf("0.0"))
             .lowLatency(false)
             .characterInsertionBias(Float.valueOf("0.0"))
@@ -89,6 +90,7 @@ public void testRecognizeOptions() throws Throwable {
     assertEquals(recognizeOptionsModel.endOfPhraseSilenceTime(), Double.valueOf("0.8"));
     assertEquals(recognizeOptionsModel.splitTranscriptAtPhraseEnd(), Boolean.valueOf(false));
     assertEquals(recognizeOptionsModel.speechDetectorSensitivity(), Float.valueOf("0.5"));
+    assertEquals(recognizeOptionsModel.sadModule(), Long.valueOf("1"));
     assertEquals(recognizeOptionsModel.backgroundAudioSuppression(), Float.valueOf("0.0"));
     assertEquals(recognizeOptionsModel.lowLatency(), Boolean.valueOf(false));
     assertEquals(recognizeOptionsModel.characterInsertionBias(), Float.valueOf("0.0"));