Skip to content

Commit 3ea1da9

Browse files
authored
Merge pull request #381 from deepgram/feat/agent-fallbacks-2
feat: adds support for speak fallback
2 parents 56703fd + 350f501 commit 3ea1da9

File tree

3 files changed

+395
-2
lines changed

3 files changed

+395
-2
lines changed
Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved.
2+
// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
3+
// SPDX-License-Identifier: MIT
4+
5+
using Bogus;
6+
using FluentAssertions;
7+
using FluentAssertions.Execution;
8+
using NSubstitute;
9+
using System.Text.Json;
10+
using Deepgram.Models.Agent.v2.WebSocket;
11+
12+
namespace Deepgram.Tests.UnitTests.ClientTests;
13+
14+
public class AgentSpeakTests
15+
{
16+
/// <summary>
/// NUnit per-test setup hook. It performs no work: every test in this
/// fixture constructs the model objects it needs locally.
/// </summary>
[SetUp]
public void Setup()
{
    // Intentionally empty.
}
20+
21+
#region Backward Compatibility Tests
22+
23+
/// <summary>
/// Populating only <c>Provider</c> and <c>Endpoint</c> on <c>Speak</c> must
/// leave <c>SpeakProviders</c> unset.
/// </summary>
[Test]
public void Speak_SingleProvider_Should_Maintain_Backward_Compatibility()
{
    // Arrange: single-provider shape, provider and endpoint set directly on Speak
    var legacySpeak = new Speak
    {
        Provider = new Provider { Type = "deepgram" },
        Endpoint = new Endpoint
        {
            URL = "https://api.deepgram.com/v1/speak",
            Headers = new Dictionary<string, string> { { "authorization", "Bearer test-key" } }
        }
    };

    // Assert
    using (new AssertionScope())
    {
        // Speak.Provider is declared dynamic, so it is cast to a static type
        // before the FluentAssertions extension methods are applied.
        ((object)legacySpeak.Provider).Should().NotBeNull();
        ((string)legacySpeak.Provider.Type).Should().Be("deepgram");
        legacySpeak.Endpoint.Should().NotBeNull();
        legacySpeak.SpeakProviders.Should().BeNull();
    }
}
51+
52+
/// <summary>
/// <c>Speak.ToString()</c> for the single-provider shape must produce
/// parseable JSON whose <c>provider.type</c> is the configured value.
/// </summary>
[Test]
public void Speak_SingleProvider_ToString_Should_Return_Valid_Json()
{
    // Arrange: single provider, endpoint explicitly left null
    var provider = new Provider();
    provider.Type = "deepgram";

    var speak = new Speak
    {
        Provider = provider,
        Endpoint = null
    };

    // Act
    var result = speak.ToString();

    // Assert
    using (new AssertionScope())
    {
        result.Should().NotBeNull();
        result.Should().Contain("provider");

        // Parsing proves the output is well-formed JSON. JsonDocument is
        // IDisposable, so a using declaration releases it when this block ends.
        using var parsed = JsonDocument.Parse(result);
        parsed.RootElement.GetProperty("provider").GetProperty("type").GetString().Should().Be("deepgram");
    }
}
79+
80+
#endregion
81+
82+
#region Array Format Tests
83+
84+
/// <summary>
/// <c>Speak.SpeakProviders</c> must hold several provider configurations,
/// each with its own provider and optional endpoint, in insertion order.
/// </summary>
[Test]
public void Speak_ArrayFormat_Should_Support_Multiple_Providers()
{
    // Arrange: two entries, only the second one carries an endpoint
    var providerConfigs = new List<SpeakProviderConfig>
    {
        new SpeakProviderConfig { Provider = new Provider { Type = "deepgram" } },
        new SpeakProviderConfig
        {
            Provider = new Provider { Type = "open_ai" },
            Endpoint = new Endpoint
            {
                URL = "https://api.openai.com/v1/audio/speech",
                Headers = new Dictionary<string, string> { { "authorization", "Bearer {{OPENAI_API_KEY}}" } }
            }
        }
    };

    var speak = new Speak { SpeakProviders = providerConfigs };

    // Assert
    using (new AssertionScope())
    {
        speak.SpeakProviders.Should().NotBeNull();
        speak.SpeakProviders.Should().HaveCount(2);

        // SpeakProviderConfig.Provider is dynamic, hence the explicit string casts.
        ((string)speak.SpeakProviders![0].Provider.Type).Should().Be("deepgram");
        ((string)speak.SpeakProviders![1].Provider.Type).Should().Be("open_ai");
        speak.SpeakProviders![1].Endpoint.Should().NotBeNull();
    }
}
121+
122+
/// <summary>
/// <c>Speak.ToString()</c> with <c>SpeakProviders</c> populated must emit a
/// JSON array under the <c>speak</c> property with one element per entry.
/// </summary>
[Test]
public void Speak_ArrayFormat_ToString_Should_Return_Valid_Array_Json()
{
    // Arrange: two provider entries, no endpoints
    var deepgramProvider = new Provider();
    deepgramProvider.Type = "deepgram";

    var openAiProvider = new Provider();
    openAiProvider.Type = "open_ai";

    var speak = new Speak
    {
        SpeakProviders = new List<SpeakProviderConfig>
        {
            new SpeakProviderConfig { Provider = deepgramProvider },
            new SpeakProviderConfig { Provider = openAiProvider }
        }
    };

    // Act
    var result = speak.ToString();

    // Assert
    using (new AssertionScope())
    {
        result.Should().NotBeNull();
        result.Should().Contain("speak");
        result.Should().Contain("[");
        result.Should().Contain("]");

        // Parsing proves the output is well-formed JSON. JsonDocument is
        // IDisposable, so a using declaration releases it when this block ends.
        using var parsed = JsonDocument.Parse(result);
        var speakArray = parsed.RootElement.GetProperty("speak");
        speakArray.ValueKind.Should().Be(JsonValueKind.Array);
        speakArray.GetArrayLength().Should().Be(2);
    }
}
159+
160+
/// <summary>
/// A <c>SpeakProviderConfig</c> must expose the provider and endpoint it
/// was initialised with.
/// </summary>
[Test]
public void SpeakProviderConfig_Should_Have_Correct_Structure()
{
    // Arrange
    const string speakUrl = "https://api.deepgram.com/v1/speak";

    var config = new SpeakProviderConfig
    {
        Provider = new Provider { Type = "deepgram" },
        Endpoint = new Endpoint
        {
            URL = speakUrl,
            Headers = new Dictionary<string, string> { { "authorization", "Bearer test-key" } }
        }
    };

    // Assert
    using (new AssertionScope())
    {
        // SpeakProviderConfig.Provider is dynamic, so cast before asserting.
        ((object)config.Provider).Should().NotBeNull();
        ((string)config.Provider.Type).Should().Be("deepgram");
        config.Endpoint.Should().NotBeNull();
        config.Endpoint!.URL.Should().Be(speakUrl);
    }
}
188+
189+
/// <summary>
/// <c>SpeakProviderConfig.ToString()</c> must produce parseable JSON that
/// carries both <c>provider.type</c> and <c>endpoint.url</c>.
/// </summary>
[Test]
public void SpeakProviderConfig_ToString_Should_Return_Valid_Json()
{
    // Arrange: provider plus a custom endpoint
    var provider = new Provider();
    provider.Type = "open_ai";

    var speakProviderConfig = new SpeakProviderConfig
    {
        Provider = provider,
        Endpoint = new Endpoint
        {
            URL = "https://api.openai.com/v1/audio/speech",
            Headers = new Dictionary<string, string> { { "authorization", "Bearer {{OPENAI_API_KEY}}" } }
        }
    };

    // Act
    var result = speakProviderConfig.ToString();

    // Assert
    using (new AssertionScope())
    {
        result.Should().NotBeNull();
        result.Should().Contain("provider");
        result.Should().Contain("endpoint");

        // Parsing proves the output is well-formed JSON. JsonDocument is
        // IDisposable, so a using declaration releases it when this block ends.
        using var parsed = JsonDocument.Parse(result);
        parsed.RootElement.GetProperty("provider").GetProperty("type").GetString().Should().Be("open_ai");
        parsed.RootElement.GetProperty("endpoint").GetProperty("url").GetString().Should().Be("https://api.openai.com/v1/audio/speech");
    }
}
222+
223+
#endregion
224+
225+
#region JSON Serialization Tests
226+
227+
/// <summary>
/// Serialising a <c>Speak</c> with two provider entries must yield a
/// <c>speak</c> array whose elements keep their order and carry the
/// expected <c>provider.type</c> and, for the second entry, <c>endpoint.url</c>.
/// </summary>
[Test]
public void Speak_With_Array_Should_Serialize_Correctly_To_Match_Expected_Format()
{
    // Arrange: first entry has a provider only, second adds a custom endpoint
    var deepgramProvider = new Provider();
    deepgramProvider.Type = "deepgram";

    var openAiProvider = new Provider();
    openAiProvider.Type = "open_ai";

    var speak = new Speak
    {
        SpeakProviders = new List<SpeakProviderConfig>
        {
            new SpeakProviderConfig
            {
                Provider = deepgramProvider
            },
            new SpeakProviderConfig
            {
                Provider = openAiProvider,
                Endpoint = new Endpoint
                {
                    URL = "https://api.openai.com/v1/audio/speech",
                    Headers = new Dictionary<string, string> { { "authorization", "Bearer {{OPENAI_API_KEY}}" } }
                }
            }
        }
    };

    // Act
    var result = speak.ToString();

    // Assert
    using (new AssertionScope())
    {
        result.Should().NotBeNull();

        // Parse and verify the structure. JsonDocument is IDisposable, so a
        // using declaration releases it when this block ends.
        using var parsed = JsonDocument.Parse(result);
        var speakArray = parsed.RootElement.GetProperty("speak");
        speakArray.ValueKind.Should().Be(JsonValueKind.Array);

        var firstProvider = speakArray[0];
        firstProvider.GetProperty("provider").GetProperty("type").GetString().Should().Be("deepgram");

        var secondProvider = speakArray[1];
        secondProvider.GetProperty("provider").GetProperty("type").GetString().Should().Be("open_ai");
        secondProvider.GetProperty("endpoint").GetProperty("url").GetString().Should().Be("https://api.openai.com/v1/audio/speech");
    }
}
279+
280+
/// <summary>
/// When <c>SpeakProviders</c> is not set, <c>Speak.ToString()</c> must emit
/// the single-provider shape and omit the <c>"speak"</c> array property.
/// </summary>
[Test]
public void Speak_Without_Array_Should_Serialize_As_Single_Provider()
{
    // Arrange: provider and endpoint set directly, SpeakProviders left at its default
    var provider = new Provider();
    provider.Type = "deepgram";

    var speak = new Speak
    {
        Provider = provider,
        Endpoint = new Endpoint { URL = "https://api.deepgram.com/v1/speak" }
    };

    // Act
    var result = speak.ToString();

    // Assert
    using (new AssertionScope())
    {
        result.Should().NotBeNull();
        result.Should().Contain("provider");
        result.Should().Contain("endpoint");
        result.Should().NotContain("\"speak\""); // Quoted so only the property name matches, not the "/v1/speak" URL

        // Parse and verify the single-provider format. JsonDocument is
        // IDisposable, so a using declaration releases it when this block ends.
        using var parsed = JsonDocument.Parse(result);
        parsed.RootElement.GetProperty("provider").GetProperty("type").GetString().Should().Be("deepgram");
    }
}
309+
310+
#endregion
311+
}

Deepgram/Models/Agent/v2/WebSocket/Speak.cs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,53 @@ public record Speak
88
{
99
/// <summary>
1010
/// The provider configuration for the TTS.
11+
/// For backward compatibility, this can be a single provider object.
12+
/// For new array format, use the SpeakProviders property instead.
1113
/// </summary>
1214
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
1315
[JsonPropertyName("provider")]
1416
public dynamic Provider { get; set; } = new Provider();
1517

1618
/// <summary>
1719
/// Custom endpoint for custom models - to use a custom model, set provider.type to the flavour of API you are using (e.g. open_ai for OpenAI-like APIs).
20+
/// Note: This is for backward compatibility with single provider format.
21+
/// </summary>
22+
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
23+
[JsonPropertyName("endpoint")]
24+
public Endpoint? Endpoint { get; set; } = null;
25+
26+
/// <summary>
27+
/// Array of speak provider configurations. Each provider can have its own provider settings and endpoint.
28+
/// When this property is set, it takes precedence over the single Provider and Endpoint properties.
29+
/// This supports the new array format: [{"provider": {...}, "endpoint": {...}}, ...]
30+
/// </summary>
31+
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
32+
[JsonPropertyName("speak")]
33+
public List<SpeakProviderConfig>? SpeakProviders { get; set; } = null;
34+
35+
/// <summary>
/// Serializes this instance to a JSON string.
/// </summary>
/// <returns>
/// The output of <c>JsonSerializer.Serialize</c> using
/// <c>JsonSerializeOptions.DefaultOptions</c>, passed through <c>Regex.Unescape</c>.
/// </returns>
public override string ToString()
{
    var serialized = JsonSerializer.Serialize(this, JsonSerializeOptions.DefaultOptions);

    // NOTE(review): Regex.Unescape rewrites every backslash escape it recognises,
    // so a value containing an escaped quote or backslash may no longer be valid
    // JSON after this call - TODO confirm this is acceptable for all inputs.
    return Regex.Unescape(serialized);
}
42+
}
43+
44+
/// <summary>
45+
/// Configuration for a single speak provider in the array format
46+
/// </summary>
47+
public record SpeakProviderConfig
48+
{
49+
/// <summary>
50+
/// The provider configuration for this specific TTS provider
51+
/// </summary>
52+
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
53+
[JsonPropertyName("provider")]
54+
public dynamic Provider { get; set; } = new Provider();
55+
56+
/// <summary>
57+
/// Custom endpoint for this specific provider
1858
/// </summary>
1959
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
2060
[JsonPropertyName("endpoint")]

0 commit comments

Comments
 (0)