Skip to content

Commit 0aaeadb

Browse files
committed
fixes agent example + test tags
1 parent 1c265a7 commit 0aaeadb

File tree

1 file changed

+179
-91
lines changed

1 file changed

+179
-91
lines changed

examples/agent/websocket/simple/Program.cs

Lines changed: 179 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
using Deepgram.Models.Authenticate.v1;
88
using Deepgram.Models.Agent.v2.WebSocket;
99
using System.Collections.Generic;
10+
using System.Runtime.InteropServices;
1011
using PortAudioSharp;
1112

1213
namespace SampleApp
@@ -55,107 +56,38 @@ static async Task Main(string[] args)
5556
DeepgramWsClientOptions options = new DeepgramWsClientOptions(null, null, true);
5657
var agentClient = ClientFactory.CreateAgentWebSocketClient(apiKey: "", options: options);
5758

58-
// current time
59-
var lastAudioTime = DateTime.Now;
60-
var audioFileCount = 0;
59+
// Initialize conversation
60+
Console.WriteLine("🎤 Ready for conversation! Speak into your microphone...");
6161

6262
// Subscribe to the EventResponseReceived event
6363
await agentClient.Subscribe(new EventHandler<OpenResponse>((sender, e) =>
6464
{
6565
Console.WriteLine($"----> {e.Type} received");
6666
}));
67-
await agentClient.Subscribe(new EventHandler<AudioResponse>((sender, e) =>
67+
await agentClient.Subscribe(new EventHandler<AudioResponse>((sender, e) =>
6868
{
6969
Console.WriteLine($"----> {e.Type} received");
7070

71-
// if the last audio response is more than 5 seconds ago, add a wav header
72-
if (DateTime.Now.Subtract(lastAudioTime).TotalSeconds > 7)
71+
if (e.Stream != null && e.Stream.Length > 0)
7372
{
74-
audioFileCount = audioFileCount + 1; // increment the audio file count
73+
var audioData = e.Stream.ToArray();
74+
Console.WriteLine($"🔊 Queueing {audioData.Length} bytes of agent speech for playback");
7575

76-
// delete the file if it exists
77-
if (File.Exists($"output_{audioFileCount}.wav"))
78-
{
79-
File.Delete($"output_{audioFileCount}.wav");
80-
}
81-
82-
using (BinaryWriter writer = new BinaryWriter(File.Open($"output_{audioFileCount}.wav", FileMode.Append)))
83-
{
84-
Console.WriteLine("Adding WAV header to output.wav");
85-
byte[] wavHeader = new byte[44];
86-
int sampleRate = 48000;
87-
short bitsPerSample = 16;
88-
short channels = 1;
89-
int byteRate = sampleRate * channels * (bitsPerSample / 8);
90-
short blockAlign = (short)(channels * (bitsPerSample / 8));
91-
92-
wavHeader[0] = 0x52; // R
93-
wavHeader[1] = 0x49; // I
94-
wavHeader[2] = 0x46; // F
95-
wavHeader[3] = 0x46; // F
96-
wavHeader[4] = 0x00; // Placeholder for file size (will be updated later)
97-
wavHeader[5] = 0x00; // Placeholder for file size (will be updated later)
98-
wavHeader[6] = 0x00; // Placeholder for file size (will be updated later)
99-
wavHeader[7] = 0x00; // Placeholder for file size (will be updated later)
100-
wavHeader[8] = 0x57; // W
101-
wavHeader[9] = 0x41; // A
102-
wavHeader[10] = 0x56; // V
103-
wavHeader[11] = 0x45; // E
104-
wavHeader[12] = 0x66; // f
105-
wavHeader[13] = 0x6D; // m
106-
wavHeader[14] = 0x74; // t
107-
wavHeader[15] = 0x20; // Space
108-
wavHeader[16] = 0x10; // Subchunk1Size (16 for PCM)
109-
wavHeader[17] = 0x00; // Subchunk1Size
110-
wavHeader[18] = 0x00; // Subchunk1Size
111-
wavHeader[19] = 0x00; // Subchunk1Size
112-
wavHeader[20] = 0x01; // AudioFormat (1 for PCM)
113-
wavHeader[21] = 0x00; // AudioFormat
114-
wavHeader[22] = (byte)channels; // NumChannels
115-
wavHeader[23] = 0x00; // NumChannels
116-
wavHeader[24] = (byte)(sampleRate & 0xFF); // SampleRate
117-
wavHeader[25] = (byte)((sampleRate >> 8) & 0xFF); // SampleRate
118-
wavHeader[26] = (byte)((sampleRate >> 16) & 0xFF); // SampleRate
119-
wavHeader[27] = (byte)((sampleRate >> 24) & 0xFF); // SampleRate
120-
wavHeader[28] = (byte)(byteRate & 0xFF); // ByteRate
121-
wavHeader[29] = (byte)((byteRate >> 8) & 0xFF); // ByteRate
122-
wavHeader[30] = (byte)((byteRate >> 16) & 0xFF); // ByteRate
123-
wavHeader[31] = (byte)((byteRate >> 24) & 0xFF); // ByteRate
124-
wavHeader[32] = (byte)blockAlign; // BlockAlign
125-
wavHeader[33] = 0x00; // BlockAlign
126-
wavHeader[34] = (byte)bitsPerSample; // BitsPerSample
127-
wavHeader[35] = 0x00; // BitsPerSample
128-
wavHeader[36] = 0x64; // d
129-
wavHeader[37] = 0x61; // a
130-
wavHeader[38] = 0x74; // t
131-
wavHeader[39] = 0x61; // a
132-
wavHeader[40] = 0x00; // Placeholder for data chunk size (will be updated later)
133-
wavHeader[41] = 0x00; // Placeholder for data chunk size (will be updated later)
134-
wavHeader[42] = 0x00; // Placeholder for data chunk size (will be updated later)
135-
wavHeader[43] = 0x00; // Placeholder for data chunk size (will be updated later)
136-
137-
writer.Write(wavHeader);
138-
}
76+
// Play audio through speakers
77+
PlayAudioThroughSpeakers(audioData);
13978
}
140-
141-
if (e.Stream != null)
79+
else
14280
{
143-
using (BinaryWriter writer = new BinaryWriter(File.Open($"output_{audioFileCount}.wav", FileMode.Append)))
144-
{
145-
writer.Write(e.Stream.ToArray());
146-
}
81+
Console.WriteLine($"⚠️ Received empty audio stream");
14782
}
148-
149-
// record the last audio time
150-
lastAudioTime = DateTime.Now;
15183
}));
152-
await agentClient.Subscribe(new EventHandler<AgentAudioDoneResponse>((sender, e) =>
84+
await agentClient.Subscribe(new EventHandler<AgentAudioDoneResponse>((sender, e) =>
15385
{
154-
Console.WriteLine($"----> {e} received");
86+
Console.WriteLine($"----> {e} received - Agent finished speaking 🎤");
15587
}));
15688
await agentClient.Subscribe(new EventHandler<AgentStartedSpeakingResponse>((sender, e) =>
15789
{
158-
Console.WriteLine($"----> {e} received");
90+
Console.WriteLine($"----> {e} received - Agent is speaking 🗣️");
15991
}));
16092
await agentClient.Subscribe(new EventHandler<AgentThinkingResponse>((sender, e) =>
16193
{
@@ -171,7 +103,7 @@ await agentClient.Subscribe(new EventHandler<FunctionCallRequestResponse>((sende
171103
}));
172104
await agentClient.Subscribe(new EventHandler<UserStartedSpeakingResponse>((sender, e) =>
173105
{
174-
Console.WriteLine($"----> {e} received");
106+
Console.WriteLine($"----> {e} received - User is speaking 👤");
175107
}));
176108
await agentClient.Subscribe(new EventHandler<WelcomeResponse>((sender, e) =>
177109
{
@@ -210,10 +142,15 @@ await agentClient.Subscribe(new EventHandler<ErrorResponse>((sender, e) =>
210142
var settingsConfiguration = new SettingsSchema();
211143
settingsConfiguration.Agent.Think.Provider.Type = "open_ai";
212144
settingsConfiguration.Agent.Think.Provider.Model = "gpt-4o-mini";
213-
settingsConfiguration.Audio.Output.SampleRate = 16000;
214-
settingsConfiguration.Audio.Output.Container = "wav";
215-
settingsConfiguration.Audio.Input.SampleRate = 44100;
216-
settingsConfiguration.Agent.Greeting = "Hello, how can I help you today?";
145+
146+
// Configure audio settings - keep your input format, fix output
147+
settingsConfiguration.Audio.Input.Encoding = "linear16";
148+
settingsConfiguration.Audio.Input.SampleRate = 24000;
149+
settingsConfiguration.Audio.Output.Encoding = "linear16"; // Use linear16 for output too
150+
settingsConfiguration.Audio.Output.SampleRate = 24000;
151+
settingsConfiguration.Audio.Output.Container = "none";
152+
153+
settingsConfiguration.Agent.Greeting = "Hello! How can I help you today?";
217154
settingsConfiguration.Agent.Listen.Provider.Type = "deepgram";
218155
settingsConfiguration.Agent.Listen.Provider.Model = "nova-3";
219156
settingsConfiguration.Agent.Listen.Provider.Keyterms = new List<string> { "Deepgram" };
@@ -236,18 +173,42 @@ await agentClient.Subscribe(new EventHandler<ErrorResponse>((sender, e) =>
236173
return;
237174
}
238175

239-
// Microphone streaming
176+
// Microphone streaming with debugging
240177
Console.WriteLine("Starting microphone...");
241178
Microphone microphone = null;
242-
try
179+
int audioDataCounter = 0;
180+
181+
try
243182
{
244-
microphone = new Microphone(agentClient.SendBinary);
183+
// Create microphone with proper sample rate and debugging
184+
microphone = new Microphone(
185+
push_callback: (audioData, length) =>
186+
{
187+
audioDataCounter++;
188+
Console.WriteLine($"[MIC] Captured audio chunk #{audioDataCounter}: {length} bytes");
189+
190+
// Create array with actual length
191+
byte[] actualData = new byte[length];
192+
Array.Copy(audioData, actualData, length);
193+
194+
// Send to agent
195+
agentClient.SendBinary(actualData);
196+
},
197+
rate: 24000, // Match the agent's expected input rate (24kHz)
198+
chunkSize: 8192, // Standard chunk size
199+
channels: 1, // Mono
200+
device_index: PortAudio.DefaultInputDevice,
201+
format: SampleFormat.Int16
202+
);
203+
245204
microphone.Start();
246-
Console.WriteLine("Microphone started successfully. Waiting for audio input...");
205+
Console.WriteLine("Microphone started successfully. Speak into your microphone now!");
206+
Console.WriteLine("You should see '[MIC] Captured audio chunk' messages when speaking...");
247207
}
248208
catch (Exception ex)
249209
{
250210
Console.WriteLine($"Error starting microphone: {ex.Message}");
211+
Console.WriteLine($"Stack trace: {ex.StackTrace}");
251212
return;
252213
}
253214

@@ -271,6 +232,133 @@ await agentClient.Subscribe(new EventHandler<ErrorResponse>((sender, e) =>
271232
{
272233
Console.WriteLine($"Exception: {ex.Message}");
273234
}
235+
}
236+
237+
// Audio playback queue and position tracking.
// Shared between the websocket AudioResponse handler (producer) and the
// PortAudio output callback (consumer); all access is guarded by audioLock.
private static Queue<byte[]> audioQueue = new Queue<byte[]>();
// Buffer currently being drained by the output callback; null when none is in flight.
private static byte[]? currentAudioBuffer = null;
// Read offset (in bytes) into currentAudioBuffer.
private static int audioPosition = 0;
// Gate protecting audioQueue, currentAudioBuffer and audioPosition.
private static readonly object audioLock = new object();
242+
243+
/// <summary>
/// Plays audio data through the system's default output device (speakers).
/// The chunk is enqueued for the PortAudio output callback to consume; the
/// output stream itself is opened lazily on first use.
/// </summary>
/// <param name="audioData">PCM audio data to play</param>
static void PlayAudioThroughSpeakers(byte[] audioData)
{
    try
    {
        // Hand the chunk to the output callback via the shared queue.
        lock (audioLock)
        {
            audioQueue.Enqueue(audioData);
        }

        // Make sure the playback stream is up; a no-op when already running.
        StartAudioPlayback();
    }
    catch (Exception ex)
    {
        Console.WriteLine($"❌ Error queuing audio: {ex.Message}");
    }
}
265+
266+
// Lazily-created PortAudio output stream; non-null once playback has started.
private static PortAudioSharp.Stream? _outputStream = null;
267+
268+
/// <summary>
/// Opens and starts the shared PortAudio output stream if it is not already
/// running. Safe to call repeatedly; subsequent calls are no-ops.
/// </summary>
private static void StartAudioPlayback()
{
    // Guard the check-then-create under audioLock: this method is reached from
    // websocket event-handler threads, and an unguarded null check could let
    // two concurrent calls each open an output stream.
    lock (audioLock)
    {
        if (_outputStream != null)
            return; // Already playing

        try
        {
            // Get default output device
            int outputDevice = PortAudio.DefaultOutputDevice;
            if (outputDevice == PortAudio.NoDevice)
            {
                Console.WriteLine("⚠️ No default output device found for audio playback");
                return;
            }

            var deviceInfo = PortAudio.GetDeviceInfo(outputDevice);
            Console.WriteLine($"🔊 Playing through: {deviceInfo.name}");

            // Mono 16-bit output, matching the agent's linear16 output format.
            var outputParams = new PortAudioSharp.StreamParameters
            {
                device = outputDevice,
                channelCount = 1, // mono
                sampleFormat = PortAudioSharp.SampleFormat.Int16,
                suggestedLatency = deviceInfo.defaultLowOutputLatency,
                hostApiSpecificStreamInfo = IntPtr.Zero
            };

            // Create and start the output stream
            _outputStream = new PortAudioSharp.Stream(
                inParams: null,
                outParams: outputParams,
                sampleRate: 24000, // Match agent output (24kHz)
                framesPerBuffer: 512,
                streamFlags: PortAudioSharp.StreamFlags.ClipOff,
                callback: OutputCallback,
                userData: IntPtr.Zero
            );

            _outputStream.Start();
        }
        catch (Exception ex)
        {
            Console.WriteLine($"❌ Error starting audio playback: {ex.Message}");
            // Reset so a later call can retry instead of seeing a dead stream.
            _outputStream = null;
        }
    }
}
315+
316+
// PortAudio output callback: fills the native output buffer with exactly
// frameCount 16-bit mono samples drawn from the queued agent audio, padding
// the remainder with silence when the queue runs dry so the stream keeps
// running for the next utterance.
// NOTE(review): Console.WriteLine inside a real-time audio callback can cause
// glitches under load — acceptable for an example, but worth confirming.
private static PortAudioSharp.StreamCallbackResult OutputCallback(nint input, nint output, uint frameCount, ref PortAudioSharp.StreamCallbackTimeInfo timeInfo, PortAudioSharp.StreamCallbackFlags statusFlags, nint userDataPtr)
{
    lock (audioLock)
    {
        int bytesToWrite = (int)(frameCount * sizeof(Int16)); // 16-bit samples, mono (1 channel)
        byte[] outputBuffer = new byte[bytesToWrite];

        int bytesWritten = 0;
        while (bytesWritten < bytesToWrite)
        {
            // Get next buffer if current one is exhausted
            if (currentAudioBuffer == null || audioPosition >= currentAudioBuffer.Length)
            {
                if (audioQueue.Count > 0)
                {
                    currentAudioBuffer = audioQueue.Dequeue();
                    audioPosition = 0;
                    Console.WriteLine($"🔊 Playing new audio buffer: {currentAudioBuffer.Length} bytes (Queue: {audioQueue.Count} remaining)");
                }
                else
                {
                    // No more audio, fill with silence but KEEP stream running for next audio
                    for (int i = bytesWritten; i < bytesToWrite; i++)
                        outputBuffer[i] = 0;

                    // Early-exit path: flush the partially-filled (rest silence)
                    // buffer to the device and return immediately.
                    Marshal.Copy(outputBuffer, 0, output, bytesToWrite);
                    // DON'T stop the stream - keep it running for next conversation
                    return PortAudioSharp.StreamCallbackResult.Continue;
                }
            }

            // Copy data from current buffer; bounded by both what remains in
            // the buffer and what the device still needs this callback.
            int remainingInBuffer = currentAudioBuffer.Length - audioPosition;
            int remainingToWrite = bytesToWrite - bytesWritten;
            int bytesToCopy = Math.Min(remainingInBuffer, remainingToWrite);

            Array.Copy(currentAudioBuffer, audioPosition, outputBuffer, bytesWritten, bytesToCopy);
            audioPosition += bytesToCopy;
            bytesWritten += bytesToCopy;
        }

        // Copy to output (normal path: buffer completely filled with audio)
        Marshal.Copy(outputBuffer, 0, output, bytesToWrite);
    }

    return PortAudioSharp.StreamCallbackResult.Continue;
}
275363
}
276364
}

0 commit comments

Comments
 (0)