77using Deepgram . Models . Authenticate . v1 ;
88using Deepgram . Models . Agent . v2 . WebSocket ;
99using System . Collections . Generic ;
10+ using System . Runtime . InteropServices ;
1011using PortAudioSharp ;
1112
1213namespace SampleApp
@@ -55,107 +56,38 @@ static async Task Main(string[] args)
5556 DeepgramWsClientOptions options = new DeepgramWsClientOptions ( null , null , true ) ;
5657 var agentClient = ClientFactory . CreateAgentWebSocketClient ( apiKey : "" , options : options ) ;
5758
58- // current time
59- var lastAudioTime = DateTime . Now ;
60- var audioFileCount = 0 ;
59+ // Initialize conversation
60+ Console . WriteLine ( "🎤 Ready for conversation! Speak into your microphone..." ) ;
6161
6262 // Subscribe to the EventResponseReceived event
6363 await agentClient . Subscribe ( new EventHandler < OpenResponse > ( ( sender , e ) =>
6464 {
6565 Console . WriteLine ( $ "----> { e . Type } received") ;
6666 } ) ) ;
67- await agentClient . Subscribe ( new EventHandler < AudioResponse > ( ( sender , e ) =>
67+ await agentClient . Subscribe ( new EventHandler < AudioResponse > ( ( sender , e ) =>
6868 {
6969 Console . WriteLine ( $ "----> { e . Type } received") ;
7070
71- // if the last audio response is more than 5 seconds ago, add a wav header
72- if ( DateTime . Now . Subtract ( lastAudioTime ) . TotalSeconds > 7 )
71+ if ( e . Stream != null && e . Stream . Length > 0 )
7372 {
74- audioFileCount = audioFileCount + 1 ; // increment the audio file count
73+ var audioData = e . Stream . ToArray ( ) ;
74+ Console . WriteLine ( $ "🔊 Queueing { audioData . Length } bytes of agent speech for playback") ;
7575
76- // delete the file if it exists
77- if ( File . Exists ( $ "output_{ audioFileCount } .wav") )
78- {
79- File . Delete ( $ "output_{ audioFileCount } .wav") ;
80- }
81-
82- using ( BinaryWriter writer = new BinaryWriter ( File . Open ( $ "output_{ audioFileCount } .wav", FileMode . Append ) ) )
83- {
84- Console . WriteLine ( "Adding WAV header to output.wav" ) ;
85- byte [ ] wavHeader = new byte [ 44 ] ;
86- int sampleRate = 48000 ;
87- short bitsPerSample = 16 ;
88- short channels = 1 ;
89- int byteRate = sampleRate * channels * ( bitsPerSample / 8 ) ;
90- short blockAlign = ( short ) ( channels * ( bitsPerSample / 8 ) ) ;
91-
92- wavHeader [ 0 ] = 0x52 ; // R
93- wavHeader [ 1 ] = 0x49 ; // I
94- wavHeader [ 2 ] = 0x46 ; // F
95- wavHeader [ 3 ] = 0x46 ; // F
96- wavHeader [ 4 ] = 0x00 ; // Placeholder for file size (will be updated later)
97- wavHeader [ 5 ] = 0x00 ; // Placeholder for file size (will be updated later)
98- wavHeader [ 6 ] = 0x00 ; // Placeholder for file size (will be updated later)
99- wavHeader [ 7 ] = 0x00 ; // Placeholder for file size (will be updated later)
100- wavHeader [ 8 ] = 0x57 ; // W
101- wavHeader [ 9 ] = 0x41 ; // A
102- wavHeader [ 10 ] = 0x56 ; // V
103- wavHeader [ 11 ] = 0x45 ; // E
104- wavHeader [ 12 ] = 0x66 ; // f
105- wavHeader [ 13 ] = 0x6D ; // m
106- wavHeader [ 14 ] = 0x74 ; // t
107- wavHeader [ 15 ] = 0x20 ; // Space
108- wavHeader [ 16 ] = 0x10 ; // Subchunk1Size (16 for PCM)
109- wavHeader [ 17 ] = 0x00 ; // Subchunk1Size
110- wavHeader [ 18 ] = 0x00 ; // Subchunk1Size
111- wavHeader [ 19 ] = 0x00 ; // Subchunk1Size
112- wavHeader [ 20 ] = 0x01 ; // AudioFormat (1 for PCM)
113- wavHeader [ 21 ] = 0x00 ; // AudioFormat
114- wavHeader [ 22 ] = ( byte ) channels ; // NumChannels
115- wavHeader [ 23 ] = 0x00 ; // NumChannels
116- wavHeader [ 24 ] = ( byte ) ( sampleRate & 0xFF ) ; // SampleRate
117- wavHeader [ 25 ] = ( byte ) ( ( sampleRate >> 8 ) & 0xFF ) ; // SampleRate
118- wavHeader [ 26 ] = ( byte ) ( ( sampleRate >> 16 ) & 0xFF ) ; // SampleRate
119- wavHeader [ 27 ] = ( byte ) ( ( sampleRate >> 24 ) & 0xFF ) ; // SampleRate
120- wavHeader [ 28 ] = ( byte ) ( byteRate & 0xFF ) ; // ByteRate
121- wavHeader [ 29 ] = ( byte ) ( ( byteRate >> 8 ) & 0xFF ) ; // ByteRate
122- wavHeader [ 30 ] = ( byte ) ( ( byteRate >> 16 ) & 0xFF ) ; // ByteRate
123- wavHeader [ 31 ] = ( byte ) ( ( byteRate >> 24 ) & 0xFF ) ; // ByteRate
124- wavHeader [ 32 ] = ( byte ) blockAlign ; // BlockAlign
125- wavHeader [ 33 ] = 0x00 ; // BlockAlign
126- wavHeader [ 34 ] = ( byte ) bitsPerSample ; // BitsPerSample
127- wavHeader [ 35 ] = 0x00 ; // BitsPerSample
128- wavHeader [ 36 ] = 0x64 ; // d
129- wavHeader [ 37 ] = 0x61 ; // a
130- wavHeader [ 38 ] = 0x74 ; // t
131- wavHeader [ 39 ] = 0x61 ; // a
132- wavHeader [ 40 ] = 0x00 ; // Placeholder for data chunk size (will be updated later)
133- wavHeader [ 41 ] = 0x00 ; // Placeholder for data chunk size (will be updated later)
134- wavHeader [ 42 ] = 0x00 ; // Placeholder for data chunk size (will be updated later)
135- wavHeader [ 43 ] = 0x00 ; // Placeholder for data chunk size (will be updated later)
136-
137- writer . Write ( wavHeader ) ;
138- }
76+ // Play audio through speakers
77+ PlayAudioThroughSpeakers ( audioData ) ;
13978 }
140-
141- if ( e . Stream != null )
79+ else
14280 {
143- using ( BinaryWriter writer = new BinaryWriter ( File . Open ( $ "output_{ audioFileCount } .wav", FileMode . Append ) ) )
144- {
145- writer . Write ( e . Stream . ToArray ( ) ) ;
146- }
81+ Console . WriteLine ( $ "⚠️ Received empty audio stream") ;
14782 }
148-
149- // record the last audio time
150- lastAudioTime = DateTime . Now ;
15183 } ) ) ;
152- await agentClient . Subscribe ( new EventHandler < AgentAudioDoneResponse > ( ( sender , e ) =>
84+ await agentClient . Subscribe ( new EventHandler < AgentAudioDoneResponse > ( ( sender , e ) =>
15385 {
154- Console . WriteLine ( $ "----> { e } received") ;
86+ Console . WriteLine ( $ "----> { e } received - Agent finished speaking 🎤 ") ;
15587 } ) ) ;
15688 await agentClient . Subscribe ( new EventHandler < AgentStartedSpeakingResponse > ( ( sender , e ) =>
15789 {
158- Console . WriteLine ( $ "----> { e } received") ;
90+ Console . WriteLine ( $ "----> { e } received - Agent is speaking 🗣️ ") ;
15991 } ) ) ;
16092 await agentClient . Subscribe ( new EventHandler < AgentThinkingResponse > ( ( sender , e ) =>
16193 {
@@ -171,7 +103,7 @@ await agentClient.Subscribe(new EventHandler<FunctionCallRequestResponse>((sende
171103 } ) ) ;
172104 await agentClient . Subscribe ( new EventHandler < UserStartedSpeakingResponse > ( ( sender , e ) =>
173105 {
174- Console . WriteLine ( $ "----> { e } received") ;
106+ Console . WriteLine ( $ "----> { e } received - User is speaking 👤 ") ;
175107 } ) ) ;
176108 await agentClient . Subscribe ( new EventHandler < WelcomeResponse > ( ( sender , e ) =>
177109 {
@@ -210,10 +142,15 @@ await agentClient.Subscribe(new EventHandler<ErrorResponse>((sender, e) =>
210142 var settingsConfiguration = new SettingsSchema ( ) ;
211143 settingsConfiguration . Agent . Think . Provider . Type = "open_ai" ;
212144 settingsConfiguration . Agent . Think . Provider . Model = "gpt-4o-mini" ;
213- settingsConfiguration . Audio . Output . SampleRate = 16000 ;
214- settingsConfiguration . Audio . Output . Container = "wav" ;
215- settingsConfiguration . Audio . Input . SampleRate = 44100 ;
216- settingsConfiguration . Agent . Greeting = "Hello, how can I help you today?" ;
145+
146+ // Configure audio settings - keep your input format, fix output
147+ settingsConfiguration . Audio . Input . Encoding = "linear16" ;
148+ settingsConfiguration . Audio . Input . SampleRate = 24000 ;
149+ settingsConfiguration . Audio . Output . Encoding = "linear16" ; // Use linear16 for output too
150+ settingsConfiguration . Audio . Output . SampleRate = 24000 ;
151+ settingsConfiguration . Audio . Output . Container = "none" ;
152+
153+ settingsConfiguration . Agent . Greeting = "Hello! How can I help you today?" ;
217154 settingsConfiguration . Agent . Listen . Provider . Type = "deepgram" ;
218155 settingsConfiguration . Agent . Listen . Provider . Model = "nova-3" ;
219156 settingsConfiguration . Agent . Listen . Provider . Keyterms = new List < string > { "Deepgram" } ;
@@ -236,18 +173,42 @@ await agentClient.Subscribe(new EventHandler<ErrorResponse>((sender, e) =>
236173 return ;
237174 }
238175
239- // Microphone streaming
176+ // Microphone streaming with debugging
240177 Console . WriteLine ( "Starting microphone..." ) ;
241178 Microphone microphone = null ;
242- try
179+ int audioDataCounter = 0 ;
180+
181+ try
243182 {
244- microphone = new Microphone ( agentClient . SendBinary ) ;
183+ // Create microphone with proper sample rate and debugging
184+ microphone = new Microphone (
185+ push_callback : ( audioData , length ) =>
186+ {
187+ audioDataCounter ++ ;
188+ Console . WriteLine ( $ "[MIC] Captured audio chunk #{ audioDataCounter } : { length } bytes") ;
189+
190+ // Create array with actual length
191+ byte [ ] actualData = new byte [ length ] ;
192+ Array . Copy ( audioData , actualData , length ) ;
193+
194+ // Send to agent
195+ agentClient . SendBinary ( actualData ) ;
196+ } ,
197+ rate : 24000 , // Match the agent's expected input rate (24kHz)
198+ chunkSize : 8192 , // Standard chunk size
199+ channels : 1 , // Mono
200+ device_index : PortAudio . DefaultInputDevice ,
201+ format : SampleFormat . Int16
202+ ) ;
203+
245204 microphone . Start ( ) ;
246- Console . WriteLine ( "Microphone started successfully. Waiting for audio input..." ) ;
205+ Console . WriteLine ( "Microphone started successfully. Speak into your microphone now!" ) ;
206+ Console . WriteLine ( "You should see '[MIC] Captured audio chunk' messages when speaking..." ) ;
247207 }
248208 catch ( Exception ex )
249209 {
250210 Console . WriteLine ( $ "Error starting microphone: { ex . Message } ") ;
211+ Console . WriteLine ( $ "Stack trace: { ex . StackTrace } ") ;
251212 return ;
252213 }
253214
@@ -271,6 +232,133 @@ await agentClient.Subscribe(new EventHandler<ErrorResponse>((sender, e) =>
271232 {
272233 Console . WriteLine ( $ "Exception: { ex . Message } ") ;
273234 }
235+ }
236+
// Playback state shared between PlayAudioThroughSpeakers (producer) and
// OutputCallback (consumer). Both sides touch it only while holding audioLock.

/// <summary>Audio chunks waiting to be played, in arrival order.</summary>
private static readonly Queue<byte[]> audioQueue = new Queue<byte[]>();

/// <summary>Chunk currently being drained by the output callback; null until the first chunk is dequeued.</summary>
private static byte[]? currentAudioBuffer = null;

/// <summary>Offset, in bytes, of the next unread byte inside currentAudioBuffer.</summary>
private static int audioPosition = 0;

/// <summary>Guards audioQueue, currentAudioBuffer and audioPosition.</summary>
private static readonly object audioLock = new object();
242+
/// <summary>
/// Queues a chunk of audio for playback through the system's default output
/// device (speakers) and makes sure the playback stream has been started.
/// </summary>
/// <param name="audioData">PCM audio data to play. Null or empty chunks are ignored.</param>
static void PlayAudioThroughSpeakers(byte[] audioData)
{
    // The output callback dereferences every buffer it dequeues, so a null
    // chunk must never reach the queue; an empty one would just be wasted work.
    if (audioData == null || audioData.Length == 0)
    {
        return;
    }

    try
    {
        // The queue is shared with the output callback, so enqueue under the lock.
        lock (audioLock)
        {
            audioQueue.Enqueue(audioData);
        }

        // Start playback stream if not already running
        StartAudioPlayback();
    }
    catch (Exception ex)
    {
        Console.WriteLine($"❌ Error queuing audio: {ex.Message}");
    }
}
265+
/// <summary>Output stream used for playback; null until a stream has been started successfully.</summary>
private static PortAudioSharp.Stream? _outputStream = null;

/// <summary>
/// Opens and starts a mono, 16-bit, 24 kHz output stream on the default output
/// device, driven by <see cref="OutputCallback"/>. Does nothing when a stream
/// has already been started. Failures are reported on the console and leave
/// <c>_outputStream</c> null so that a later call can try again.
/// </summary>
private static void StartAudioPlayback()
{
    if (_outputStream != null)
    {
        return; // Already playing
    }

    // Built in a local so that a stream which fails to start is never
    // published through _outputStream and can be released in the catch block.
    PortAudioSharp.Stream? stream = null;

    try
    {
        // Get default output device
        int outputDevice = PortAudio.DefaultOutputDevice;
        if (outputDevice == PortAudio.NoDevice)
        {
            Console.WriteLine("⚠️ No default output device found for audio playback");
            return;
        }

        var deviceInfo = PortAudio.GetDeviceInfo(outputDevice);
        Console.WriteLine($"🔊 Playing through: {deviceInfo.name}");

        // Set up output stream parameters
        var outputParams = new PortAudioSharp.StreamParameters
        {
            device = outputDevice,
            channelCount = 1, // mono
            sampleFormat = PortAudioSharp.SampleFormat.Int16,
            suggestedLatency = deviceInfo.defaultLowOutputLatency,
            hostApiSpecificStreamInfo = IntPtr.Zero
        };

        // Create and start the output stream
        stream = new PortAudioSharp.Stream(
            inParams: null,
            outParams: outputParams,
            sampleRate: 24000, // Match agent output (24kHz)
            framesPerBuffer: 512,
            streamFlags: PortAudioSharp.StreamFlags.ClipOff,
            callback: OutputCallback,
            userData: IntPtr.Zero
        );

        stream.Start();

        // Publish only once the stream is actually running.
        _outputStream = stream;
    }
    catch (Exception ex)
    {
        Console.WriteLine($"❌ Error starting audio playback: {ex.Message}");

        // Release the half-initialised stream instead of leaking it.
        try
        {
            stream?.Dispose();
        }
        catch (Exception disposeEx)
        {
            Console.WriteLine($"❌ Error releasing audio stream: {disposeEx.Message}");
        }

        _outputStream = null;
    }
}
315+
/// <summary>
/// Stream callback for the playback stream: fills the device buffer with queued
/// audio bytes and pads whatever is left with silence once the queue is empty.
/// </summary>
/// <param name="input">Unused by this callback.</param>
/// <param name="output">Native buffer that receives the 16-bit samples.</param>
/// <param name="frameCount">Number of frames requested; each frame is treated as one 16-bit sample.</param>
/// <param name="timeInfo">Unused by this callback.</param>
/// <param name="statusFlags">Unused by this callback.</param>
/// <param name="userDataPtr">Unused by this callback.</param>
/// <returns>Always <c>Continue</c>, so the stream keeps running between utterances.</returns>
private static PortAudioSharp.StreamCallbackResult OutputCallback(nint input, nint output, uint frameCount, ref PortAudioSharp.StreamCallbackTimeInfo timeInfo, PortAudioSharp.StreamCallbackFlags statusFlags, nint userDataPtr)
{
    lock (audioLock)
    {
        int totalBytes = (int)(frameCount * sizeof(Int16)); // 16-bit samples

        // A freshly allocated array is zero-filled, so any part of it that is
        // not overwritten below is played back as silence.
        byte[] staging = new byte[totalBytes];
        int filled = 0;

        while (filled < totalBytes)
        {
            // Move on to the next queued chunk once the current one is used up.
            if (currentAudioBuffer is null || audioPosition >= currentAudioBuffer.Length)
            {
                if (audioQueue.Count == 0)
                {
                    // Nothing left to play: leave the remainder silent and keep
                    // the stream running for the next audio.
                    break;
                }

                currentAudioBuffer = audioQueue.Dequeue();
                audioPosition = 0;
                // NOTE(review): console output from inside the audio callback may
                // stall playback - consider moving this log elsewhere.
                Console.WriteLine($"🔊 Playing new audio buffer: {currentAudioBuffer.Length} bytes (Queue: {audioQueue.Count} remaining)");
            }

            // Take as much as both the current chunk and the device buffer allow.
            int available = currentAudioBuffer.Length - audioPosition;
            int wanted = totalBytes - filled;
            int chunk = Math.Min(available, wanted);

            Array.Copy(currentAudioBuffer, audioPosition, staging, filled, chunk);
            audioPosition += chunk;
            filled += chunk;
        }

        // Hand the assembled bytes to the native output buffer.
        Marshal.Copy(staging, 0, output, totalBytes);
    }

    return PortAudioSharp.StreamCallbackResult.Continue;
}
275363 }
276364}
0 commit comments