From 619e321fb2fd2f3d4ad4427316602fc9b15615d0 Mon Sep 17 00:00:00 2001
From: farouk <farouk.Abdallah@univ-grenoble-alpes.fr>
Date: Tue, 22 Jul 2025 13:35:11 +0200
Subject: [PATCH 1/2] feat: add microphone button and speech-to-text input
 using Whisper (Groq API key required)

---
 .env.example                       |  13 ----
 package.json                       |   1 +
 pnpm-lock.yaml                     |  59 ++++++++--------
 src/app/api/transcribe/route.ts    |  55 +++++++++++++++
 src/components/thread/index.tsx    |  40 ++++++++++-
 src/components/ui/voice-button.tsx |  71 +++++++++++++++++++
 src/hooks/use-voice-recording.tsx  | 108 +++++++++++++++++++++++++++++
 7 files changed, 301 insertions(+), 46 deletions(-)
 delete mode 100644 .env.example
 create mode 100644 src/app/api/transcribe/route.ts
 create mode 100644 src/components/ui/voice-button.tsx
 create mode 100644 src/hooks/use-voice-recording.tsx

diff --git a/.env.example b/.env.example
deleted file mode 100644
index a38138d3..00000000
--- a/.env.example
+++ /dev/null
@@ -1,13 +0,0 @@
-# LangGraph Configuration
-NEXT_PUBLIC_API_URL=http://localhost:2024
-NEXT_PUBLIC_ASSISTANT_ID=agent
-# Do NOT prefix this with "NEXT_PUBLIC_" as we do not want this exposed in the client.
-LANGSMITH_API_KEY=
-
-# Production LangGraph Configuration (quickstart) - Uncomment to use
-# NEXT_PUBLIC_ASSISTANT_ID="agent"
-# This should be the deployment URL of your LangGraph server
-# LANGGRAPH_API_URL="https://my-agent.default.us.langgraph.app"
-# This should be the URL of your website + "/api". This is how you connect to the API proxy
-# NEXT_PUBLIC_API_URL="https://my-website.com/api"
-# LANGSMITH_API_KEY="lsv2_..."
diff --git a/package.json b/package.json
index 64910dda..7b8490b5 100644
--- a/package.json
+++ b/package.json
@@ -35,6 +35,7 @@
     "esbuild": "^0.25.0",
     "esbuild-plugin-tailwindcss": "^2.0.1",
     "framer-motion": "^12.4.9",
+    "groq-sdk": "^0.27.0",
     "katex": "^0.16.21",
     "langgraph-nextjs-api-passthrough": "^0.0.4",
     "lodash": "^4.17.21",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 030fd063..7574c54c 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -56,6 +56,9 @@ importers:
       framer-motion:
         specifier: ^12.4.9
         version: 12.12.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
+      groq-sdk:
+        specifier: ^0.27.0
+        version: 0.27.0
       katex:
         specifier: ^0.16.21
         version: 0.16.22
@@ -2054,6 +2057,9 @@ packages:
   graphemer@1.4.0:
     resolution: {integrity: sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==}
 
+  groq-sdk@0.27.0:
+    resolution: {integrity: sha512-fiEou5jdIgXGolo6fz5N2G++0fGMRhC/GzAqKtSnagt9eOwFn7C8FYjVeqs7ah6/VN/yygEUvFVJa0XD6T+3mg==}
+
   has-bigints@1.1.0:
     resolution: {integrity: sha512-R3pbpkcIqv2Pm3dUwgjclDRVmWpTJW2DcMzcIhEXEx1oh/CEMObMm3KLmRJOdvhM7o4uQBnwr8pzRK2sJWIqfg==}
     engines: {node: '>= 0.4'}
@@ -4355,12 +4361,10 @@ snapshots:
     dependencies:
       '@types/node': 22.15.18
       form-data: 4.0.2
-    optional: true
 
   '@types/node@18.19.100':
     dependencies:
       undici-types: 5.26.5
-    optional: true
 
   '@types/node@22.15.18':
     dependencies:
@@ -4523,7 +4527,6 @@ snapshots:
   abort-controller@3.0.0:
     dependencies:
       event-target-shim: 5.0.1
-    optional: true
 
   acorn-jsx@5.3.2(acorn@8.14.1):
     dependencies:
@@ -4534,7 +4537,6 @@ snapshots:
   agentkeepalive@4.6.0:
     dependencies:
       humanize-ms: 1.2.1
-    optional: true
 
   ajv@6.12.6:
     dependencies:
@@ -4626,8 +4628,7 @@ snapshots:
 
   async-function@1.0.0: {}
 
-  asynckit@0.4.0:
-    optional: true
+  asynckit@0.4.0: {}
 
   autoprefixer@10.4.21(postcss@8.5.3):
     dependencies:
@@ -4752,7 +4753,6 @@ snapshots:
   combined-stream@1.0.8:
     dependencies:
       delayed-stream: 1.0.0
-    optional: true
 
   comma-separated-tokens@1.0.8: {}
 
@@ -4866,8 +4866,7 @@ snapshots:
       has-property-descriptors: 1.0.2
       object-keys: 1.1.1
 
-  delayed-stream@1.0.0:
-    optional: true
+  delayed-stream@1.0.0: {}
 
   dequal@2.0.3: {}
 
@@ -5249,8 +5248,7 @@ snapshots:
 
   esutils@2.0.3: {}
 
-  event-target-shim@5.0.1:
-    optional: true
+  event-target-shim@5.0.1: {}
 
   eventemitter3@4.0.7: {}
 
@@ -5316,8 +5314,7 @@ snapshots:
     dependencies:
       is-callable: 1.2.7
 
-  form-data-encoder@1.7.2:
-    optional: true
+  form-data-encoder@1.7.2: {}
 
   form-data@4.0.2:
     dependencies:
@@ -5325,7 +5322,6 @@ snapshots:
       combined-stream: 1.0.8
       es-set-tostringtag: 2.1.0
       mime-types: 2.1.35
-    optional: true
 
   format@0.2.2: {}
 
@@ -5333,7 +5329,6 @@ snapshots:
     dependencies:
       node-domexception: 1.0.0
       web-streams-polyfill: 4.0.0-beta.3
-    optional: true
 
   fraction.js@4.3.7: {}
 
@@ -5419,6 +5414,18 @@ snapshots:
 
   graphemer@1.4.0: {}
 
+  groq-sdk@0.27.0:
+    dependencies:
+      '@types/node': 18.19.100
+      '@types/node-fetch': 2.6.12
+      abort-controller: 3.0.0
+      agentkeepalive: 4.6.0
+      form-data-encoder: 1.7.2
+      formdata-node: 4.4.1
+      node-fetch: 2.7.0
+    transitivePeerDependencies:
+      - encoding
+
   has-bigints@1.1.0: {}
 
   has-flag@4.0.0: {}
@@ -5540,7 +5547,6 @@ snapshots:
   humanize-ms@1.2.1:
     dependencies:
       ms: 2.1.3
-    optional: true
 
   icss-utils@5.1.0(postcss@8.5.3):
     dependencies:
@@ -6233,13 +6239,11 @@ snapshots:
       braces: 3.0.3
       picomatch: 2.3.1
 
-  mime-db@1.52.0:
-    optional: true
+  mime-db@1.52.0: {}
 
   mime-types@2.1.35:
     dependencies:
       mime-db: 1.52.0
-    optional: true
 
   minimatch@3.1.2:
     dependencies:
@@ -6308,13 +6312,11 @@ snapshots:
       - '@babel/core'
       - babel-plugin-macros
 
-  node-domexception@1.0.0:
-    optional: true
+  node-domexception@1.0.0: {}
 
   node-fetch@2.7.0:
     dependencies:
       whatwg-url: 5.0.0
-    optional: true
 
   node-releases@2.0.19: {}
 
@@ -7012,8 +7014,7 @@ snapshots:
     dependencies:
       is-number: 7.0.0
 
-  tr46@0.0.3:
-    optional: true
+  tr46@0.0.3: {}
 
   trim-lines@3.0.1: {}
 
@@ -7088,8 +7089,7 @@ snapshots:
       has-symbols: 1.1.0
       which-boxed-primitive: 1.1.1
 
-  undici-types@5.26.5:
-    optional: true
+  undici-types@5.26.5: {}
 
   undici-types@6.21.0: {}
 
@@ -7233,17 +7233,14 @@ snapshots:
 
   web-namespaces@2.0.1: {}
 
-  web-streams-polyfill@4.0.0-beta.3:
-    optional: true
+  web-streams-polyfill@4.0.0-beta.3: {}
 
-  webidl-conversions@3.0.1:
-    optional: true
+  webidl-conversions@3.0.1: {}
 
   whatwg-url@5.0.0:
     dependencies:
       tr46: 0.0.3
       webidl-conversions: 3.0.1
-    optional: true
 
   which-boxed-primitive@1.1.1:
     dependencies:
diff --git a/src/app/api/transcribe/route.ts b/src/app/api/transcribe/route.ts
new file mode 100644
index 00000000..44e62e16
--- /dev/null
+++ b/src/app/api/transcribe/route.ts
@@ -0,0 +1,70 @@
+// src/app/api/transcribe/route.ts
+import { NextRequest, NextResponse } from 'next/server';
+import Groq from 'groq-sdk';
+
+// Created lazily: the Groq constructor throws when no API key is configured,
+// and doing that at module scope would fail as soon as the route is imported
+// instead of producing a JSON error for the request.
+let groqClient: Groq | undefined;
+
+/** Returns the shared Groq client, creating it on first use. */
+function getGroqClient(): Groq {
+  if (!groqClient) {
+    groqClient = new Groq({ apiKey: process.env.GROQ_API_KEY });
+  }
+  return groqClient;
+}
+
+/**
+ * POST /api/transcribe
+ *
+ * Expects multipart form data with the recording in the `audio` field and
+ * forwards it to Groq's Whisper transcription endpoint.
+ *
+ * Responses:
+ * - 200 `{ transcription: string }` on success
+ * - 400 `{ error }` when the `audio` field is missing, not a file, or empty
+ * - 500 `{ error, details }` when the client cannot be created or the call fails
+ */
+export async function POST(request: NextRequest) {
+  try {
+    const formData = await request.formData();
+    const audioFile = formData.get('audio');
+
+    // FormData.get() yields File | string | null; only a non-empty file is usable.
+    if (!audioFile || typeof audioFile === 'string' || audioFile.size === 0) {
+      return NextResponse.json(
+        { error: 'No audio file provided' },
+        { status: 400 }
+      );
+    }
+
+    // Log metadata only; the audio and its transcript may contain private speech.
+    console.log('Audio file received:', {
+      name: audioFile.name,
+      size: audioFile.size,
+      type: audioFile.type
+    });
+
+    const transcription = await getGroqClient().audio.transcriptions.create({
+      file: audioFile,
+      model: "whisper-large-v3-turbo",
+      response_format: "text",
+      language: "fr", // transcription language is fixed to French
+    });
+
+    // The result is handled both as a bare string and as an object with `text`,
+    // since the runtime value for response_format "text" may be either.
+    return NextResponse.json({
+      transcription: typeof transcription === 'string' ? transcription : transcription.text
+    });
+  } catch (error) {
+    // console.error prints the message and stack of Error instances.
+    console.error('Transcription error:', error);
+
+    return NextResponse.json(
+      { error: 'Failed to transcribe audio', details: error instanceof Error ? error.message : 'Unknown error' },
+      { status: 500 }
+    );
+  }
+}
\ No newline at end of file
diff --git a/src/components/thread/index.tsx b/src/components/thread/index.tsx
index d52a1594..76c1ab22 100644
--- a/src/components/thread/index.tsx
+++ b/src/components/thread/index.tsx
@@ -1,3 +1,4 @@
+// src/components/thread/index.tsx
 import { v4 as uuidv4 } from "uuid";
 import { ReactNode, useEffect, useRef } from "react";
 import { motion } from "framer-motion";
@@ -5,6 +6,8 @@ import { cn } from "@/lib/utils";
 import { useStreamContext } from "@/providers/Stream";
 import { useState, FormEvent } from "react";
 import { Button } from "../ui/button";
+import { VoiceButton } from "../ui/voice-button";
+import { useVoiceRecording } from "@/hooks/use-voice-recording";
 import { Checkpoint, Message } from "@langchain/langgraph-sdk";
 import { AssistantMessage, AssistantMessageLoading } from "./messages/ai";
 import { HumanMessage } from "./messages/human";
@@ -145,6 +148,28 @@ export function Thread() {
 
   const lastError = useRef<string | undefined>(undefined);
 
+  // Speech-to-text: connect the voice recording hook to the text input
+  // and surface failures as toasts.
+  const voiceRecording = useVoiceRecording({
+    // A transcript replaces a blank input; otherwise it is appended to the
+    // trimmed input, separated by a single space.
+    onTranscriptionComplete: (text: string) => {
+      setInput((current) => {
+        const existing = current.trim();
+        return existing.length === 0 ? text : existing + " " + text;
+      });
+    },
+    // Errors reported by the hook are shown in a toast with a close button.
+    onError: (error: string) => {
+      const toastOptions = {
+        description: error,
+        richColors: true,
+        closeButton: true,
+      };
+      toast.error("Voice recording error", toastOptions);
+    },
+  });
+
   const setThreadId = (id: string | null) => {
     _setThreadId(id);
 
@@ -513,6 +538,15 @@ export function Thread() {
                           accept="image/jpeg,image/png,image/gif,image/webp,application/pdf"
                           className="hidden"
                         />
+                        
+                        {/* Voice Recording Button */}
+                        <VoiceButton
+                          isRecording={voiceRecording.isRecording}
+                          isTranscribing={voiceRecording.isTranscribing}
+                          onToggleRecording={voiceRecording.toggleRecording}
+                          disabled={isLoading}
+                        />
+
                         {stream.isLoading ? (
                           <Button
                             key="stop"
@@ -528,7 +562,9 @@ export function Thread() {
                             className="ml-auto shadow-md transition-all"
                             disabled={
                               isLoading ||
-                              (!input.trim() && contentBlocks.length === 0)
+                              (!input.trim() && contentBlocks.length === 0) ||
+                              voiceRecording.isRecording ||
+                              voiceRecording.isTranscribing
                             }
                           >
                             Send
@@ -559,4 +595,4 @@ export function Thread() {
       </div>
     </div>
   );
-}
+}
\ No newline at end of file
diff --git a/src/components/ui/voice-button.tsx b/src/components/ui/voice-button.tsx
new file mode 100644
index 00000000..619c519d
--- /dev/null
+++ b/src/components/ui/voice-button.tsx
@@ -0,0 +1,77 @@
+// src/components/ui/voice-button.tsx
+import React from 'react';
+import { Button, ButtonProps } from './button';
+import { Mic, MicOff, Loader2 } from 'lucide-react';
+import { cn } from '@/lib/utils';
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipProvider,
+  TooltipTrigger,
+} from './tooltip';
+
+/** Props for VoiceButton; `onClick` is replaced by `onToggleRecording`. */
+export interface VoiceButtonProps extends Omit<ButtonProps, 'onClick'> {
+  /** True while audio is being recorded. */
+  isRecording: boolean;
+  /** True while a recording is being transcribed; the button is disabled meanwhile. */
+  isTranscribing: boolean;
+  /** Called on click to start or stop recording. */
+  onToggleRecording: () => void;
+  disabled?: boolean;
+}
+
+/**
+ * Icon button that toggles voice recording and shows its state in a tooltip.
+ *
+ * The state label doubles as the button's `aria-label`, because an icon-only
+ * button otherwise has no accessible name.
+ */
+export function VoiceButton({
+  isRecording,
+  isTranscribing,
+  onToggleRecording,
+  disabled = false,
+  className,
+  ...props
+}: VoiceButtonProps) {
+  // Transcribing takes precedence over recording when both flags are set.
+  let label = 'Start voice recording';
+  let icon = <Mic className="h-4 w-4" />;
+  if (isTranscribing) {
+    label = 'Transcribing...';
+    icon = <Loader2 className="h-4 w-4 animate-spin" />;
+  } else if (isRecording) {
+    label = 'Stop recording';
+    icon = <MicOff className="h-4 w-4" />;
+  }
+
+  return (
+    <TooltipProvider>
+      <Tooltip>
+        <TooltipTrigger asChild>
+          <Button
+            type="button"
+            variant={isRecording ? "destructive" : "ghost"}
+            size="icon"
+            aria-label={label}
+            onClick={onToggleRecording}
+            disabled={disabled || isTranscribing}
+            className={cn(
+              "transition-all duration-200",
+              isRecording && "animate-pulse bg-red-500 hover:bg-red-600",
+              isTranscribing && "cursor-not-allowed",
+              className
+            )}
+            {...props}
+          >
+            {icon}
+          </Button>
+        </TooltipTrigger>
+        <TooltipContent side="top">
+          <p>{label}</p>
+        </TooltipContent>
+      </Tooltip>
+    </TooltipProvider>
+  );
+}
\ No newline at end of file
diff --git a/src/hooks/use-voice-recording.tsx b/src/hooks/use-voice-recording.tsx
new file mode 100644
index 00000000..1a10ea9c
--- /dev/null
+++ b/src/hooks/use-voice-recording.tsx
@@ -0,0 +1,182 @@
+// src/hooks/use-voice-recording.tsx
+import { useState, useRef, useCallback, useEffect } from 'react';
+
+export interface UseVoiceRecordingProps {
+  /** Receives the trimmed transcript once a recording has been transcribed. */
+  onTranscriptionComplete: (text: string) => void;
+  /** Receives a user-facing message when recording or transcription fails. */
+  onError?: (error: string) => void;
+}
+
+// Recording formats to try, most preferred first. Not every browser's
+// MediaRecorder supports WebM/Opus, so fall back instead of hard-coding it.
+const PREFERRED_MIME_TYPES = ['audio/webm;codecs=opus', 'audio/webm', 'audio/mp4'];
+
+/** Returns the first supported recording format, or undefined to use the browser default. */
+function pickMimeType(): string | undefined {
+  return PREFERRED_MIME_TYPES.find((type) => MediaRecorder.isTypeSupported(type));
+}
+
+/** Maps a MIME type such as "audio/webm;codecs=opus" to a file extension ("webm"). */
+function extensionFor(mimeType: string): string {
+  const subtype = mimeType.split(';')[0]?.split('/')[1]?.trim();
+  return subtype ? subtype : 'webm';
+}
+
+/**
+ * Records microphone audio with MediaRecorder and sends it to `/api/transcribe`.
+ *
+ * @param onTranscriptionComplete Called with the trimmed transcript on success.
+ * @param onError Called with a user-facing message on any failure.
+ * @returns Recording/transcribing flags plus start, stop and toggle controls.
+ */
+export function useVoiceRecording({
+  onTranscriptionComplete,
+  onError
+}: UseVoiceRecordingProps) {
+  const [isRecording, setIsRecording] = useState(false);
+  const [isTranscribing, setIsTranscribing] = useState(false);
+  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
+  const streamRef = useRef<MediaStream | null>(null);
+  const audioChunksRef = useRef<Blob[]>([]);
+  // True while getUserMedia is pending, so a second click cannot open a second stream.
+  const isStartingRef = useRef(false);
+
+  // Stops every track of the active stream so the browser releases the microphone.
+  const releaseStream = useCallback(() => {
+    streamRef.current?.getTracks().forEach((track) => track.stop());
+    streamRef.current = null;
+  }, []);
+
+  // Release the microphone if the component unmounts while recording.
+  useEffect(() => {
+    return () => {
+      const recorder = mediaRecorderRef.current;
+      if (recorder && recorder.state !== 'inactive') {
+        recorder.onstop = null; // nothing is left to receive the transcript
+        recorder.stop();
+      }
+      releaseStream();
+    };
+  }, [releaseStream]);
+
+  // Uploads a finished recording and reports the transcript or an error.
+  const transcribeAudio = useCallback(async (audioBlob: Blob) => {
+    try {
+      const formData = new FormData();
+      // Name the upload after the format that was actually recorded.
+      formData.append('audio', audioBlob, `recording.${extensionFor(audioBlob.type)}`);
+
+      const response = await fetch('/api/transcribe', {
+        method: 'POST',
+        body: formData,
+      });
+
+      if (!response.ok) {
+        throw new Error(`Transcription failed: ${response.statusText}`);
+      }
+
+      const { transcription } = await response.json();
+      if (typeof transcription !== 'string') {
+        throw new Error('Transcription response did not contain text');
+      }
+      onTranscriptionComplete(transcription.trim());
+    } catch (error) {
+      console.error('Transcription error:', error);
+      onError?.('Failed to transcribe audio. Please try again.');
+    } finally {
+      setIsTranscribing(false);
+    }
+  }, [onTranscriptionComplete, onError]);
+
+  const startRecording = useCallback(async () => {
+    const active = mediaRecorderRef.current;
+    if (isStartingRef.current || (active && active.state !== 'inactive')) {
+      return;
+    }
+    isStartingRef.current = true;
+
+    try {
+      const stream = await navigator.mediaDevices.getUserMedia({
+        audio: {
+          echoCancellation: true,
+          noiseSuppression: true,
+          autoGainControl: true,
+        }
+      });
+      streamRef.current = stream;
+
+      const mimeType = pickMimeType();
+      const mediaRecorder = new MediaRecorder(stream, mimeType ? { mimeType } : undefined);
+      mediaRecorderRef.current = mediaRecorder;
+      audioChunksRef.current = [];
+
+      mediaRecorder.ondataavailable = (event) => {
+        if (event.data.size > 0) {
+          audioChunksRef.current.push(event.data);
+        }
+      };
+
+      mediaRecorder.onstop = () => {
+        const audioBlob = new Blob(audioChunksRef.current, {
+          type: mediaRecorder.mimeType,
+        });
+        audioChunksRef.current = [];
+        mediaRecorderRef.current = null;
+        releaseStream();
+        // Also covers stops that did not go through stopRecording.
+        setIsRecording(false);
+
+        if (audioBlob.size === 0) {
+          setIsTranscribing(false);
+          onError?.('No audio was recorded. Please try again.');
+          return;
+        }
+        setIsTranscribing(true);
+        void transcribeAudio(audioBlob);
+      };
+
+      mediaRecorder.start(1000); // Collect data every second
+      setIsRecording(true);
+    } catch (error) {
+      console.error('Error starting recording:', error);
+      // A stream may already be open if the failure came from MediaRecorder.
+      const hadStream = streamRef.current !== null;
+      releaseStream();
+      mediaRecorderRef.current = null;
+      onError?.(
+        hadStream
+          ? 'Voice recording is not supported in this browser.'
+          : 'Failed to access microphone. Please check permissions.'
+      );
+    } finally {
+      isStartingRef.current = false;
+    }
+  }, [onError, releaseStream, transcribeAudio]);
+
+  const stopRecording = useCallback(() => {
+    const recorder = mediaRecorderRef.current;
+    // Ask the recorder itself rather than React state, which can lag a render behind.
+    if (recorder && recorder.state !== 'inactive') {
+      recorder.stop();
+      setIsRecording(false);
+      setIsTranscribing(true);
+    }
+  }, []);
+
+  const toggleRecording = useCallback(() => {
+    if (isRecording) {
+      stopRecording();
+    } else {
+      void startRecording();
+    }
+  }, [isRecording, startRecording, stopRecording]);
+
+  return {
+    isRecording,
+    isTranscribing,
+    startRecording,
+    stopRecording,
+    toggleRecording,
+  };
+}
\ No newline at end of file

From c42461effb56dde6f01375c5c1e21e6d5da141cf Mon Sep 17 00:00:00 2001
From: farouk <farouk.Abdallah@univ-grenoble-alpes.fr>
Date: Tue, 22 Jul 2025 13:41:18 +0200
Subject: [PATCH 2/2] add .env.example with groq api key

---
 .env.example | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 .env.example

diff --git a/.env.example b/.env.example
new file mode 100644
index 00000000..75061cd9
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,16 @@
+# Groq API key for the speech-to-text (Whisper) route. Do NOT prefix this with "NEXT_PUBLIC_"; it is read on the server only.
+GROQ_API_KEY=
+
+# LangGraph Configuration
+NEXT_PUBLIC_API_URL=http://localhost:2024
+NEXT_PUBLIC_ASSISTANT_ID=agent
+# Do NOT prefix this with "NEXT_PUBLIC_" as we do not want this exposed in the client.
+LANGSMITH_API_KEY=
+
+# Production LangGraph Configuration (quickstart) - Uncomment to use
+# NEXT_PUBLIC_ASSISTANT_ID="agent"
+# This should be the deployment URL of your LangGraph server
+# LANGGRAPH_API_URL="https://my-agent.default.us.langgraph.app"
+# This should be the URL of your website + "/api". This is how you connect to the API proxy
+# NEXT_PUBLIC_API_URL="https://my-website.com/api"
+# LANGSMITH_API_KEY="lsv2_..."
\ No newline at end of file