From 619e321fb2fd2f3d4ad4427316602fc9b15615d0 Mon Sep 17 00:00:00 2001 From: farouk Date: Tue, 22 Jul 2025 13:35:11 +0200 Subject: [PATCH 1/2] feat: add microphone button and speech-to-text input using Whisper (Groq API key required) --- .env.example | 13 ---- package.json | 1 + pnpm-lock.yaml | 59 ++++++++-------- src/app/api/transcribe/route.ts | 55 +++++++++++++++ src/components/thread/index.tsx | 40 ++++++++++- src/components/ui/voice-button.tsx | 71 +++++++++++++++++++ src/hooks/use-voice-recording.tsx | 108 +++++++++++++++++++++++++++++ 7 files changed, 301 insertions(+), 46 deletions(-) delete mode 100644 .env.example create mode 100644 src/app/api/transcribe/route.ts create mode 100644 src/components/ui/voice-button.tsx create mode 100644 src/hooks/use-voice-recording.tsx diff --git a/.env.example b/.env.example deleted file mode 100644 index a38138d3..00000000 --- a/.env.example +++ /dev/null @@ -1,13 +0,0 @@ -# LangGraph Configuration -NEXT_PUBLIC_API_URL=http://localhost:2024 -NEXT_PUBLIC_ASSISTANT_ID=agent -# Do NOT prefix this with "NEXT_PUBLIC_" as we do not want this exposed in the client. -LANGSMITH_API_KEY= - -# Production LangGraph Configuration (quickstart) - Uncomment to use -# NEXT_PUBLIC_ASSISTANT_ID="agent" -# This should be the deployment URL of your LangGraph server -# LANGGRAPH_API_URL="https://my-agent.default.us.langgraph.app" -# This should be the URL of your website + "/api". This is how you connect to the API proxy -# NEXT_PUBLIC_API_URL="https://my-website.com/api" -# LANGSMITH_API_KEY="lsv2_..." diff --git a/package.json b/package.json index 64910dda..7b8490b5 100644 --- a/package.json +++ b/package.json @@ -35,6 +35,7 @@ "esbuild": "^0.25.0", "esbuild-plugin-tailwindcss": "^2.0.1", "framer-motion": "^12.4.9", + "groq-sdk": "^0.27.0", "katex": "^0.16.21", "langgraph-nextjs-api-passthrough": "^0.0.4", "lodash": "^4.17.21", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 030fd063..7574c54c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -56,6 +56,9 @@ importers: framer-motion: specifier: ^12.4.9 version: 12.12.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + groq-sdk: + specifier: ^0.27.0 + version: 0.27.0 katex: specifier: ^0.16.21 version: 0.16.22 @@ -2054,6 +2057,9 @@ packages: graphemer@1.4.0: resolution: {integrity: sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==} + groq-sdk@0.27.0: + resolution: {integrity: sha512-fiEou5jdIgXGolo6fz5N2G++0fGMRhC/GzAqKtSnagt9eOwFn7C8FYjVeqs7ah6/VN/yygEUvFVJa0XD6T+3mg==} + has-bigints@1.1.0: resolution: {integrity: sha512-R3pbpkcIqv2Pm3dUwgjclDRVmWpTJW2DcMzcIhEXEx1oh/CEMObMm3KLmRJOdvhM7o4uQBnwr8pzRK2sJWIqfg==} engines: {node: '>= 0.4'} @@ -4355,12 +4361,10 @@ snapshots: dependencies: '@types/node': 22.15.18 form-data: 4.0.2 - optional: true '@types/node@18.19.100': dependencies: undici-types: 5.26.5 - optional: true '@types/node@22.15.18': dependencies: @@ -4523,7 +4527,6 @@ snapshots: abort-controller@3.0.0: dependencies: event-target-shim: 5.0.1 - optional: true acorn-jsx@5.3.2(acorn@8.14.1): dependencies: @@ -4534,7 +4537,6 @@ snapshots: agentkeepalive@4.6.0: dependencies: humanize-ms: 1.2.1 - optional: true ajv@6.12.6: dependencies: @@ -4626,8 +4628,7 @@ snapshots: async-function@1.0.0: {} - asynckit@0.4.0: - optional: true + asynckit@0.4.0: {} autoprefixer@10.4.21(postcss@8.5.3): dependencies: @@ -4752,7 +4753,6 @@ snapshots: combined-stream@1.0.8: dependencies: delayed-stream: 1.0.0 - optional: true comma-separated-tokens@1.0.8: {} @@ -4866,8 +4866,7 @@ snapshots: has-property-descriptors: 1.0.2 object-keys: 1.1.1 - delayed-stream@1.0.0: - optional: true + delayed-stream@1.0.0: {} dequal@2.0.3: {} @@ -5249,8 +5248,7 @@ snapshots: esutils@2.0.3: {} - event-target-shim@5.0.1: - optional: true + event-target-shim@5.0.1: {} eventemitter3@4.0.7: {} @@ -5316,8 +5314,7 @@ snapshots: dependencies: is-callable: 1.2.7 - form-data-encoder@1.7.2: - optional: true + form-data-encoder@1.7.2: {} form-data@4.0.2: dependencies: @@ -5325,7 +5322,6 @@ snapshots: combined-stream: 1.0.8 es-set-tostringtag: 2.1.0 mime-types: 2.1.35 - optional: true format@0.2.2: {} @@ -5333,7 +5329,6 @@ snapshots: dependencies: node-domexception: 1.0.0 web-streams-polyfill: 4.0.0-beta.3 - optional: true fraction.js@4.3.7: {} @@ -5419,6 +5414,18 @@ snapshots: graphemer@1.4.0: {} + groq-sdk@0.27.0: + dependencies: + '@types/node': 18.19.100 + '@types/node-fetch': 2.6.12 + abort-controller: 3.0.0 + agentkeepalive: 4.6.0 + form-data-encoder: 1.7.2 + formdata-node: 4.4.1 + node-fetch: 2.7.0 + transitivePeerDependencies: + - encoding + has-bigints@1.1.0: {} has-flag@4.0.0: {} @@ -5540,7 +5547,6 @@ snapshots: humanize-ms@1.2.1: dependencies: ms: 2.1.3 - optional: true icss-utils@5.1.0(postcss@8.5.3): dependencies: @@ -6233,13 +6239,11 @@ snapshots: braces: 3.0.3 picomatch: 2.3.1 - mime-db@1.52.0: - optional: true + mime-db@1.52.0: {} mime-types@2.1.35: dependencies: mime-db: 1.52.0 - optional: true minimatch@3.1.2: dependencies: @@ -6308,13 +6312,11 @@ snapshots: - '@babel/core' - babel-plugin-macros - node-domexception@1.0.0: - optional: true + node-domexception@1.0.0: {} node-fetch@2.7.0: dependencies: whatwg-url: 5.0.0 - optional: true node-releases@2.0.19: {} @@ -7012,8 +7014,7 @@ snapshots: dependencies: is-number: 7.0.0 - tr46@0.0.3: - optional: true + tr46@0.0.3: {} trim-lines@3.0.1: {} @@ -7088,8 +7089,7 @@ snapshots: has-symbols: 1.1.0 which-boxed-primitive: 1.1.1 - undici-types@5.26.5: - optional: true + undici-types@5.26.5: {} undici-types@6.21.0: {} @@ -7233,17 +7233,14 @@ snapshots: web-namespaces@2.0.1: {} - web-streams-polyfill@4.0.0-beta.3: - optional: true + web-streams-polyfill@4.0.0-beta.3: {} - webidl-conversions@3.0.1: - optional: true + webidl-conversions@3.0.1: {} whatwg-url@5.0.0: dependencies: tr46: 0.0.3 webidl-conversions: 3.0.1 - optional: true which-boxed-primitive@1.1.1: dependencies: diff --git a/src/app/api/transcribe/route.ts b/src/app/api/transcribe/route.ts new file mode 100644 index 00000000..44e62e16 --- /dev/null +++ b/src/app/api/transcribe/route.ts @@ -0,0 +1,55 @@ +// src/app/api/transcribe/route.ts +import { NextRequest, NextResponse } from 'next/server'; +import Groq from 'groq-sdk'; + +const groq = new Groq({ + apiKey: process.env.GROQ_API_KEY, +}); + +export async function POST(request: NextRequest) { + try { + const formData = await request.formData(); + const audioFile = formData.get('audio') as File; + + if (!audioFile) { + return NextResponse.json( + { error: 'No audio file provided' }, + { status: 400 } + ); + } + + console.log('Audio file received:', { + name: audioFile.name, + size: audioFile.size, + type: audioFile.type + }); + + // Create a proper File object for Groq API + const transcription = await groq.audio.transcriptions.create({ + file: audioFile, // Pass the File directly, not a Blob + model: "whisper-large-v3-turbo", + response_format: "text", + language: "fr", + }); + + console.log('Transcription result:', transcription); + + return NextResponse.json({ + transcription: typeof transcription === 'string' ? transcription : transcription.text + }); + + } catch (error) { + console.error('Transcription error:', error); + + // More detailed error logging + if (error instanceof Error) { + console.error('Error message:', error.message); + console.error('Error stack:', error.stack); + } + + return NextResponse.json( + { error: 'Failed to transcribe audio', details: error instanceof Error ? error.message : 'Unknown error' }, + { status: 500 } + ); + } +} \ No newline at end of file diff --git a/src/components/thread/index.tsx b/src/components/thread/index.tsx index d52a1594..76c1ab22 100644 --- a/src/components/thread/index.tsx +++ b/src/components/thread/index.tsx @@ -1,3 +1,4 @@ +// src/components/thread/index.tsx import { v4 as uuidv4 } from "uuid"; import { ReactNode, useEffect, useRef } from "react"; import { motion } from "framer-motion"; @@ -5,6 +6,8 @@ import { cn } from "@/lib/utils"; import { useStreamContext } from "@/providers/Stream"; import { useState, FormEvent } from "react"; import { Button } from "../ui/button"; +import { VoiceButton } from "../ui/voice-button"; +import { useVoiceRecording } from "@/hooks/use-voice-recording"; import { Checkpoint, Message } from "@langchain/langgraph-sdk"; import { AssistantMessage, AssistantMessageLoading } from "./messages/ai"; import { HumanMessage } from "./messages/human"; @@ -145,6 +148,28 @@ export function Thread() { const lastError = useRef(undefined); + // Voice recording functionality + const voiceRecording = useVoiceRecording({ + onTranscriptionComplete: (text: string) => { + // Append transcription to existing input, or replace if input is empty + setInput(prev => { + const trimmedPrev = prev.trim(); + if (trimmedPrev.length === 0) { + return text; + } + // Add a space between existing text and new transcription + return trimmedPrev + " " + text; + }); + }, + onError: (error: string) => { + toast.error("Voice recording error", { + description: error, + richColors: true, + closeButton: true, + }); + }, + }); + const setThreadId = (id: string | null) => { _setThreadId(id); @@ -513,6 +538,15 @@ export function Thread() { accept="image/jpeg,image/png,image/gif,image/webp,application/pdf" className="hidden" /> + + {/* Voice Recording Button */} + + {stream.isLoading ? ( + + +

{getTooltipText()}

+
+ + + ); +} \ No newline at end of file diff --git a/src/hooks/use-voice-recording.tsx b/src/hooks/use-voice-recording.tsx new file mode 100644 index 00000000..1a10ea9c --- /dev/null +++ b/src/hooks/use-voice-recording.tsx @@ -0,0 +1,108 @@ +// src/hooks/use-voice-recording.tsx +import { useState, useRef, useCallback } from 'react'; + +export interface UseVoiceRecordingProps { + onTranscriptionComplete: (text: string) => void; + onError?: (error: string) => void; +} + +export function useVoiceRecording({ + onTranscriptionComplete, + onError +}: UseVoiceRecordingProps) { + const [isRecording, setIsRecording] = useState(false); + const [isTranscribing, setIsTranscribing] = useState(false); + const mediaRecorderRef = useRef(null); + const audioChunksRef = useRef([]); + + const startRecording = useCallback(async () => { + try { + const stream = await navigator.mediaDevices.getUserMedia({ + audio: { + echoCancellation: true, + noiseSuppression: true, + autoGainControl: true, + } + }); + + const mediaRecorder = new MediaRecorder(stream, { + mimeType: 'audio/webm;codecs=opus' + }); + + mediaRecorderRef.current = mediaRecorder; + audioChunksRef.current = []; + + mediaRecorder.ondataavailable = (event) => { + if (event.data.size > 0) { + audioChunksRef.current.push(event.data); + } + }; + + mediaRecorder.onstop = async () => { + const audioBlob = new Blob(audioChunksRef.current, { + type: 'audio/webm;codecs=opus' + }); + + // Stop all tracks to release microphone + stream.getTracks().forEach(track => track.stop()); + + // Send to transcription API + await transcribeAudio(audioBlob); + }; + + mediaRecorder.start(1000); // Collect data every second + setIsRecording(true); + } catch (error) { + console.error('Error starting recording:', error); + onError?.('Failed to access microphone. Please check permissions.'); + } + }, [onError]); + + const stopRecording = useCallback(() => { + if (mediaRecorderRef.current && isRecording) { + mediaRecorderRef.current.stop(); + setIsRecording(false); + setIsTranscribing(true); + } + }, [isRecording]); + + const transcribeAudio = useCallback(async (audioBlob: Blob) => { + try { + const formData = new FormData(); + formData.append('audio', audioBlob, 'recording.webm'); + + const response = await fetch('/api/transcribe', { + method: 'POST', + body: formData, + }); + + if (!response.ok) { + throw new Error(`Transcription failed: ${response.statusText}`); + } + + const { transcription } = await response.json(); + onTranscriptionComplete(transcription.trim()); + } catch (error) { + console.error('Transcription error:', error); + onError?.('Failed to transcribe audio. Please try again.'); + } finally { + setIsTranscribing(false); + } + }, [onTranscriptionComplete, onError]); + + const toggleRecording = useCallback(() => { + if (isRecording) { + stopRecording(); + } else { + startRecording(); + } + }, [isRecording, startRecording, stopRecording]); + + return { + isRecording, + isTranscribing, + startRecording, + stopRecording, + toggleRecording, + }; +} \ No newline at end of file From c42461effb56dde6f01375c5c1e21e6d5da141cf Mon Sep 17 00:00:00 2001 From: farouk Date: Tue, 22 Jul 2025 13:41:18 +0200 Subject: [PATCH 2/2] add .env.example with groq api key --- .env.example | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..75061cd9 --- /dev/null +++ b/.env.example @@ -0,0 +1,16 @@ +#For the speech to text model +GROQ_API_KEY= + +# LangGraph Configuration +NEXT_PUBLIC_API_URL=http://localhost:2024 +NEXT_PUBLIC_ASSISTANT_ID=agent +# Do NOT prefix this with "NEXT_PUBLIC_" as we do not want this exposed in the client. +LANGSMITH_API_KEY= + +# Production LangGraph Configuration (quickstart) - Uncomment to use +# NEXT_PUBLIC_ASSISTANT_ID="agent" +# This should be the deployment URL of your LangGraph server +# LANGGRAPH_API_URL="https://my-agent.default.us.langgraph.app" +# This should be the URL of your website + "/api". This is how you connect to the API proxy +# NEXT_PUBLIC_API_URL="https://my-website.com/api" +# LANGSMITH_API_KEY="lsv2_..." \ No newline at end of file