diff --git a/.claude/settings.local.json b/.claude/settings.local.json index c7df3d3d9f..b698e146d4 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -5,7 +5,9 @@ "Bash(pnpm lint:*)", "Bash(pnpm build:*)", "Bash(cargo check:*)", - "Bash(cargo fmt:*)" + "Bash(cargo fmt:*)", + "Bash(pnpm format:*)", + "Bash(pnpm exec biome check:*)" ], "deny": [], "ask": [] diff --git a/Cargo.lock b/Cargo.lock index 94269460bc..81ec4842fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -177,9 +177,9 @@ checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] @@ -1174,6 +1174,7 @@ dependencies = [ name = "cap-desktop" version = "0.4.0" dependencies = [ + "aho-corasick", "anyhow", "async-stream", "axum", @@ -1221,9 +1222,11 @@ dependencies = [ "png 0.17.16", "posthog-rs", "rand 0.8.5", + "regex", "relative-path", "reqwest 0.12.24", "rodio", + "sanitize-filename", "scap-direct3d", "scap-screencapturekit", "scap-targets", @@ -1601,12 +1604,14 @@ dependencies = [ name = "cap-utils" version = "0.1.0" dependencies = [ + "aho-corasick", "directories 5.0.1", "flume", "futures", "nix 0.29.0", "serde", "serde_json", + "tempfile", "tokio", "tracing", "uuid", @@ -7641,6 +7646,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "sanitize-filename" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc984f4f9ceb736a7bb755c3e3bd17dc56370af2600c9780dcc48c66453da34d" +dependencies = [ + "regex", +] + [[package]] name = "scap-cpal" version = "0.1.0" @@ -8499,6 +8513,7 @@ version = "2.0.0-rc.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ccbb212565d2dc177bc15ecb7b039d66c4490da892436a4eee5b394d620c9bc" dependencies = [ + "chrono", "paste", "serde_json", "specta-macros", diff --git a/Cargo.toml b/Cargo.toml index 661b3b85fd..7eae47f5d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,11 @@ [workspace] resolver = "2" -members = ["apps/cli", "apps/desktop/src-tauri", "crates/*", "crates/workspace-hack"] +members = [ + "apps/cli", + "apps/desktop/src-tauri", + "crates/*", + "crates/workspace-hack", +] [workspace.dependencies] anyhow = { version = "1.0.86" } @@ -22,6 +27,7 @@ specta = { version = "=2.0.0-rc.20", features = [ "derive", "serde_json", "uuid", + "chrono" ] } serde = { version = "1", features = ["derive"] } @@ -40,6 +46,7 @@ sentry = { version = "0.42.0", features = [ ] } tracing = "0.1.41" futures = "0.3.31" +aho-corasick = "1.1.4" cidre = { git = "https://github.com/CapSoftware/cidre", rev = "bf84b67079a8", features = [ "macos_12_7", diff --git a/apps/desktop/src-tauri/Cargo.toml b/apps/desktop/src-tauri/Cargo.toml index e8d13e2828..df574378e5 100644 --- a/apps/desktop/src-tauri/Cargo.toml +++ b/apps/desktop/src-tauri/Cargo.toml @@ -20,11 +20,11 @@ swift-rs = { version = "1.0.6", features = ["build"] } [dependencies] tauri = { workspace = true, features = [ - "macos-private-api", - "protocol-asset", - "tray-icon", - "image-png", - "devtools", + "macos-private-api", + "protocol-asset", + "tray-icon", + "image-png", + "devtools", ] } tauri-specta = { version = "=2.0.0-rc.20", features = ["derive", "typescript"] } tauri-plugin-dialog = "2.2.0" @@ -60,6 +60,7 @@ tracing.workspace = true tempfile = "3.9.0" ffmpeg.workspace = true chrono = { version = "0.4.31", features = ["serde"] } +regex = "1.10.4" rodio = "0.19.0" png = "0.17.13" device_query = "4.0.1" @@ -106,22 +107,24 @@ tauri-plugin-sentry = "0.5.0" thiserror.workspace = true bytes = "1.10.1" async-stream = "0.3.6" +sanitize-filename = "0.6.0" tracing-futures = { version = "0.2.5", features = ["futures-03"] } tracing-opentelemetry = "0.32.0" opentelemetry = "0.31.0" -opentelemetry-otlp = "0.31.0" #{ version = , features = ["http-proto", "reqwest-client"] } +opentelemetry-otlp = "0.31.0" #{ version = , features = ["http-proto", "reqwest-client"] } opentelemetry_sdk = { version = "0.31.0", features = ["rt-tokio", "trace"] } posthog-rs = "0.3.7" workspace-hack = { version = "0.1", path = "../../../crates/workspace-hack" } +aho-corasick.workspace = true [target.'cfg(target_os = "macos")'.dependencies] core-graphics = "0.24.0" core-foundation = "0.10.0" objc2-app-kit = { version = "0.3.0", features = [ - "NSWindow", - "NSResponder", - "NSHapticFeedback", + "NSWindow", + "NSResponder", + "NSHapticFeedback", ] } cocoa = "0.26.0" objc = "0.2.7" @@ -131,10 +134,10 @@ cidre = { workspace = true } [target.'cfg(target_os= "windows")'.dependencies] windows = { workspace = true, features = [ - "Win32_Foundation", - "Win32_System", - "Win32_UI_WindowsAndMessaging", - "Win32_Graphics_Gdi", + "Win32_Foundation", + "Win32_System", + "Win32_UI_WindowsAndMessaging", + "Win32_Graphics_Gdi", ] } windows-sys = { workspace = true } diff --git a/apps/desktop/src-tauri/src/captions.rs b/apps/desktop/src-tauri/src/captions.rs index 05678af591..9904909fc9 100644 --- a/apps/desktop/src-tauri/src/captions.rs +++ b/apps/desktop/src-tauri/src/captions.rs @@ -18,12 +18,10 @@ use tokio::sync::Mutex; use tracing::instrument; use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters}; -// Re-export caption types from cap_project -pub use cap_project::{CaptionSegment, CaptionSettings}; +pub use cap_project::{CaptionSegment, CaptionSettings, CaptionWord}; use crate::http_client; -// Convert the project type's float precision from f32 to f64 for compatibility #[derive(Debug, Serialize, Deserialize, Type, Clone)] pub struct CaptionData { pub segments: Vec, @@ -39,15 +37,12 @@ impl Default for CaptionData { } } -// Model context is shared and cached lazy_static::lazy_static! { - static ref WHISPER_CONTEXT: Arc>> = Arc::new(Mutex::new(None)); + static ref WHISPER_CONTEXT: Arc>>> = Arc::new(Mutex::new(None)); } -// Constants const WHISPER_SAMPLE_RATE: u32 = 16000; -/// Function to handle creating directories for the model #[tauri::command] #[specta::specta] #[instrument] @@ -55,7 +50,6 @@ pub async fn create_dir(path: String, _recursive: bool) -> Result<(), String> { std::fs::create_dir_all(path).map_err(|e| format!("Failed to create directory: {e}")) } -/// Function to save the model file #[tauri::command] #[specta::specta] #[instrument] @@ -63,15 +57,14 @@ pub async fn save_model_file(path: String, data: Vec) -> Result<(), String> std::fs::write(&path, &data).map_err(|e| format!("Failed to write model file: {e}")) } -/// Extract audio from a video file and save it as a temporary WAV file async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Result<(), String> { + log::info!("=== EXTRACT AUDIO START ==="); log::info!("Attempting to extract audio from: {video_path}"); + log::info!("Output path: {:?}", output_path); - // Check if this is a .cap directory if video_path.ends_with(".cap") { log::info!("Detected .cap project directory"); - // Read the recording metadata let meta_path = std::path::Path::new(video_path).join("recording-meta.json"); let meta_content = std::fs::read_to_string(&meta_path) .map_err(|e| format!("Failed to read recording metadata: {e}"))?; @@ -79,21 +72,23 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re let meta: serde_json::Value = serde_json::from_str(&meta_content) .map_err(|e| format!("Failed to parse recording metadata: {e}"))?; - // Get paths for both audio sources let base_path = std::path::Path::new(video_path); let mut audio_sources = Vec::new(); if let Some(segments) = meta["segments"].as_array() { for segment in segments { - // Add system audio if available - if let Some(system_audio) = segment["system_audio"]["path"].as_str() { - audio_sources.push(base_path.join(system_audio)); - } + let mut push_source = |path: Option<&str>| { + if let Some(path) = path { + let full_path = base_path.join(path); + if !audio_sources.contains(&full_path) { + audio_sources.push(full_path); + } + } + }; - // Add microphone audio if available - if let Some(audio) = segment["audio"]["path"].as_str() { - audio_sources.push(base_path.join(audio)); - } + push_source(segment["system_audio"]["path"].as_str()); + push_source(segment["mic"]["path"].as_str()); + push_source(segment["audio"]["path"].as_str()); } } @@ -103,7 +98,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re log::info!("Found {} audio sources", audio_sources.len()); - // Process each audio source using AudioData let mut mixed_samples = Vec::new(); let mut channel_count = 0; @@ -121,7 +115,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re mixed_samples = audio.samples().to_vec(); channel_count = audio.channels() as usize; } else { - // Handle potential different channel counts by mixing to mono first if needed if audio.channels() as usize != channel_count { log::info!( "Channel count mismatch: {} vs {}, mixing to mono", @@ -129,24 +122,20 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re audio.channels() ); - // If we have mixed samples with multiple channels, convert to mono if channel_count > 1 { let mono_samples = convert_to_mono(&mixed_samples, channel_count); mixed_samples = mono_samples; channel_count = 1; } - // Convert the new audio to mono too if it has multiple channels let samples = if audio.channels() > 1 { convert_to_mono(audio.samples(), audio.channels() as usize) } else { audio.samples().to_vec() }; - // Mix mono samples mix_samples(&mut mixed_samples, &samples); } else { - // Same channel count, simple mix mix_samples(&mut mixed_samples, audio.samples()); } } @@ -158,7 +147,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re } } - // No matter what, ensure we have mono audio for Whisper if channel_count > 1 { log::info!("Converting final mixed audio from {channel_count} channels to mono"); mixed_samples = convert_to_mono(&mixed_samples, channel_count); @@ -166,10 +154,22 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re } if mixed_samples.is_empty() { + log::error!("No audio samples after processing all sources"); return Err("Failed to process any audio sources".to_string()); } - // Convert to WAV format with desired sample rate + log::info!("Final mixed audio: {} samples", mixed_samples.len()); + let mix_rms = + (mixed_samples.iter().map(|&s| s * s).sum::() / mixed_samples.len() as f32).sqrt(); + log::info!("Mixed audio RMS: {:.4}", mix_rms); + + if mix_rms < 0.001 { + log::warn!( + "WARNING: Mixed audio RMS is very low ({:.6}) - audio may be nearly silent!", + mix_rms + ); + } + let mut output = avformat::output(&output_path) .map_err(|e| format!("Failed to create output file: {e}"))?; @@ -199,7 +199,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re .write_header() .map_err(|e| format!("Failed to write header: {e}"))?; - // Create resampler for sample rate conversion let mut resampler = resampling::Context::get( avformat::Sample::F32(avformat::sample::Type::Packed), channel_layout, @@ -210,9 +209,7 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re ) .map_err(|e| format!("Failed to create resampler: {e}"))?; - // Process audio in chunks let frame_size = encoder.frame_size() as usize; - // Check if frame_size is zero and use a fallback let frame_size = if frame_size == 0 { 1024 } else { frame_size }; log::info!( @@ -229,15 +226,12 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re ); frame.set_rate(WHISPER_SAMPLE_RATE); - // Make sure we have samples and a valid chunk size if !mixed_samples.is_empty() && frame_size * channel_count > 0 { - // Process chunks of audio for (chunk_idx, chunk) in mixed_samples.chunks(frame_size * channel_count).enumerate() { if chunk_idx % 100 == 0 { log::info!("Processing chunk {}, size: {}", chunk_idx, chunk.len()); } - // Create a new input frame with actual data from the chunk let mut input_frame = ffmpeg::frame::Audio::new( avformat::Sample::F32(avformat::sample::Type::Packed), chunk.len() / channel_count, @@ -245,7 +239,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re ); input_frame.set_rate(AudioData::SAMPLE_RATE); - // Copy data from chunk to frame let bytes = unsafe { std::slice::from_raw_parts( chunk.as_ptr() as *const u8, @@ -254,7 +247,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re }; input_frame.data_mut(0)[0..bytes.len()].copy_from_slice(bytes); - // Create output frame for resampled data let mut output_frame = ffmpeg::frame::Audio::new( avformat::Sample::I16(avformat::sample::Type::Packed), frame_size, @@ -262,7 +254,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re ); output_frame.set_rate(WHISPER_SAMPLE_RATE); - // Use the input frame with actual data instead of the empty frame match resampler.run(&input_frame, &mut output_frame) { Ok(_) => { if chunk_idx % 100 == 0 { @@ -284,7 +275,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re continue; } - // Process each encoded packet loop { let mut packet = ffmpeg::Packet::empty(); match encoder.receive_packet(&mut packet) { @@ -299,12 +289,10 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re } } - // Flush the encoder encoder .send_eof() .map_err(|e| format!("Failed to send EOF: {e}"))?; - // Process final packets in a loop with limited borrow scope loop { let mut packet = ffmpeg::Packet::empty(); let received = encoder.receive_packet(&mut packet); @@ -313,7 +301,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re break; } - // Use a block to limit the scope of the output borrow { if let Err(e) = packet.write_interleaved(&mut output) { return Err(format!("Failed to write final packet: {e}")); @@ -325,9 +312,9 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re .write_trailer() .map_err(|e| format!("Failed to write trailer: {e}"))?; + log::info!("=== EXTRACT AUDIO END (from .cap) ==="); Ok(()) } else { - // Handle regular video file let mut input = avformat::input(&video_path).map_err(|e| format!("Failed to open video file: {e}"))?; @@ -338,25 +325,20 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re let codec_params = stream.parameters(); - // Get decoder parameters first let decoder_ctx = avcodec::Context::from_parameters(codec_params.clone()) .map_err(|e| format!("Failed to create decoder context: {e}"))?; - // Create and open the decoder let mut decoder = decoder_ctx .decoder() .audio() .map_err(|e| format!("Failed to create decoder: {e}"))?; - // Now we can access audio-specific methods let decoder_format = decoder.format(); let decoder_channel_layout = decoder.channel_layout(); let decoder_rate = decoder.rate(); - // Set up and prepare encoder and output separately to avoid multiple borrows let channel_layout = ChannelLayout::MONO; - // Create encoder first let mut encoder_ctx = avcodec::Context::new() .encoder() .audio() @@ -373,11 +355,9 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re .open_as(codec) .map_err(|e| format!("Failed to open encoder: {e}"))?; - // Create output context separately let mut output = avformat::output(&output_path) .map_err(|e| format!("Failed to create output file: {e}"))?; - // Add stream and get parameters in a block to limit the borrow let stream_params = { let mut output_stream = output .add_stream(codec) @@ -385,16 +365,13 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re output_stream.set_parameters(&encoder); - // Store the stream parameters we need for later (output_stream.index(), output_stream.id()) }; - // Write header output .write_header() .map_err(|e| format!("Failed to write header: {e}"))?; - // Create resampler let mut resampler = resampling::Context::get( decoder_format, decoder_channel_layout, @@ -405,7 +382,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re ) .map_err(|e| format!("Failed to create resampler: {e}"))?; - // Create frames let mut decoded_frame = ffmpeg::frame::Audio::empty(); let mut resampled_frame = ffmpeg::frame::Audio::new( avformat::Sample::I16(avformat::sample::Type::Packed), @@ -413,22 +389,15 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re channel_layout, ); - // Save the stream index from the original stream (not the output stream) let input_stream_index = stream.index(); - // Process packets one at a time, cloning what we need from input packets let mut packet_queue = Vec::new(); - // First collect all the packets we need by cloning the data { - // Use a separate block to limit the immutable borrow lifetime for (stream_idx, packet) in input.packets() { if stream_idx.index() == input_stream_index { - // Clone the packet data to avoid borrowing input if let Some(data) = packet.data() { - // Copy the packet data to a new packet let mut cloned_packet = ffmpeg::Packet::copy(data); - // Copy timing information if let Some(pts) = packet.pts() { cloned_packet.set_pts(Some(pts)); } @@ -441,14 +410,12 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re } } - // Then process each cloned packet for packet_res in packet_queue { if let Err(e) = decoder.send_packet(&packet_res) { log::warn!("Failed to send packet to decoder: {e}"); continue; } - // Process decoded frames while decoder.receive_frame(&mut decoded_frame).is_ok() { if let Err(e) = resampler.run(&decoded_frame, &mut resampled_frame) { log::warn!("Failed to resample audio: {e}"); @@ -460,12 +427,10 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re continue; } - // Process encoded packets loop { let mut packet = ffmpeg::Packet::empty(); match encoder.receive_packet(&mut packet) { Ok(_) => { - // Set the stream for the output packet packet.set_stream(stream_params.0); if let Err(e) = packet.write_interleaved(&mut output) { @@ -478,7 +443,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re } } - // Flush the decoder decoder .send_eof() .map_err(|e| format!("Failed to send EOF to decoder: {e}"))?; @@ -492,7 +456,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re .send_frame(&resampled_frame) .map_err(|e| format!("Failed to send final frame: {e}"))?; - // Process final encoded packets loop { let mut packet = ffmpeg::Packet::empty(); let received = encoder.receive_packet(&mut packet); @@ -507,53 +470,73 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re } } - // Close the output file with trailer output .write_trailer() .map_err(|e| format!("Failed to write trailer: {e}"))?; + log::info!("=== EXTRACT AUDIO END (from video) ==="); Ok(()) } } -/// Load or initialize the WhisperContext async fn get_whisper_context(model_path: &str) -> Result, String> { let mut context_guard = WHISPER_CONTEXT.lock().await; - // Always create a new context to avoid issues with multiple uses + if let Some(ref existing) = *context_guard { + log::info!("Reusing cached Whisper context"); + return Ok(existing.clone()); + } + log::info!("Initializing Whisper context with model: {model_path}"); let ctx = WhisperContext::new_with_params(model_path, WhisperContextParameters::default()) .map_err(|e| format!("Failed to load Whisper model: {e}"))?; - *context_guard = Some(ctx); + let ctx_arc = Arc::new(ctx); + *context_guard = Some(ctx_arc.clone()); + + Ok(ctx_arc) +} + +fn is_special_token(token_text: &str) -> bool { + let trimmed = token_text.trim(); + if trimmed.is_empty() { + return true; + } + + let is_special = trimmed.contains('[') + || trimmed.contains(']') + || trimmed.contains("_TT_") + || trimmed.contains("_BEG_") + || trimmed.contains("<|"); - // Get a reference to the context and wrap it in an Arc - let context_ref = context_guard.as_ref().unwrap(); - let context_arc = unsafe { Arc::new(std::ptr::read(context_ref)) }; - Ok(context_arc) + if is_special { + log::debug!("Filtering special token: {:?}", token_text); + } + + is_special } -/// Process audio file with Whisper for transcription fn process_with_whisper( audio_path: &PathBuf, context: Arc, language: &str, ) -> Result { + log::info!("=== WHISPER TRANSCRIPTION START ==="); log::info!("Processing audio file: {audio_path:?}"); + log::info!("Language setting: {}", language); - // Set up parameters for Whisper let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 }); - // Configure parameters for better caption quality params.set_translate(false); params.set_print_special(false); params.set_print_progress(false); params.set_print_realtime(false); - params.set_token_timestamps(true); // Enable timestamps for captions - params.set_language(Some(if language == "auto" { "auto" } else { language })); // Use selected language or auto-detect - params.set_max_len(i32::MAX); // No max length for transcription + params.set_token_timestamps(true); + params.set_language(Some(if language == "auto" { "auto" } else { language })); + params.set_max_len(i32::MAX); + + log::info!("Whisper params - translate: false, token_timestamps: true, max_len: MAX"); - // Load audio file let mut audio_file = File::open(audio_path) .map_err(|e| format!("Failed to open audio file: {e} at path: {audio_path:?}"))?; let mut audio_data = Vec::new(); @@ -563,7 +546,6 @@ fn process_with_whisper( log::info!("Processing audio file of size: {} bytes", audio_data.len()); - // Convert audio data to the required format (16-bit mono PCM) let mut audio_data_f32 = Vec::new(); for i in (0..audio_data.len()).step_by(2) { if i + 1 < audio_data.len() { @@ -572,24 +554,42 @@ fn process_with_whisper( } } - log::info!("Converted {} samples to f32 format", audio_data_f32.len()); + let duration_seconds = audio_data_f32.len() as f32 / WHISPER_SAMPLE_RATE as f32; + log::info!( + "Converted {} samples to f32 format (duration: {:.2}s at {}Hz)", + audio_data_f32.len(), + duration_seconds, + WHISPER_SAMPLE_RATE + ); - // Log sample data statistics for debugging if !audio_data_f32.is_empty() { let min_sample = audio_data_f32.iter().fold(f32::MAX, |a, &b| a.min(b)); let max_sample = audio_data_f32.iter().fold(f32::MIN, |a, &b| a.max(b)); let avg_sample = audio_data_f32.iter().sum::() / audio_data_f32.len() as f32; - log::info!("Audio samples - min: {min_sample}, max: {max_sample}, avg: {avg_sample}"); + let rms = (audio_data_f32.iter().map(|&s| s * s).sum::() + / audio_data_f32.len() as f32) + .sqrt(); + log::info!( + "Audio samples - min: {:.4}, max: {:.4}, avg: {:.6}, RMS: {:.4}", + min_sample, + max_sample, + avg_sample, + rms + ); + + if rms < 0.001 { + log::warn!( + "WARNING: Audio RMS is very low ({:.6}) - audio may be nearly silent!", + rms + ); + } - // Sample a few values - let sample_count = audio_data_f32.len().min(10); - for i in 0..sample_count { - let idx = i * audio_data_f32.len() / sample_count; - log::info!("Sample {}: {}", idx, audio_data_f32[idx]); + log::info!("First 20 audio samples:"); + for i in 0..audio_data_f32.len().min(20) { + log::info!(" Sample[{}] = {:.6}", i, audio_data_f32[i]); } } - // Run the transcription let mut state = context .create_state() .map_err(|e| format!("Failed to create Whisper state: {e}"))?; @@ -598,7 +598,6 @@ fn process_with_whisper( .full(params, &audio_data_f32[..]) .map_err(|e| format!("Failed to run Whisper transcription: {e}"))?; - // Process results: convert Whisper segments to CaptionSegment let num_segments = state .full_n_segments() .map_err(|e| format!("Failed to get number of segments: {e}"))?; @@ -608,11 +607,10 @@ fn process_with_whisper( let mut segments = Vec::new(); for i in 0..num_segments { - let text = state + let raw_text = state .full_get_segment_text(i) .map_err(|e| format!("Failed to get segment text: {e}"))?; - // Properly unwrap the Result first, then convert i64 to f64 let start_i64 = state .full_get_segment_t0(i) .map_err(|e| format!("Failed to get segment start time: {e}"))?; @@ -620,30 +618,180 @@ fn process_with_whisper( .full_get_segment_t1(i) .map_err(|e| format!("Failed to get segment end time: {e}"))?; - // Convert timestamps from centiseconds to seconds (as f32 for CaptionSegment) let start_time = (start_i64 as f32) / 100.0; let end_time = (end_i64 as f32) / 100.0; - // Add debug logging for timestamps log::info!( - "Segment {}: start={}, end={}, text='{}'", + "=== Segment {}: start={:.2}s, end={:.2}s, raw_text='{}'", i, start_time, end_time, - text.trim() + raw_text.trim() ); - if !text.trim().is_empty() { + let mut words = Vec::new(); + let num_tokens = state + .full_n_tokens(i) + .map_err(|e| format!("Failed to get token count: {e}"))?; + + log::info!(" Segment {} has {} tokens", i, num_tokens); + + let mut current_word = String::new(); + let mut word_start: Option = None; + let mut word_end: f32 = start_time; + + for t in 0..num_tokens { + let token_text = state.full_get_token_text(i, t).unwrap_or_default(); + let token_id = state.full_get_token_id(i, t).unwrap_or(0); + let token_prob = state.full_get_token_prob(i, t).unwrap_or(0.0); + + if is_special_token(&token_text) { + log::debug!( + " Token[{}]: id={}, text={:?} -> SKIPPED (special)", + t, + token_id, + token_text + ); + continue; + } + + let token_data = state.full_get_token_data(i, t).ok(); + + if let Some(data) = token_data { + let token_start = (data.t0 as f32) / 100.0; + let token_end = (data.t1 as f32) / 100.0; + + log::info!( + " Token[{}]: id={}, text={:?}, t0={:.2}s, t1={:.2}s, prob={:.4}", + t, + token_id, + token_text, + token_start, + token_end, + token_prob + ); + + if token_text.starts_with(' ') || token_text.starts_with('\n') { + if !current_word.is_empty() { + if let Some(ws) = word_start { + log::info!( + " -> Completing word: '{}' ({:.2}s - {:.2}s)", + current_word.trim(), + ws, + word_end + ); + words.push(CaptionWord { + text: current_word.trim().to_string(), + start: ws, + end: word_end, + }); + } + } + current_word = token_text.trim().to_string(); + word_start = Some(token_start); + log::debug!( + " -> Starting new word: '{}' at {:.2}s", + current_word, + token_start + ); + } else { + if word_start.is_none() { + word_start = Some(token_start); + log::debug!(" -> Word start set to {:.2}s", token_start); + } + current_word.push_str(&token_text); + log::debug!(" -> Appending to word: '{}'", current_word); + } + word_end = token_end; + } else { + log::warn!( + " Token[{}]: id={}, text={:?} -> NO TIMING DATA", + t, + token_id, + token_text + ); + } + } + + if !current_word.trim().is_empty() { + if let Some(ws) = word_start { + log::info!( + " -> Final word: '{}' ({:.2}s - {:.2}s)", + current_word.trim(), + ws, + word_end + ); + words.push(CaptionWord { + text: current_word.trim().to_string(), + start: ws, + end: word_end, + }); + } + } + + log::info!(" Segment {} produced {} words", i, words.len()); + for (w_idx, word) in words.iter().enumerate() { + log::info!( + " Word[{}]: '{}' ({:.2}s - {:.2}s)", + w_idx, + word.text, + word.start, + word.end + ); + } + + if words.is_empty() { + log::warn!(" Segment {} has no words, skipping", i); + continue; + } + + const MAX_WORDS_PER_SEGMENT: usize = 6; + + let word_chunks: Vec> = words + .chunks(MAX_WORDS_PER_SEGMENT) + .map(|chunk| chunk.to_vec()) + .collect(); + + for (chunk_idx, chunk_words) in word_chunks.into_iter().enumerate() { + let segment_text = chunk_words + .iter() + .map(|word| word.text.clone()) + .collect::>() + .join(" "); + + let segment_start = chunk_words + .first() + .map(|word| word.start) + .unwrap_or(start_time); + let segment_end = chunk_words.last().map(|word| word.end).unwrap_or(end_time); + segments.push(CaptionSegment { - id: format!("segment-{i}"), - start: start_time, - end: end_time, - text: text.trim().to_string(), + id: format!("segment-{i}-{chunk_idx}"), + start: segment_start, + end: segment_end, + text: segment_text, + words: chunk_words, }); } } - log::info!("Successfully processed {} segments", segments.len()); + log::info!("=== WHISPER TRANSCRIPTION COMPLETE ==="); + log::info!("Total segments: {}", segments.len()); + + let total_words: usize = segments.iter().map(|s| s.words.len()).sum(); + log::info!("Total words: {}", total_words); + + log::info!("=== FINAL TRANSCRIPTION SUMMARY ==="); + for segment in &segments { + log::info!( + "Segment '{}' ({:.2}s - {:.2}s): {}", + segment.id, + segment.start, + segment.end, + segment.text + ); + } + log::info!("=== END SUMMARY ==="); Ok(CaptionData { segments, @@ -651,7 +799,6 @@ fn process_with_whisper( }) } -/// Function to transcribe audio from a video file using Whisper #[tauri::command] #[specta::specta] #[instrument] @@ -660,20 +807,25 @@ pub async fn transcribe_audio( model_path: String, language: String, ) -> Result { - // Check if files exist with detailed error messages + log::info!("=== TRANSCRIBE AUDIO COMMAND START ==="); + log::info!("Video path: {}", video_path); + log::info!("Model path: {}", model_path); + log::info!("Language: {}", language); + if !std::path::Path::new(&video_path).exists() { + log::error!("Video file not found at path: {}", video_path); return Err(format!("Video file not found at path: {video_path}")); } if !std::path::Path::new(&model_path).exists() { + log::error!("Model file not found at path: {}", model_path); return Err(format!("Model file not found at path: {model_path}")); } - // Create temp dir with better error handling let temp_dir = tempdir().map_err(|e| format!("Failed to create temporary directory: {e}"))?; let audio_path = temp_dir.path().join("audio.wav"); + log::info!("Temp audio path: {:?}", audio_path); - // First try the ffmpeg implementation match extract_audio_from_video(&video_path, &audio_path).await { Ok(_) => log::info!("Successfully extracted audio to {audio_path:?}"), Err(e) => { @@ -682,39 +834,73 @@ pub async fn transcribe_audio( } } - // Verify the audio file was created if !audio_path.exists() { + log::error!("Audio file was not created at {:?}", audio_path); return Err("Failed to create audio file for transcription".to_string()); } - log::info!("Audio file created at: {audio_path:?}"); + let audio_metadata = std::fs::metadata(&audio_path).ok(); + if let Some(meta) = &audio_metadata { + log::info!( + "Audio file created at: {:?}, size: {} bytes", + audio_path, + meta.len() + ); + } - // Get or initialize Whisper context with detailed error handling let context = match get_whisper_context(&model_path).await { - Ok(ctx) => ctx, + Ok(ctx) => { + log::info!("Whisper context ready"); + ctx + } Err(e) => { log::error!("Failed to initialize Whisper context: {e}"); return Err(format!("Failed to initialize transcription model: {e}")); } }; - // Process with Whisper and handle errors - match process_with_whisper(&audio_path, context, &language) { + let audio_path_clone = audio_path.clone(); + let language_clone = language.clone(); + log::info!("Starting Whisper transcription in blocking task..."); + let whisper_result = tokio::task::spawn_blocking(move || { + process_with_whisper(&audio_path_clone, context, &language_clone) + }) + .await + .map_err(|e| format!("Whisper task panicked: {e}"))?; + + match whisper_result { Ok(captions) => { + log::info!("=== TRANSCRIBE AUDIO RESULT ==="); + log::info!( + "Transcription produced {} segments", + captions.segments.len() + ); + + for (idx, segment) in captions.segments.iter().enumerate() { + log::info!( + " Result Segment[{}]: '{}' ({} words)", + idx, + segment.text, + segment.words.len() + ); + } + if captions.segments.is_empty() { log::warn!("No caption segments were generated"); return Err("No speech detected in the audio".to_string()); } + + log::info!("=== TRANSCRIBE AUDIO COMMAND END (success) ==="); Ok(captions) } Err(e) => { log::error!("Failed to process audio with Whisper: {e}"); + log::info!("=== TRANSCRIBE AUDIO COMMAND END (error) ==="); Err(format!("Failed to transcribe audio: {e}")) } } } -/// Function to save caption data to a file #[tauri::command] #[specta::specta] #[instrument(skip(app))] @@ -723,7 +909,30 @@ pub async fn save_captions( video_id: String, captions: CaptionData, ) -> Result<(), String> { + tracing::info!("=== SAVE CAPTIONS START ==="); tracing::info!("Saving captions for video_id: {}", video_id); + tracing::info!("Received {} segments to save", captions.segments.len()); + + for (idx, segment) in captions.segments.iter().enumerate() { + tracing::info!( + " Segment[{}] '{}': '{}' ({} words, {:.2}s - {:.2}s)", + idx, + segment.id, + segment.text, + segment.words.len(), + segment.start, + segment.end + ); + for (w_idx, word) in segment.words.iter().enumerate() { + tracing::debug!( + " Word[{}]: '{}' ({:.2}s - {:.2}s)", + w_idx, + word.text, + word.start, + word.end + ); + } + } let captions_dir = app_captions_dir(&app, &video_id)?; @@ -739,13 +948,10 @@ pub async fn save_captions( tracing::info!("Writing captions to: {:?}", captions_path); - // Ensure settings are included with default values if not provided let settings = captions.settings.unwrap_or_default(); - // Create a JSON structure manually to ensure field naming consistency let mut json_obj = serde_json::Map::new(); - // Add segments array let segments_array = serde_json::to_value( captions .segments @@ -769,6 +975,18 @@ pub async fn save_captions( "text".to_string(), serde_json::Value::String(seg.text.clone()), ); + let words_array: Vec = seg + .words + .iter() + .map(|w| { + serde_json::json!({ + "text": w.text, + "start": w.start, + "end": w.end + }) + }) + .collect(); + segment.insert("words".to_string(), serde_json::Value::Array(words_array)); segment }) .collect::>(), @@ -780,7 +998,6 @@ pub async fn save_captions( json_obj.insert("segments".to_string(), segments_array); - // Add settings object with camelCase naming let mut settings_obj = serde_json::Map::new(); settings_obj.insert( "enabled".to_string(), @@ -827,13 +1044,22 @@ pub async fn save_captions( "exportWithSubtitles".to_string(), serde_json::Value::Bool(settings.export_with_subtitles), ); + settings_obj.insert( + "highlightColor".to_string(), + serde_json::Value::String(settings.highlight_color.clone()), + ); + settings_obj.insert( + "fadeDuration".to_string(), + serde_json::Value::Number( + serde_json::Number::from_f64(settings.fade_duration as f64).unwrap(), + ), + ); json_obj.insert( "settings".to_string(), serde_json::Value::Object(settings_obj), ); - // Convert to pretty JSON string let json = serde_json::to_string_pretty(&json_obj).map_err(|e| { tracing::error!("Failed to serialize captions: {}", e); format!("Failed to serialize captions: {e}") @@ -845,19 +1071,16 @@ pub async fn save_captions( })?; tracing::info!("Successfully saved captions"); + tracing::info!("=== SAVE CAPTIONS END ==="); Ok(()) } -/// Helper function to parse captions from a JSON string -/// This can be used by other modules to parse captions without duplicating code pub fn parse_captions_json(json: &str) -> Result { - // Use a more flexible parsing approach match serde_json::from_str::(json) { Ok(json_value) => { if let Some(segments_array) = json_value.get("segments").and_then(|v| v.as_array()) { let mut segments = Vec::new(); - // Process each segment for segment in segments_array { if let (Some(id), Some(start), Some(end), Some(text)) = ( segment.get("id").and_then(|v| v.as_str()), @@ -865,18 +1088,33 @@ pub fn parse_captions_json(json: &str) -> Result Result Result Result Result Result Result, String> { + tracing::info!("=== LOAD CAPTIONS START ==="); + tracing::info!("Loading captions for video_id: {}", video_id); + let captions_dir = app_captions_dir(&app, &video_id)?; let captions_path = captions_dir.join("captions.json"); if !captions_path.exists() { tracing::info!("No captions file found at: {:?}", captions_path); + tracing::info!("=== LOAD CAPTIONS END (no file) ==="); return Ok(None); } @@ -994,6 +1248,8 @@ pub async fn load_captions( } }; + tracing::info!("Captions JSON length: {} bytes", json.len()); + tracing::info!("Parsing captions JSON"); match parse_captions_json(&json) { Ok(project_captions) => { @@ -1002,33 +1258,42 @@ pub async fn load_captions( project_captions.segments.len() ); - // Create the CaptionData structure + for (idx, segment) in project_captions.segments.iter().enumerate() { + tracing::info!( + " Loaded Segment[{}] '{}': '{}' ({} words, {:.2}s - {:.2}s)", + idx, + segment.id, + segment.text, + segment.words.len(), + segment.start, + segment.end + ); + } + let tauri_captions = CaptionData { segments: project_captions.segments, settings: Some(project_captions.settings), }; + tracing::info!("=== LOAD CAPTIONS END (success) ==="); Ok(Some(tauri_captions)) } Err(e) => { tracing::error!("Failed to parse captions: {}", e); + tracing::info!("=== LOAD CAPTIONS END (error) ==="); Err(format!("Failed to parse captions: {e}")) } } } -/// Helper function to get the captions directory for a video fn app_captions_dir(app: &AppHandle, video_id: &str) -> Result { tracing::info!("Getting captions directory for video_id: {}", video_id); - // Get the app data directory let app_dir = app .path() .app_data_dir() .map_err(|_| "Failed to get app data directory".to_string())?; - // Create a dedicated captions directory - // Strip .cap extension if present in video_id let clean_video_id = video_id.trim_end_matches(".cap"); let captions_dir = app_dir.join("captions").join(clean_video_id); @@ -1036,7 +1301,6 @@ fn app_captions_dir(app: &AppHandle, video_id: &str) -> Result Ok(captions_dir) } -// Add new type for download progress #[derive(Debug, Serialize, Type, tauri_specta::Event, Clone)] pub struct DownloadProgress { pub progress: f64, @@ -1047,7 +1311,6 @@ impl DownloadProgress { const EVENT_NAME: &'static str = "download-progress"; } -/// Helper function to download a Whisper model from Hugging Face Hub #[tauri::command] #[specta::specta] #[instrument(skip(window))] @@ -1057,7 +1320,6 @@ pub async fn download_whisper_model( model_name: String, output_path: String, ) -> Result<(), String> { - // Define model URLs based on model names let model_url = match model_name.as_str() { "tiny" => "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin", "base" => "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin", @@ -1065,10 +1327,9 @@ pub async fn download_whisper_model( "medium" => "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin", "large" => "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3.bin", "large-v3" => "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3.bin", - _ => "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin", // Default to tiny + _ => "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin", }; - // Create the client and download the model let response = app .state::() .get(model_url) @@ -1083,10 +1344,8 @@ pub async fn download_whisper_model( )); } - // Get the total size for progress calculation let total_size = response.content_length().unwrap_or(0); - // Create a file to write to if let Some(parent) = std::path::Path::new(&output_path).parent() { std::fs::create_dir_all(parent) .map_err(|e| format!("Failed to create parent directories: {e}"))?; @@ -1095,15 +1354,13 @@ pub async fn download_whisper_model( .await .map_err(|e| format!("Failed to create file: {e}"))?; - // Download and write in chunks let mut downloaded = 0; let mut bytes = response .bytes() .await .map_err(|e| format!("Failed to get response bytes: {e}"))?; - // Write the bytes in chunks to show progress - const CHUNK_SIZE: usize = 1024 * 1024; // 1MB chunks + const CHUNK_SIZE: usize = 1024 * 1024; while !bytes.is_empty() { let chunk_size = std::cmp::min(CHUNK_SIZE, bytes.len()); let chunk = bytes.split_to(chunk_size); @@ -1114,7 +1371,6 @@ pub async fn download_whisper_model( downloaded += chunk_size as u64; - // Calculate and emit progress let progress = if total_size > 0 { (downloaded as f64 / total_size as f64) * 100.0 } else { @@ -1132,7 +1388,6 @@ pub async fn download_whisper_model( .map_err(|e| format!("Failed to emit progress: {e}"))?; } - // Ensure file is properly written file.flush() .await .map_err(|e| format!("Failed to flush file: {e}"))?; @@ -1140,7 +1395,6 @@ pub async fn download_whisper_model( Ok(()) } -/// Function to check if a model file exists #[tauri::command] #[specta::specta] #[instrument] @@ -1148,7 +1402,6 @@ pub async fn check_model_exists(model_path: String) -> Result { Ok(std::path::Path::new(&model_path).exists()) } -/// Function to delete a downloaded model #[tauri::command] #[specta::specta] #[instrument] @@ -1164,15 +1417,12 @@ pub async fn delete_whisper_model(model_path: String) -> Result<(), String> { Ok(()) } -/// Convert caption segments to SRT format fn captions_to_srt(captions: &CaptionData) -> String { let mut srt = String::new(); for (i, segment) in captions.segments.iter().enumerate() { - // Convert start and end times from seconds to HH:MM:SS,mmm format let start_time = format_srt_time(f64::from(segment.start)); let end_time = format_srt_time(f64::from(segment.end)); - // Write SRT entry srt.push_str(&format!( "{}\n{} --> {}\n{}\n\n", i + 1, @@ -1184,7 +1434,6 @@ fn captions_to_srt(captions: &CaptionData) -> String { srt } -/// Format time in seconds to SRT time format (HH:MM:SS,mmm) fn format_srt_time(seconds: f64) -> String { let hours = (seconds / 3600.0) as i32; let minutes = ((seconds % 3600.0) / 60.0) as i32; @@ -1193,7 +1442,6 @@ fn format_srt_time(seconds: f64) -> String { format!("{hours:02}:{minutes:02}:{secs:02},{millis:03}") } -/// Export captions to an SRT file #[tauri::command] #[specta::specta] #[instrument(skip(app))] @@ -1203,7 +1451,6 @@ pub async fn export_captions_srt( ) -> Result, String> { tracing::info!("Starting SRT export for video_id: {}", video_id); - // Load captions let captions = match load_captions(app.clone(), video_id.clone()).await? { Some(c) => { tracing::info!("Found {} caption segments to export", c.segments.len()); @@ -1215,8 +1462,6 @@ pub async fn export_captions_srt( } }; - // Ensure we have settings (this should already be handled by load_captions, - // but we add this check for extra safety) let captions_with_settings = CaptionData { segments: captions.segments, settings: captions @@ -1224,16 +1469,13 @@ pub async fn export_captions_srt( .or_else(|| Some(CaptionSettings::default())), }; - // Convert to SRT format tracing::info!("Converting captions to SRT format"); let srt_content = captions_to_srt(&captions_with_settings); - // Get path for SRT file let captions_dir = app_captions_dir(&app, &video_id)?; let srt_path = captions_dir.join("captions.srt"); tracing::info!("Will write SRT file to: {:?}", srt_path); - // Write SRT file match std::fs::write(&srt_path, srt_content) { Ok(_) => { tracing::info!("Successfully wrote SRT file to: {:?}", srt_path); @@ -1246,7 +1488,6 @@ pub async fn export_captions_srt( } } -// Helper function to convert multi-channel audio to mono fn convert_to_mono(samples: &[f32], channels: usize) -> Vec { if channels == 1 { return samples.to_vec(); @@ -1266,11 +1507,9 @@ fn convert_to_mono(samples: &[f32], channels: usize) -> Vec { mono_samples } -// Helper function to mix two sample arrays together fn mix_samples(dest: &mut [f32], source: &[f32]) -> usize { let length = dest.len().min(source.len()); for i in 0..length { - // Simple mix with equal weight (0.5) to prevent clipping dest[i] = (dest[i] + source[i]) * 0.5; } length diff --git a/apps/desktop/src-tauri/src/general_settings.rs b/apps/desktop/src-tauri/src/general_settings.rs index 1b193e75b3..1d3fc7fa61 100644 --- a/apps/desktop/src-tauri/src/general_settings.rs +++ b/apps/desktop/src-tauri/src/general_settings.rs @@ -122,6 +122,8 @@ pub struct GeneralSettingsStore { pub delete_instant_recordings_after_upload: bool, #[serde(default = "default_instant_mode_max_resolution")] pub instant_mode_max_resolution: u32, + #[serde(default)] + pub default_project_name_template: Option, } fn default_enable_native_camera_preview() -> bool { @@ -187,6 +189,7 @@ impl Default for GeneralSettingsStore { excluded_windows: default_excluded_windows(), delete_instant_recordings_after_upload: false, instant_mode_max_resolution: 1920, + default_project_name_template: None, } } } diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs index 4e776fe3b1..7fa14ab281 100644 --- a/apps/desktop/src-tauri/src/lib.rs +++ b/apps/desktop/src-tauri/src/lib.rs @@ -26,6 +26,7 @@ mod screenshot_editor; mod target_select_overlay; mod thumbnails; mod tray; +mod update_project_names; mod upload; mod web_api; mod window_exclusion; @@ -649,10 +650,18 @@ fn spawn_camera_watcher(app_handle: AppHandle) { ) }; - if should_check && let Some(selected_id) = camera_id { - let available = is_camera_available(&selected_id); + if should_check && let Some(ref selected_id) = camera_id { + let available = is_camera_available(selected_id); + debug!( + "Camera watcher: checking availability for {:?}, available={}, is_marked={}", + selected_id, available, is_marked + ); if !available && !is_marked { + warn!( + "Camera watcher: camera {:?} detected as unavailable, pausing recording", + selected_id + ); let mut app = state.write().await; if let Err(err) = app .handle_input_disconnect(RecordingInputKind::Camera) @@ -674,7 +683,21 @@ fn spawn_camera_watcher(app_handle: AppHandle) { } fn is_camera_available(id: &DeviceOrModelID) -> bool { - cap_camera::list_cameras().any(|info| match id { + let cameras: Vec<_> = cap_camera::list_cameras().collect(); + debug!( + "is_camera_available: looking for {:?} in {} cameras", + id, + cameras.len() + ); + for camera in &cameras { + debug!( + " - device_id={}, model_id={:?}, name={}", + camera.device_id(), + camera.model_id(), + camera.display_name() + ); + } + cameras.iter().any(|info| match id { DeviceOrModelID::DeviceID(device_id) => info.device_id() == device_id, DeviceOrModelID::ModelID(model_id) => { info.model_id().is_some_and(|existing| existing == model_id) @@ -1470,6 +1493,17 @@ async fn set_project_config( Ok(()) } +#[tauri::command] +#[specta::specta] +#[instrument(skip(editor_instance))] +async fn update_project_config_in_memory( + editor_instance: WindowEditorInstance, + config: ProjectConfiguration, +) -> Result<(), String> { + editor_instance.project_config.0.send(config).ok(); + Ok(()) +} + #[tauri::command] #[specta::specta] #[instrument(skip(editor_instance))] @@ -2301,6 +2335,7 @@ pub async fn run(recording_logging_handle: LoggingHandle, logs_dir: PathBuf) { stop_playback, set_playhead_position, set_project_config, + update_project_config_in_memory, generate_zoom_segments_from_clicks, permissions::open_permission_settings, permissions::do_permissions_check, @@ -2351,7 +2386,8 @@ pub async fn run(recording_logging_handle: LoggingHandle, logs_dir: PathBuf) { target_select_overlay::display_information, target_select_overlay::get_window_icon, target_select_overlay::focus_window, - editor_delete_project + editor_delete_project, + format_project_name, ]) .events(tauri_specta::collect_events![ RecordingOptionsChanged, @@ -2496,6 +2532,11 @@ pub async fn run(recording_logging_handle: LoggingHandle, logs_dir: PathBuf) { .invoke_handler(specta_builder.invoke_handler()) .setup(move |app| { let app = app.handle().clone(); + + if let Err(err) = update_project_names::migrate_if_needed(&app) { + tracing::error!("Failed to migrate project file names: {}", err); + } + specta_builder.mount_events(&app); hotkeys::init(&app); general_settings::init(&app); @@ -3061,13 +3102,13 @@ async fn create_editor_instance_impl( RenderFrameEvent::listen_any(&app, { let preview_tx = instance.preview_tx.clone(); move |e| { - preview_tx - .send(Some(( + preview_tx.send_modify(|v| { + *v = Some(( e.payload.frame_number, e.payload.fps, e.payload.resolution_base, - ))) - .ok(); + )); + }); } }); @@ -3116,6 +3157,24 @@ async fn write_clipboard_string( .map_err(|e| format!("Failed to write text to clipboard: {e}")) } +#[tauri::command(async)] +#[specta::specta] +fn format_project_name( + template: Option, + target_name: String, + target_kind: String, + recording_mode: RecordingMode, + datetime: Option>, +) -> String { + recording::format_project_name( + template.as_deref(), + target_name.as_str(), + target_kind.as_str(), + recording_mode, + datetime, + ) +} + trait EventExt: tauri_specta::Event { fn listen_any_spawn( app: &AppHandle, diff --git a/apps/desktop/src-tauri/src/recording.rs b/apps/desktop/src-tauri/src/recording.rs index 0fc5b6cb73..5d7062d7f7 100644 --- a/apps/desktop/src-tauri/src/recording.rs +++ b/apps/desktop/src-tauri/src/recording.rs @@ -23,10 +23,13 @@ use cap_recording::{ studio_recording, }; use cap_rendering::ProjectRecordingsMeta; -use cap_utils::{ensure_dir, spawn_actor}; +use cap_utils::{ensure_dir, moment_format_to_chrono, spawn_actor}; use futures::{FutureExt, stream}; +use lazy_static::lazy_static; +use regex::Regex; use serde::{Deserialize, Serialize}; use specta::Type; +use std::borrow::Cow; use std::{ any::Any, collections::{HashMap, VecDeque}, @@ -76,12 +79,12 @@ pub enum InProgressRecording { progressive_upload: InstantMultipartUpload, video_upload_info: VideoUploadInfo, common: InProgressRecordingCommon, - // camera isn't used as part of recording pipeline so we hold lock here camera_feed: Option>, }, Studio { handle: studio_recording::ActorHandle, common: InProgressRecordingCommon, + camera_feed: Option>, }, } @@ -349,6 +352,82 @@ pub enum RecordingAction { UpgradeRequired, } +pub fn format_project_name<'a>( + template: Option<&str>, + target_name: &'a str, + target_kind: &'a str, + recording_mode: RecordingMode, + datetime: Option>, +) -> String { + const DEFAULT_FILENAME_TEMPLATE: &str = "{target_name} ({target_kind}) {date} {time}"; + let datetime = datetime.unwrap_or(chrono::Local::now()); + + lazy_static! { + static ref DATE_REGEX: Regex = Regex::new(r"\{date(?::([^}]+))?\}").unwrap(); + static ref TIME_REGEX: Regex = Regex::new(r"\{time(?::([^}]+))?\}").unwrap(); + static ref MOMENT_REGEX: Regex = Regex::new(r"\{moment(?::([^}]+))?\}").unwrap(); + static ref AC: aho_corasick::AhoCorasick = { + aho_corasick::AhoCorasick::new([ + "{recording_mode}", + "{mode}", + "{target_kind}", + "{target_name}", + ]) + .expect("Failed to build AhoCorasick automaton") + }; + } + let haystack = template.unwrap_or(DEFAULT_FILENAME_TEMPLATE); + + // Get recording mode information + let (recording_mode, mode) = match recording_mode { + RecordingMode::Studio => ("Studio", "studio"), + RecordingMode::Instant => ("Instant", "instant"), + RecordingMode::Screenshot => ("Screenshot", "screenshot"), + }; + + let result = AC + .try_replace_all(haystack, &[recording_mode, mode, target_kind, target_name]) + .expect("AhoCorasick replace should never fail with default configuration"); + + let result = DATE_REGEX.replace_all(&result, |caps: ®ex::Captures| { + datetime + .format( + &caps + .get(1) + .map(|m| m.as_str()) + .map(moment_format_to_chrono) + .unwrap_or(Cow::Borrowed("%Y-%m-%d")), + ) + .to_string() + }); + + let result = TIME_REGEX.replace_all(&result, |caps: ®ex::Captures| { + datetime + .format( + &caps + .get(1) + .map(|m| m.as_str()) + .map(moment_format_to_chrono) + .unwrap_or(Cow::Borrowed("%I:%M %p")), + ) + .to_string() + }); + + let result = MOMENT_REGEX.replace_all(&result, |caps: ®ex::Captures| { + datetime + .format( + &caps + .get(1) + .map(|m| m.as_str()) + .map(moment_format_to_chrono) + .unwrap_or(Cow::Borrowed("%Y-%m-%d %H:%M")), + ) + .to_string() + }); + + result.into_owned() +} + #[tauri::command] #[specta::specta] #[tracing::instrument(name = "recording", skip_all)] @@ -361,34 +440,41 @@ pub async fn start_recording( return Err("Recording already in progress".to_string()); } - let id = uuid::Uuid::new_v4().to_string(); let general_settings = GeneralSettingsStore::get(&app).ok().flatten(); let general_settings = general_settings.as_ref(); - let recording_dir = app - .path() - .app_data_dir() - .unwrap() - .join("recordings") - .join(format!("{id}.cap")); + let project_name = format_project_name( + general_settings + .and_then(|s| s.default_project_name_template.clone()) + .as_deref(), + inputs + .capture_target + .title() + .as_deref() + .unwrap_or("Unknown"), + inputs.capture_target.kind_str(), + inputs.mode, + None, + ); + + let filename = project_name.replace(":", "."); + let filename = format!("{}.cap", sanitize_filename::sanitize(&filename)); + + let recordings_base_dir = app.path().app_data_dir().unwrap().join("recordings"); - ensure_dir(&recording_dir).map_err(|e| format!("Failed to create recording directory: {e}"))?; + let project_file_path = recordings_base_dir.join(&cap_utils::ensure_unique_filename( + &filename, + &recordings_base_dir, + )?); + + ensure_dir(&project_file_path) + .map_err(|e| format!("Failed to create recording directory: {e}"))?; state_mtx .write() .await - .add_recording_logging_handle(&recording_dir.join("recording-logs.log")) + .add_recording_logging_handle(&project_file_path.join("recording-logs.log")) .await?; - let target_name = { - let title = inputs.capture_target.title(); - - match inputs.capture_target.clone() { - ScreenCaptureTarget::Area { .. } => title.unwrap_or_else(|| "Area".to_string()), - ScreenCaptureTarget::Window { .. } => title.unwrap_or_else(|| "Window".to_string()), - ScreenCaptureTarget::Display { .. } => title.unwrap_or_else(|| "Screen".to_string()), - } - }; - if let Some(window) = CapWindowId::Camera.get(&app) { let _ = window.set_content_protected(matches!(inputs.mode, RecordingMode::Studio)); } @@ -402,10 +488,7 @@ pub async fn start_recording( &app, false, None, - Some(format!( - "{target_name} {}", - chrono::Local::now().format("%Y-%m-%d %H:%M:%S") - )), + Some(project_name.clone()), None, inputs.organization_id.clone(), ) @@ -444,17 +527,10 @@ pub async fn start_recording( RecordingMode::Screenshot => return Err("Use take_screenshot for screenshots".to_string()), }; - let date_time = if cfg!(windows) { - // Windows doesn't support colon in file paths - chrono::Local::now().format("%Y-%m-%d %H.%M.%S") - } else { - chrono::Local::now().format("%Y-%m-%d %H:%M:%S") - }; - let meta = RecordingMeta { platform: Some(Platform::default()), - project_path: recording_dir.clone(), - pretty_name: format!("{target_name} {date_time}"), + project_path: project_file_path.clone(), + pretty_name: project_name.clone(), inner: match inputs.mode { RecordingMode::Studio => { RecordingMetaInner::Studio(StudioRecordingMeta::MultipleSegments { @@ -544,8 +620,7 @@ pub async fn start_recording( let actor_task = { let state_mtx = Arc::clone(&state_mtx); let general_settings = general_settings.cloned(); - let recording_dir = recording_dir.clone(); - let target_name = target_name.clone(); + let recording_dir = project_file_path.clone(); let inputs = inputs.clone(); async move { fail!("recording::spawn_actor"); @@ -607,7 +682,7 @@ pub async fn start_recording( acquire_shareable_content_for_target(&inputs.capture_target).await?; let common = InProgressRecordingCommon { - target_name, + target_name: project_name, inputs: inputs.clone(), recording_dir: recording_dir.clone(), }; @@ -674,6 +749,7 @@ pub async fn start_recording( Ok(InProgressRecording::Studio { handle, common: common.clone(), + camera_feed: camera_feed.clone(), }) } RecordingMode::Instant => { @@ -769,15 +845,25 @@ pub async fn start_recording( Ok(Ok(rx)) => rx, Ok(Err(err)) => { let message = format!("{err:#}"); - handle_spawn_failure(&app, &state_mtx, recording_dir.as_path(), message.clone()) - .await?; + handle_spawn_failure( + &app, + &state_mtx, + project_file_path.as_path(), + message.clone(), + ) + .await?; return Err(message); } Err(panic) => { let panic_msg = panic_message(panic); let message = format!("Failed to spawn recording actor: {panic_msg}"); - handle_spawn_failure(&app, &state_mtx, recording_dir.as_path(), message.clone()) - .await?; + handle_spawn_failure( + &app, + &state_mtx, + project_file_path.as_path(), + message.clone(), + ) + .await?; return Err(message); } }; @@ -818,7 +904,7 @@ pub async fn start_recording( dialog.blocking_show(); // this clears the current recording for us - handle_recording_end(app, Err(e.to_string()), &mut state, recording_dir) + handle_recording_end(app, Err(e.to_string()), &mut state, project_file_path) .await .ok(); } @@ -1035,6 +1121,19 @@ pub async fn take_screenshot( use image::ImageEncoder; use std::time::Instant; + let general_settings = GeneralSettingsStore::get(&app).ok().flatten(); + let general_settings = general_settings.as_ref(); + + let project_name = format_project_name( + general_settings + .and_then(|s| s.default_project_name_template.clone()) + .as_deref(), + target.title().as_deref().unwrap_or("Unknown"), + target.kind_str(), + RecordingMode::Screenshot, + None, + ); + let image = capture_screenshot(target) .await .map_err(|e| format!("Failed to capture screenshot: {e}"))?; @@ -1043,23 +1142,22 @@ pub async fn take_screenshot( let image_height = image.height(); let image_data = image.into_raw(); - let screenshots_dir = app.path().app_data_dir().unwrap().join("screenshots"); + let filename = project_name.replace(":", "."); + let filename = format!("{}.cap", sanitize_filename::sanitize(&filename)); - std::fs::create_dir_all(&screenshots_dir).map_err(|e| e.to_string())?; + let screenshots_base_dir = app.path().app_data_dir().unwrap().join("screenshots"); - let date_time = if cfg!(windows) { - chrono::Local::now().format("%Y-%m-%d %H.%M.%S") - } else { - chrono::Local::now().format("%Y-%m-%d %H:%M:%S") - }; + let project_file_path = screenshots_base_dir.join(&cap_utils::ensure_unique_filename( + &filename, + &screenshots_base_dir, + )?); - let id = uuid::Uuid::new_v4().to_string(); - let cap_dir = screenshots_dir.join(format!("{id}.cap")); - std::fs::create_dir_all(&cap_dir).map_err(|e| e.to_string())?; + ensure_dir(&project_file_path) + .map_err(|e| format!("Failed to create screenshots directory: {e}"))?; let image_filename = "original.png"; - let image_path = cap_dir.join(image_filename); - let cap_dir_key = cap_dir.to_string_lossy().to_string(); + let image_path = project_file_path.join(image_filename); + let cap_dir_key = project_file_path.to_string_lossy().to_string(); let pending_screenshots = app.state::(); pending_screenshots.insert( @@ -1089,8 +1187,8 @@ pub async fn take_screenshot( let meta = cap_project::RecordingMeta { platform: Some(Platform::default()), - project_path: cap_dir.clone(), - pretty_name: format!("Screenshot {}", date_time), + project_path: project_file_path.clone(), + pretty_name: project_name, sharing: None, inner: cap_project::RecordingMetaInner::Studio( cap_project::StudioRecordingMeta::SingleSegment { segment }, @@ -1102,7 +1200,7 @@ pub async fn take_screenshot( .map_err(|e| format!("Failed to save recording meta: {e}"))?; cap_project::ProjectConfiguration::default() - .write(&cap_dir) + .write(&project_file_path) .map_err(|e| format!("Failed to save project config: {e}"))?; let is_large_capture = (image_width as u64).saturating_mul(image_height as u64) > 8_000_000; @@ -1705,6 +1803,8 @@ fn project_config_from_recording( segments: timeline_segments, zoom_segments, scene_segments: Vec::new(), + mask_segments: Vec::new(), + text_segments: Vec::new(), }); config diff --git a/apps/desktop/src-tauri/src/target_select_overlay.rs b/apps/desktop/src-tauri/src/target_select_overlay.rs index d52424235e..1ea7a0642f 100644 --- a/apps/desktop/src-tauri/src/target_select_overlay.rs +++ b/apps/desktop/src-tauri/src/target_select_overlay.rs @@ -15,7 +15,7 @@ use crate::{ }; use scap_targets::{ Display, DisplayId, Window, WindowId, - bounds::{LogicalBounds, PhysicalSize}, + bounds::{LogicalBounds, LogicalSize, PhysicalSize}, }; use serde::Serialize; use specta::Type; @@ -42,6 +42,7 @@ pub struct WindowUnderCursor { pub struct DisplayInformation { name: Option, physical_size: Option, + logical_size: Option, refresh_rate: String, } @@ -217,6 +218,7 @@ pub async fn display_information(display_id: &str) -> Result Result<(), String> { + use tauri_plugin_store::StoreExt; + + let store = app + .store("store") + .map_err(|e| format!("Failed to access store: {}", e))?; + + if store + .get(STORE_KEY) + .and_then(|v| v.as_bool()) + .unwrap_or(false) + { + return Ok(()); + } + + if let Err(err) = futures::executor::block_on(migrate(app)) { + tracing::error!("Updating project names failed: {err}"); + } + + store.set(STORE_KEY, true); + store + .save() + .map_err(|e| format!("Failed to save store: {}", e))?; + + Ok(()) +} + +use std::time::Instant; + +/// Performs a one-time migration of all UUID-named projects to pretty name-based naming. +pub async fn migrate(app: &AppHandle) -> Result<(), String> { + let recordings_dir = recordings_path(app); + if !fs::try_exists(&recordings_dir) + .await + .map_err(|e| format!("Failed to check recordings directory: {}", e))? + { + return Ok(()); + } + + let uuid_projects = collect_uuid_projects(&recordings_dir).await?; + if uuid_projects.is_empty() { + tracing::debug!("No UUID-named projects found to migrate"); + return Ok(()); + } + + tracing::info!( + "Found {} UUID-named projects to migrate", + uuid_projects.len() + ); + + let total_found = uuid_projects.len(); + let concurrency_limit = std::thread::available_parallelism() + .map(|n| n.get()) + .unwrap_or(4) + .clamp(2, 16) + .min(total_found); + tracing::debug!("Using concurrency limit of {}", concurrency_limit); + + let wall_start = Instant::now(); + let in_flight_bases = Arc::new(Mutex::new(HashSet::new())); + + // (project_name, result, duration) + let migration_results = futures::stream::iter(uuid_projects) + .map(|project_path| { + let in_flight = in_flight_bases.clone(); + async move { + let project_name = project_path + .file_name() + .map(|s| s.to_string_lossy().into_owned()) + .unwrap_or_else(|| project_path.display().to_string()); + + let start = Instant::now(); + let res = migrate_single_project(project_path, in_flight).await; + let dur = start.elapsed(); + + (project_name, res, dur) + } + }) + .buffer_unordered(concurrency_limit) + .collect::>() + .await; + + let wall_elapsed = wall_start.elapsed(); + + let mut migrated = 0usize; + let mut skipped = 0usize; + let mut failed = 0usize; + + let mut total_ms: u128 = 0; + let mut per_project: Vec<(String, std::time::Duration)> = + Vec::with_capacity(migration_results.len()); + + for (name, result, dur) in migration_results.into_iter() { + match result { + Ok(ProjectMigrationResult::Migrated) => migrated += 1, + Ok(ProjectMigrationResult::Skipped) => skipped += 1, + Err(_) => failed += 1, + } + total_ms += dur.as_millis(); + per_project.push((name, dur)); + } + + let avg_ms = if total_found > 0 { + (total_ms as f64) / (total_found as f64) + } else { + 0.0 + }; + + // Sort by duration descending to pick slowest + per_project.sort_by(|a, b| b.1.cmp(&a.1)); + + tracing::info!( + total_found = total_found, + migrated = migrated, + skipped = skipped, + failed = failed, + wall_ms = wall_elapsed.as_millis(), + avg_per_project_ms = ?avg_ms, + "Migration complete" + ); + + // Log top slowest N (choose 5 or less) + let top_n = 5.min(per_project.len()); + if top_n > 0 { + tracing::info!("Top {} slowest project migrations:", top_n); + for (name, dur) in per_project.into_iter().take(top_n) { + tracing::info!(project = %name, ms = dur.as_millis()); + } + } + + Ok(()) +} + +async fn collect_uuid_projects(recordings_dir: &Path) -> Result, String> { + let mut uuid_projects = Vec::new(); + let mut entries = fs::read_dir(recordings_dir) + .await + .map_err(|e| format!("Failed to read recordings directory: {}", e))?; + + while let Some(entry) = entries + .next_entry() + .await + .map_err(|e| format!("Failed to read directory entry: {}", e))? + { + let path = entry.path(); + if !path.is_dir() { + continue; + } + + let Some(filename) = path.file_name().and_then(|s| s.to_str()) else { + continue; + }; + + if filename.ends_with(".cap") && fast_is_project_filename_uuid(filename) { + uuid_projects.push(path); + } + } + + Ok(uuid_projects) +} + +#[derive(Debug)] +enum ProjectMigrationResult { + Migrated, + Skipped, +} + +async fn migrate_single_project( + path: PathBuf, + in_flight_basis: Arc>>, +) -> Result { + let filename = path + .file_name() + .and_then(|s| s.to_str()) + .unwrap_or("unknown"); + + let meta = match RecordingMeta::load_for_project(&path) { + Ok(meta) => meta, + Err(e) => { + tracing::warn!("Failed to load metadata for {}: {}", filename, e); + return Err(format!("Failed to load metadata: {}", e)); + } + }; + + // Lock on the base sanitized name to prevent concurrent migrations with same target + let base_name = sanitize_filename::sanitize(meta.pretty_name.replace(":", ".")); + { + let mut in_flight = in_flight_basis.lock().await; + let mut wait_count = 0; + while !in_flight.insert(base_name.clone()) { + wait_count += 1; + if wait_count == 1 { + tracing::debug!( + "Project {} waiting for concurrent migration of base name \"{}\"", + filename, + base_name + ); + } + drop(in_flight); + tokio::time::sleep(std::time::Duration::from_millis(5)).await; + in_flight = in_flight_basis.lock().await; + } + if wait_count > 0 { + tracing::debug!( + "Project {} acquired lock for \"{}\" after {} waits", + filename, + base_name, + wait_count + ); + } + } + + let result = migrate_project_filename_async(&path, &meta).await; + + in_flight_basis.lock().await.remove(&base_name); + + match result { + Ok(new_path) => { + if new_path != path { + let new_name = new_path.file_name().unwrap().to_string_lossy(); + tracing::info!("Updated name: \"{}\" -> \"{}\"", filename, new_name); + Ok(ProjectMigrationResult::Migrated) + } else { + Ok(ProjectMigrationResult::Skipped) + } + } + Err(e) => { + tracing::error!("Failed to migrate {}: {}", filename, e); + Err(e) + } + } +} + +/// Migrates a project filename from UUID to sanitized pretty name +async fn migrate_project_filename_async( + project_path: &Path, + meta: &RecordingMeta, +) -> Result { + let sanitized = sanitize_filename::sanitize(meta.pretty_name.replace(":", ".")); + + let filename = if sanitized.ends_with(".cap") { + sanitized + } else { + format!("{}.cap", sanitized) + }; + + let parent_dir = project_path + .parent() + .ok_or("Project path has no parent directory")?; + + let unique_filename = cap_utils::ensure_unique_filename(&filename, parent_dir) + .map_err(|e| format!("Failed to ensure unique filename: {}", e))?; + + let final_path = parent_dir.join(&unique_filename); + + fs::rename(project_path, &final_path) + .await + .map_err(|e| format!("Failed to rename project directory: {}", e))?; + + Ok(final_path) +} + +pub fn fast_is_project_filename_uuid(filename: &str) -> bool { + if filename.len() != 40 || !filename.ends_with(".cap") { + return false; + } + + let uuid_part = &filename[..36]; + + if uuid_part.as_bytes()[8] != b'-' + || uuid_part.as_bytes()[13] != b'-' + || uuid_part.as_bytes()[18] != b'-' + || uuid_part.as_bytes()[23] != b'-' + { + return false; + } + + uuid_part.chars().all(|c| c.is_ascii_hexdigit() || c == '-') +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_is_project_filename_uuid() { + // Valid UUID + assert!(fast_is_project_filename_uuid( + "a1b2c3d4-e5f6-7890-abcd-ef1234567890.cap" + )); + assert!(fast_is_project_filename_uuid( + "00000000-0000-0000-0000-000000000000.cap" + )); + + // Invalid cases + assert!(!fast_is_project_filename_uuid("my-project-name.cap")); + assert!(!fast_is_project_filename_uuid( + "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + )); + assert!(!fast_is_project_filename_uuid( + "a1b2c3d4-e5f6-7890-abcd-ef1234567890.txt" + )); + assert!(!fast_is_project_filename_uuid( + "g1b2c3d4-e5f6-7890-abcd-ef1234567890.cap" + )); + } +} diff --git a/apps/desktop/src/components/Toggle.tsx b/apps/desktop/src/components/Toggle.tsx index 60f29d664e..c16036fd33 100644 --- a/apps/desktop/src/components/Toggle.tsx +++ b/apps/desktop/src/components/Toggle.tsx @@ -40,8 +40,8 @@ export function Toggle( const [local, others] = splitProps(props, ["size"]); return ( - - + + diff --git a/apps/desktop/src/routes/(window-chrome)/settings/general.tsx b/apps/desktop/src/routes/(window-chrome)/settings/general.tsx index 38a48f8b4f..bf282d4b5b 100644 --- a/apps/desktop/src/routes/(window-chrome)/settings/general.tsx +++ b/apps/desktop/src/routes/(window-chrome)/settings/general.tsx @@ -6,6 +6,7 @@ import { } from "@tauri-apps/plugin-notification"; import { type OsType, type } from "@tauri-apps/plugin-os"; import "@total-typescript/ts-reset/filter-boolean"; +import { Collapsible } from "@kobalte/core/collapsible"; import { CheckMenuItem, Menu, MenuItem } from "@tauri-apps/api/menu"; import { confirm } from "@tauri-apps/plugin-dialog"; import { cx } from "cva"; @@ -13,7 +14,9 @@ import { createEffect, createMemo, createResource, + createSignal, For, + onMount, type ParentProps, Show, } from "solid-js"; @@ -102,6 +105,9 @@ const INSTANT_MODE_RESOLUTION_OPTIONS = [ label: string; }[]; +const DEFAULT_PROJECT_NAME_TEMPLATE = + "{target_name} ({target_kind}) {date} {time}"; + export default function GeneralSettings() { const [store] = createResource(() => generalSettingsStore.get()); @@ -568,6 +574,13 @@ function Inner(props: { initialStore: GeneralSettingsStore | null }) { /> + + handleChange("defaultProjectNameTemplate", value) + } + value={settings.defaultProjectNameTemplate ?? null} + /> + Promise; +}) { + const MOMENT_EXAMPLE_TEMPLATE = "{moment:DDDD, MMMM D, YYYY h:mm A}"; + const macos = type() === "macos"; + const today = new Date(); + const datetime = new Date( + today.getFullYear(), + today.getMonth(), + today.getDate(), + macos ? 9 : 12, + macos ? 41 : 0, + 0, + 0, + ).toISOString(); + + let inputRef: HTMLInputElement | undefined; + + const dateString = today.toISOString().split("T")[0]; + const initialTemplate = () => props.value ?? DEFAULT_PROJECT_NAME_TEMPLATE; + + const [inputValue, setInputValue] = createSignal(initialTemplate()); + const [preview, setPreview] = createSignal(null); + const [momentExample, setMomentExample] = createSignal(""); + + async function updatePreview(val = inputValue()) { + const formatted = await commands.formatProjectName( + val, + macos ? "Safari" : "Chrome", + "Window", + "instant", + datetime, + ); + setPreview(formatted); + } + + onMount(() => { + commands + .formatProjectName( + MOMENT_EXAMPLE_TEMPLATE, + macos ? "Safari" : "Chrome", + "Window", + "instant", + datetime, + ) + .then(setMomentExample); + + const seed = initialTemplate(); + setInputValue(seed); + if (inputRef) inputRef.value = seed; + updatePreview(seed); + }); + + const isSaveDisabled = () => { + const input = inputValue(); + return ( + !input || + input === (props.value ?? DEFAULT_PROJECT_NAME_TEMPLATE) || + input.length <= 3 + ); + }; + + function CodeView(props: { children: string }) { + return ( + + ); + } + + return ( +
+
+
+

Default Project Name

+

+ Choose the template to use as the default project and file name. +

+
+
+ + + +
+
+ +
+ { + setInputValue(e.currentTarget.value); + updatePreview(e.currentTarget.value); + }} + /> + +
+ +

{preview()}

+
+ + + + +

How to customize?

+
+ + +

+ Use placeholders in your template that will be automatically + filled in. +

+ +
+

Recording Mode

+

+ {"{recording_mode}"} → "Studio", "Instant", + or "Screenshot" +

+

+ {"{mode}"} → "studio", "instant", or + "screenshot" +

+
+ +
+

Target

+

+ {"{target_kind}"} → "Display", "Window", or + "Area" +

+

+ {"{target_name}"} → The name of the monitor + or the title of the app depending on the recording mode. +

+
+ +
+

Date & Time

+

+ {"{date}"} → {dateString} +

+

+ {"{time}"} →{" "} + {macos ? "09:41 AM" : "12:00 PM"} +

+
+ +
+

Custom Formats

+

+ You can also use a custom format for time. The placeholders are + case-sensitive. For 24-hour time, use{" "} + {"{moment:HH:mm}"} or use lower cased{" "} + hh for 12-hour format. +

+

+ {MOMENT_EXAMPLE_TEMPLATE} →{" "} + {momentExample()} +

+
+
+
+
+
+ ); +} + function ExcludedWindowsCard(props: { excludedWindows: WindowExclusion[]; availableWindows: CaptureWindow[]; @@ -735,7 +951,7 @@ function ExcludedWindowsCard(props: {

-
+
- ); +interface PositionOption { + value: string; + label: string; } +const POSITION_OPTIONS: PositionOption[] = [ + { value: "top-left", label: "Top Left" }, + { value: "top-center", label: "Top Center" }, + { value: "top-right", label: "Top Right" }, + { value: "bottom-left", label: "Bottom Left" }, + { value: "bottom-center", label: "Bottom Center" }, + { value: "bottom-right", label: "Bottom Right" }, +]; + +const DEFAULT_MODEL = "small"; +const MODEL_FOLDER = "transcription_models"; + const fontOptions = [ { value: "System Sans-Serif", label: "System Sans-Serif" }, { value: "System Serif", label: "System Serif" }, { value: "System Monospace", label: "System Monospace" }, ]; -// Add type definitions at the top -interface CaptionsResponse { - segments: CaptionSegment[]; -} - -// Color conversion types -type RGB = [number, number, number]; - -// Helper functions for color conversion -function hexToRgb(hex: string): RGB { - const result = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(hex); - return result - ? [ - parseInt(result[1], 16), - parseInt(result[2], 16), - parseInt(result[3], 16), - ] - : [0, 0, 0]; -} - -function rgbToHex(rgb: RGB): string { - return `#${rgb.map((x) => x.toString(16).padStart(2, "0")).join("")}`; -} - -// Add RgbInput component at the top level function RgbInput(props: { value: string; onChange: (value: string) => void }) { const [text, setText] = createWritableMemo(() => props.value); let prevColor = props.value; @@ -164,115 +146,29 @@ function RgbInput(props: { value: string; onChange: (value: string) => void }) { ); } -// Add scroll position preservation for the container export function CaptionsTab() { const { project, setProject, editorInstance, editorState } = useEditorContext(); - // Scroll management - let scrollContainerRef: HTMLDivElement | undefined; - const [scrollState, setScrollState] = createStore({ - lastScrollTop: 0, - isScrolling: false, - }); - - // Track container size changes - const size = createElementSize(() => scrollContainerRef); - - // Create a local store for caption settings to avoid direct project mutations - const [captionSettings, setCaptionSettings] = createStore( - project?.captions?.settings || { - enabled: false, - font: "Arial", - size: 24, - color: "#FFFFFF", - backgroundColor: "#000000", - backgroundOpacity: 80, - position: "bottom", - bold: true, - italic: false, - outline: true, - outlineColor: "#000000", - exportWithSubtitles: false, - }, - ); + const getSetting = ( + key: K, + ): NonNullable => + (project?.captions?.settings?.[key] ?? + defaultCaptionSettings[key]) as NonNullable; - // Sync caption settings with project and update player - createEffect(() => { - if (!project?.captions) return; - - const settings = captionSettings; - - // Only update if there are actual changes - if ( - JSON.stringify(settings) !== JSON.stringify(project.captions.settings) - ) { - batch(() => { - // Update project settings - setProject("captions", "settings", settings); - - // Force player refresh - events.renderFrameEvent.emit({ - frame_number: Math.floor(editorState.playbackTime * FPS), - fps: FPS, - resolution_base: OUTPUT_SIZE, - }); - }); - } - }); - - // Sync project settings to local store - createEffect(() => { - if (project?.captions?.settings) { - setCaptionSettings(project.captions.settings); - } - }); - - // Helper function to update caption settings - const updateCaptionSetting = (key: keyof CaptionSettings, value: any) => { + const updateCaptionSetting = ( + key: K, + value: CaptionSettings[K], + ) => { if (!project?.captions) return; - // Store scroll position before update - if (scrollContainerRef) { - setScrollState("lastScrollTop", scrollContainerRef.scrollTop); - } - - // Update local store - setCaptionSettings({ - ...captionSettings, - [key]: value, - }); - - // For font changes, force an immediate player update - if (key === "font") { - events.renderFrameEvent.emit({ - frame_number: Math.floor(editorState.playbackTime * FPS), - fps: FPS, - resolution_base: OUTPUT_SIZE, - }); - } + setProject("captions", "settings", key, value); }; - // Restore scroll position after any content changes - createEffect(() => { - // Track any size changes - const _ = size.height; - - // Restore scroll position if we have one - if (scrollContainerRef && scrollState.lastScrollTop > 0) { - requestAnimationFrame(() => { - scrollContainerRef!.scrollTop = scrollState.lastScrollTop; - }); - } - }); - - // Add model selection state const [selectedModel, setSelectedModel] = createSignal(DEFAULT_MODEL); const [selectedLanguage, setSelectedLanguage] = createSignal("auto"); const [downloadedModels, setDownloadedModels] = createSignal([]); - // States for captions - const [modelExists, setModelExists] = createSignal(false); const [isDownloading, setIsDownloading] = createSignal(false); const [downloadProgress, setDownloadProgress] = createSignal(0); const [downloadingModel, setDownloadingModel] = createSignal( @@ -280,48 +176,30 @@ export function CaptionsTab() { ); const [isGenerating, setIsGenerating] = createSignal(false); const [hasAudio, setHasAudio] = createSignal(false); - const [modelPath, setModelPath] = createSignal(""); - const [currentCaption, setCurrentCaption] = createSignal(null); - - // Ensure captions object is initialized in project config - createEffect(() => { - if (!project || !editorInstance) return; - - if (!project.captions) { - // Initialize captions with default settings - setProject("captions", { - segments: [], - settings: { - enabled: false, - font: "Arial", - size: 24, - color: "#FFFFFF", - backgroundColor: "#000000", - backgroundOpacity: 80, - position: "bottom", - bold: true, - italic: false, - outline: true, - outlineColor: "#000000", - exportWithSubtitles: false, - }, - }); - } - }); - // Check downloaded models on mount + createEffect( + on( + () => project && editorInstance && !project.captions, + (shouldInit) => { + if (shouldInit) { + setProject("captions", { + segments: [], + settings: { ...defaultCaptionSettings }, + }); + } + }, + ), + ); + onMount(async () => { try { - // Check for downloaded models const appDataDirPath = await appLocalDataDir(); const modelsPath = await join(appDataDirPath, MODEL_FOLDER); - // Create models directory if it doesn't exist if (!(await exists(modelsPath))) { await commands.createDir(modelsPath, true); } - // Check which models are already downloaded const models = await Promise.all( MODEL_OPTIONS.map(async (model) => { const downloaded = await checkModelExists(model.name); @@ -329,25 +207,32 @@ export function CaptionsTab() { }), ); - // Set available models setDownloadedModels( models.filter((m) => m.downloaded).map((m) => m.name), ); - // Check if current model exists - if (selectedModel()) { - setModelExists(await checkModelExists(selectedModel())); + const savedModel = localStorage.getItem("selectedTranscriptionModel"); + if (savedModel && MODEL_OPTIONS.some((m) => m.name === savedModel)) { + setSelectedModel(savedModel); + } + + const savedLanguage = localStorage.getItem( + "selectedTranscriptionLanguage", + ); + if ( + savedLanguage && + LANGUAGE_OPTIONS.some((l) => l.code === savedLanguage) + ) { + setSelectedLanguage(savedLanguage); } - // Check if the video has audio - if (editorInstance && editorInstance.recordings) { + if (editorInstance?.recordings) { const hasAudioTrack = editorInstance.recordings.segments.some( (segment) => segment.mic !== null || segment.system_audio !== null, ); setHasAudio(hasAudioTrack); } - // Restore download state if there was an ongoing download const downloadState = localStorage.getItem("modelDownloadState"); if (downloadState) { const { model, progress } = JSON.parse(downloadState); @@ -364,70 +249,40 @@ export function CaptionsTab() { } }); - // Save download state when it changes - createEffect(() => { - if (isDownloading() && downloadingModel()) { - localStorage.setItem( - "modelDownloadState", - JSON.stringify({ - model: downloadingModel(), - progress: downloadProgress(), - }), - ); - } else { - localStorage.removeItem("modelDownloadState"); - } - }); - - // Effect to update current caption based on playback time - createEffect(() => { - if (!project?.captions?.segments || editorState.playbackTime === undefined) - return; - - const time = editorState.playbackTime; - const segments = project.captions.segments; - - // Binary search for the correct segment - const findSegment = ( - time: number, - segments: CaptionSegment[], - ): CaptionSegment | undefined => { - let left = 0; - let right = segments.length - 1; - - while (left <= right) { - const mid = Math.floor((left + right) / 2); - const segment = segments[mid]; - - if (time >= segment.start && time < segment.end) { - return segment; - } - - if (time < segment.start) { - right = mid - 1; + createEffect( + on( + () => [isDownloading(), downloadingModel(), downloadProgress()] as const, + ([downloading, model, progress]) => { + if (downloading && model) { + localStorage.setItem( + "modelDownloadState", + JSON.stringify({ model, progress }), + ); } else { - left = mid + 1; + localStorage.removeItem("modelDownloadState"); } - } - - return undefined; - }; + }, + ), + ); - // Find the current segment using binary search - const currentSegment = findSegment(time, segments); + createEffect( + on(selectedModel, (model) => { + if (model) localStorage.setItem("selectedTranscriptionModel", model); + }), + ); - // Only update if the caption has changed - if (currentSegment?.text !== currentCaption()) { - setCurrentCaption(currentSegment?.text || null); - } - }); + createEffect( + on(selectedLanguage, (language) => { + if (language) + localStorage.setItem("selectedTranscriptionLanguage", language); + }), + ); const checkModelExists = async (modelName: string) => { const appDataDirPath = await appLocalDataDir(); const modelsPath = await join(appDataDirPath, MODEL_FOLDER); - const modelPath = await join(modelsPath, `${modelName}.bin`); - setModelPath(modelPath); - return await commands.checkModelExists(modelPath); + const path = await join(modelsPath, `${modelName}.bin`); + return await commands.checkModelExists(path); }; const downloadModel = async () => { @@ -437,7 +292,6 @@ export function CaptionsTab() { setDownloadProgress(0); setDownloadingModel(modelToDownload); - // Create the directory if it doesn't exist const appDataDirPath = await appLocalDataDir(); const modelsPath = await join(appDataDirPath, MODEL_FOLDER); const modelPath = await join(modelsPath, `${modelToDownload}.bin`); @@ -448,20 +302,14 @@ export function CaptionsTab() { console.error("Error creating directory:", err); } - // Set up progress listener const unlisten = await events.downloadProgress.listen((event) => { setDownloadProgress(event.payload.progress); }); - // Download the model await commands.downloadWhisperModel(modelToDownload, modelPath); - - // Clean up listener unlisten(); - // Update downloaded models list setDownloadedModels((prev) => [...prev, modelToDownload]); - setModelExists(true); toast.success("Transcription model downloaded successfully!"); } catch (error) { console.error("Error downloading model:", error); @@ -489,7 +337,6 @@ export function CaptionsTab() { `${selectedModel()}.bin`, ); - // Verify file existence before proceeding const result = await commands.transcribeAudio( videoPath, currentModelPath, @@ -497,7 +344,6 @@ export function CaptionsTab() { ); if (result && result.segments.length > 0) { - // Update project with the new segments setProject("captions", "segments", result.segments); updateCaptionSetting("enabled", true); toast.success("Captions generated successfully!"); @@ -516,7 +362,6 @@ export function CaptionsTab() { errorMessage = error; } - // Provide more user-friendly error messages if (errorMessage.includes("No audio stream found")) { errorMessage = "No audio found in the video file"; } else if (errorMessage.includes("Model file not found")) { @@ -532,7 +377,6 @@ export function CaptionsTab() { } }; - // Segment operations that update project directly const deleteSegment = (id: string) => { if (!project?.captions?.segments) return; @@ -573,395 +417,201 @@ export function CaptionsTab() { ]); }; + const hasCaptions = createMemo( + () => (project.captions?.segments?.length ?? 0) > 0, + ); + return ( -
-
(scrollContainerRef = el)} - onScroll={() => { - if (!scrollState.isScrolling && scrollContainerRef) { - setScrollState("isScrolling", true); - setScrollState("lastScrollTop", scrollContainerRef.scrollTop); - - // Reset scrolling flag after scroll ends - setTimeout(() => { - setScrollState("isScrolling", false); - }, 150); - } - }} - > - }> -
- - updateCaptionSetting("enabled", checked)} - /> + }> +
+
+
+
+ +
+ + {(model) => { + const isDownloaded = () => + downloadedModels().includes(model.name); + const isSelected = () => selectedModel() === model.name; + + return ( + + ); + }} + +
+
+ + + + options={LANGUAGE_OPTIONS.map((l) => l.code)} + value={selectedLanguage()} + onChange={(value: string | null) => { + if (value) setSelectedLanguage(value); + }} + itemComponent={(props) => ( + + as={KSelect.Item} + item={props.item} + > + + { + LANGUAGE_OPTIONS.find( + (l) => l.code === props.item.rawValue, + )?.label + } + + + )} + > + + class="flex-1 text-left truncate"> + {(state) => { + const language = LANGUAGE_OPTIONS.find( + (l) => l.code === state.selectedOption(), + ); + return ( + {language?.label || "Select a language"} + ); + }} + + + + + + + + as={KSelect.Content} + class={topLeftAnimateClasses} + > + + class="max-h-48 overflow-y-auto" + as={KSelect.Listbox} + /> + + + - -
- {/* Model Selection and Download Section */} -
-
- - - options={MODEL_OPTIONS.filter((m) => - downloadedModels().includes(m.name), - ).map((m) => m.name)} - value={selectedModel()} - onChange={(value: string | null) => { - if (value) { - batch(() => { - setSelectedModel(value); - setModelExists(downloadedModels().includes(value)); - }); - } - }} - itemComponent={(props) => ( - - as={KSelect.Item} - item={props.item} - > - - { - MODEL_OPTIONS.find( - (m) => m.name === props.item.rawValue, - )?.label - } - - - )} - > - - class="flex-1 text-left truncate"> - {(state) => { - const model = MODEL_OPTIONS.find( - (m) => m.name === state.selectedOption(), - ); - return ( - {model?.label || "Select a model"} - ); - }} - - - - - - - - as={KSelect.Content} - class={topLeftAnimateClasses} - > - - class="max-h-48 overflow-y-auto" - as={KSelect.Listbox} - /> - - - -
- +
+ - - - options={MODEL_OPTIONS.map((m) => m.name)} - value={selectedModel()} - onChange={(value: string | null) => { - if (value) setSelectedModel(value); - }} +
- - } > - Download{" "} - { - MODEL_OPTIONS.find((m) => m.name === selectedModel()) - ?.label - } - - } - > -
-
+ Downloading... {Math.round(downloadProgress())}% + + + +
-

- Downloading{" "} - { - MODEL_OPTIONS.find( - (m) => m.name === downloadingModel(), - )?.label - } - : {Math.round(downloadProgress())}% -

-
-
-
- - {/* Language Selection */} - - - options={LANGUAGE_OPTIONS.map((l) => l.code)} - value={selectedLanguage()} - onChange={(value: string | null) => { - if (value) setSelectedLanguage(value); - }} - itemComponent={(props) => ( - - as={KSelect.Item} - item={props.item} - > - - { - LANGUAGE_OPTIONS.find( - (l) => l.code === props.item.rawValue, - )?.label - } - - - )} - > - - class="flex-1 text-left truncate"> - {(state) => { - const language = LANGUAGE_OPTIONS.find( - (l) => l.code === state.selectedOption(), - ); - return ( - - {language?.label || "Select a language"} - - ); - }} - - - - - - - - as={KSelect.Content} - class={topLeftAnimateClasses} - > - - class="max-h-48 overflow-y-auto" - as={KSelect.Listbox} - /> - - - - - - {/* Generate Captions Button */} + +
+ } + > +
+
+
- {/* Font Settings */} - }> -
-
- Font Family - - options={fontOptions.map((f) => f.value)} - value={captionSettings.font} - onChange={(value) => { - if (value === null) return; - updateCaptionSetting("font", value); - }} - itemComponent={(props) => ( - - as={KSelect.Item} - item={props.item} - > - - { - fontOptions.find( - (f) => f.value === props.item.rawValue, - )?.label - } - - - )} - > - - > - {(state) => - fontOptions.find( - (f) => f.value === state.selectedOption(), - )?.label - } - - - - - - - - as={KSelect.Content} - class={topLeftAnimateClasses} - > - - class="max-h-48 overflow-y-auto" - as={KSelect.Listbox} - /> - - - -
- -
- Size - updateCaptionSetting("size", v[0])} - minValue={12} - maxValue={48} - step={1} - /> -
- -
- Font Color - - updateCaptionSetting("color", value) - } - /> -
-
-
- - {/* Background Settings */} - } - > -
-
- - Background Color - - - updateCaptionSetting("backgroundColor", value) - } - /> -
- -
- - Background Opacity - - - updateCaptionSetting("backgroundOpacity", v[0]) - } - minValue={0} - maxValue={100} - step={1} - /> -
-
-
- - {/* Position Settings */} - }> +
+ }> +
+
+ Font Family - options={["top", "bottom"]} - value={captionSettings.position || "bottom"} + options={fontOptions.map((f) => f.value)} + value={getSetting("font")} onChange={(value) => { if (value === null) return; - updateCaptionSetting("position", value); + updateCaptionSetting("font", value); }} + disabled={!hasCaptions()} itemComponent={(props) => ( as={KSelect.Item} item={props.item} > - - {props.item.rawValue} + + { + fontOptions.find( + (f) => f.value === props.item.rawValue, + )?.label + } )} > > - {(state) => ( - - {state.selectedOption()} - - )} + {(state) => + fontOptions.find( + (f) => f.value === state.selectedOption(), + )?.label + } @@ -973,174 +623,282 @@ export function CaptionsTab() { class={topLeftAnimateClasses} > + class="max-h-48 overflow-y-auto" as={KSelect.Listbox} /> - - - {/* Style Options */} - }> -
-
- - - updateCaptionSetting("bold", checked) - } - /> - - - - updateCaptionSetting("italic", checked) - } - /> - - - - updateCaptionSetting("outline", checked) - } - /> - -
- - -
- Outline Color - - updateCaptionSetting("outlineColor", value) - } - /> -
-
-
-
+
- {/* Export Options */} - }> - +
+ Size + updateCaptionSetting("size", v[0])} + minValue={12} + maxValue={100} + step={1} + disabled={!hasCaptions()} + /> +
+ +
+ Font Color + updateCaptionSetting("color", value)} + /> +
+
+
+ + }> +
+
+ Background Color + + updateCaptionSetting("backgroundColor", value) + } + /> +
+ +
+ Background Opacity + + updateCaptionSetting("backgroundOpacity", v[0]) + } + minValue={0} + maxValue={100} + step={1} + disabled={!hasCaptions()} + /> +
+
+
+ + }> + + options={POSITION_OPTIONS.map((p) => p.value)} + value={getSetting("position")} + onChange={(value) => { + if (value === null) return; + updateCaptionSetting("position", value); + }} + disabled={!hasCaptions()} + itemComponent={(props) => ( + + as={KSelect.Item} + item={props.item} + > + + { + POSITION_OPTIONS.find( + (p) => p.value === props.item.rawValue, + )?.label + } + + + )} + > + + > + {(state) => ( + + { + POSITION_OPTIONS.find( + (p) => p.value === state.selectedOption(), + )?.label + } + + )} + + + + + + + + as={KSelect.Content} + class={topLeftAnimateClasses} + > + + as={KSelect.Listbox} + /> + + + + + + }> +
+
+ Highlight Color + + updateCaptionSetting("highlightColor", value) + } + /> +
+
+ Fade Duration + + updateCaptionSetting("fadeDuration", v[0] / 100) + } + minValue={0} + maxValue={50} + step={1} + disabled={!hasCaptions()} + /> + + {(getSetting("fadeDuration") * 1000).toFixed(0)}ms + +
+
+
+ + }> +
+
+ - updateCaptionSetting("exportWithSubtitles", checked) + updateCaptionSetting("outline", checked) } + disabled={!hasCaptions()} /> - +
+ + +
+ Outline Color + + updateCaptionSetting("outlineColor", value) + } + /> +
+
+
+
+ + }> + + + updateCaptionSetting("exportWithSubtitles", checked) + } + disabled={!hasCaptions()} + /> + + +
- {/* Caption Segments Section */} - - } + + }> +
+
+ -
+ Add at Current Time + +
-
- {project.captions?.segments.length === 0 ? ( -

- No caption segments found. -

- ) : ( - project.captions?.segments.map((segment) => ( -
-
-
-
- - - updateSegment(segment.id, { - start: parseFloat(e.target.value), - }) - } - /> -
-
- - - updateSegment(segment.id, { - end: parseFloat(e.target.value), - }) - } - /> -
-
- -
- -
-