diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index c7df3d3d9f..b698e146d4 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -5,7 +5,9 @@
"Bash(pnpm lint:*)",
"Bash(pnpm build:*)",
"Bash(cargo check:*)",
- "Bash(cargo fmt:*)"
+ "Bash(cargo fmt:*)",
+ "Bash(pnpm format:*)",
+ "Bash(pnpm exec biome check:*)"
],
"deny": [],
"ask": []
diff --git a/Cargo.lock b/Cargo.lock
index 94269460bc..81ec4842fd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -177,9 +177,9 @@ checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "aho-corasick"
-version = "1.1.3"
+version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
@@ -1174,6 +1174,7 @@ dependencies = [
name = "cap-desktop"
version = "0.4.0"
dependencies = [
+ "aho-corasick",
"anyhow",
"async-stream",
"axum",
@@ -1221,9 +1222,11 @@ dependencies = [
"png 0.17.16",
"posthog-rs",
"rand 0.8.5",
+ "regex",
"relative-path",
"reqwest 0.12.24",
"rodio",
+ "sanitize-filename",
"scap-direct3d",
"scap-screencapturekit",
"scap-targets",
@@ -1601,12 +1604,14 @@ dependencies = [
name = "cap-utils"
version = "0.1.0"
dependencies = [
+ "aho-corasick",
"directories 5.0.1",
"flume",
"futures",
"nix 0.29.0",
"serde",
"serde_json",
+ "tempfile",
"tokio",
"tracing",
"uuid",
@@ -7641,6 +7646,15 @@ dependencies = [
"winapi-util",
]
+[[package]]
+name = "sanitize-filename"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bc984f4f9ceb736a7bb755c3e3bd17dc56370af2600c9780dcc48c66453da34d"
+dependencies = [
+ "regex",
+]
+
[[package]]
name = "scap-cpal"
version = "0.1.0"
@@ -8499,6 +8513,7 @@ version = "2.0.0-rc.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ccbb212565d2dc177bc15ecb7b039d66c4490da892436a4eee5b394d620c9bc"
dependencies = [
+ "chrono",
"paste",
"serde_json",
"specta-macros",
diff --git a/Cargo.toml b/Cargo.toml
index 661b3b85fd..7eae47f5d6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,11 @@
[workspace]
resolver = "2"
-members = ["apps/cli", "apps/desktop/src-tauri", "crates/*", "crates/workspace-hack"]
+members = [
+ "apps/cli",
+ "apps/desktop/src-tauri",
+ "crates/*",
+ "crates/workspace-hack",
+]
[workspace.dependencies]
anyhow = { version = "1.0.86" }
@@ -22,6 +27,7 @@ specta = { version = "=2.0.0-rc.20", features = [
"derive",
"serde_json",
"uuid",
+ "chrono"
] }
serde = { version = "1", features = ["derive"] }
@@ -40,6 +46,7 @@ sentry = { version = "0.42.0", features = [
] }
tracing = "0.1.41"
futures = "0.3.31"
+aho-corasick = "1.1.4"
cidre = { git = "https://github.com/CapSoftware/cidre", rev = "bf84b67079a8", features = [
"macos_12_7",
diff --git a/apps/desktop/src-tauri/Cargo.toml b/apps/desktop/src-tauri/Cargo.toml
index e8d13e2828..df574378e5 100644
--- a/apps/desktop/src-tauri/Cargo.toml
+++ b/apps/desktop/src-tauri/Cargo.toml
@@ -20,11 +20,11 @@ swift-rs = { version = "1.0.6", features = ["build"] }
[dependencies]
tauri = { workspace = true, features = [
- "macos-private-api",
- "protocol-asset",
- "tray-icon",
- "image-png",
- "devtools",
+ "macos-private-api",
+ "protocol-asset",
+ "tray-icon",
+ "image-png",
+ "devtools",
] }
tauri-specta = { version = "=2.0.0-rc.20", features = ["derive", "typescript"] }
tauri-plugin-dialog = "2.2.0"
@@ -60,6 +60,7 @@ tracing.workspace = true
tempfile = "3.9.0"
ffmpeg.workspace = true
chrono = { version = "0.4.31", features = ["serde"] }
+regex = "1.10.4"
rodio = "0.19.0"
png = "0.17.13"
device_query = "4.0.1"
@@ -106,22 +107,24 @@ tauri-plugin-sentry = "0.5.0"
thiserror.workspace = true
bytes = "1.10.1"
async-stream = "0.3.6"
+sanitize-filename = "0.6.0"
tracing-futures = { version = "0.2.5", features = ["futures-03"] }
tracing-opentelemetry = "0.32.0"
opentelemetry = "0.31.0"
-opentelemetry-otlp = "0.31.0" #{ version = , features = ["http-proto", "reqwest-client"] }
+opentelemetry-otlp = "0.31.0" #{ version = , features = ["http-proto", "reqwest-client"] }
opentelemetry_sdk = { version = "0.31.0", features = ["rt-tokio", "trace"] }
posthog-rs = "0.3.7"
workspace-hack = { version = "0.1", path = "../../../crates/workspace-hack" }
+aho-corasick.workspace = true
[target.'cfg(target_os = "macos")'.dependencies]
core-graphics = "0.24.0"
core-foundation = "0.10.0"
objc2-app-kit = { version = "0.3.0", features = [
- "NSWindow",
- "NSResponder",
- "NSHapticFeedback",
+ "NSWindow",
+ "NSResponder",
+ "NSHapticFeedback",
] }
cocoa = "0.26.0"
objc = "0.2.7"
@@ -131,10 +134,10 @@ cidre = { workspace = true }
[target.'cfg(target_os= "windows")'.dependencies]
windows = { workspace = true, features = [
- "Win32_Foundation",
- "Win32_System",
- "Win32_UI_WindowsAndMessaging",
- "Win32_Graphics_Gdi",
+ "Win32_Foundation",
+ "Win32_System",
+ "Win32_UI_WindowsAndMessaging",
+ "Win32_Graphics_Gdi",
] }
windows-sys = { workspace = true }
diff --git a/apps/desktop/src-tauri/src/captions.rs b/apps/desktop/src-tauri/src/captions.rs
index 05678af591..9904909fc9 100644
--- a/apps/desktop/src-tauri/src/captions.rs
+++ b/apps/desktop/src-tauri/src/captions.rs
@@ -18,12 +18,10 @@ use tokio::sync::Mutex;
use tracing::instrument;
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
-// Re-export caption types from cap_project
-pub use cap_project::{CaptionSegment, CaptionSettings};
+pub use cap_project::{CaptionSegment, CaptionSettings, CaptionWord};
use crate::http_client;
-// Convert the project type's float precision from f32 to f64 for compatibility
#[derive(Debug, Serialize, Deserialize, Type, Clone)]
pub struct CaptionData {
pub segments: Vec<CaptionSegment>,
@@ -39,15 +37,12 @@ impl Default for CaptionData {
}
}
-// Model context is shared and cached
lazy_static::lazy_static! {
- static ref WHISPER_CONTEXT: Arc<Mutex<Option<WhisperContext>>> = Arc::new(Mutex::new(None));
+ static ref WHISPER_CONTEXT: Arc<Mutex<Option<Arc<WhisperContext>>>> = Arc::new(Mutex::new(None));
}
-// Constants
const WHISPER_SAMPLE_RATE: u32 = 16000;
-/// Function to handle creating directories for the model
#[tauri::command]
#[specta::specta]
#[instrument]
@@ -55,7 +50,6 @@ pub async fn create_dir(path: String, _recursive: bool) -> Result<(), String> {
std::fs::create_dir_all(path).map_err(|e| format!("Failed to create directory: {e}"))
}
-/// Function to save the model file
#[tauri::command]
#[specta::specta]
#[instrument]
@@ -63,15 +57,14 @@ pub async fn save_model_file(path: String, data: Vec<u8>) -> Result<(), String>
std::fs::write(&path, &data).map_err(|e| format!("Failed to write model file: {e}"))
}
-/// Extract audio from a video file and save it as a temporary WAV file
async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Result<(), String> {
+ log::info!("=== EXTRACT AUDIO START ===");
log::info!("Attempting to extract audio from: {video_path}");
+ log::info!("Output path: {:?}", output_path);
- // Check if this is a .cap directory
if video_path.ends_with(".cap") {
log::info!("Detected .cap project directory");
- // Read the recording metadata
let meta_path = std::path::Path::new(video_path).join("recording-meta.json");
let meta_content = std::fs::read_to_string(&meta_path)
.map_err(|e| format!("Failed to read recording metadata: {e}"))?;
@@ -79,21 +72,23 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
let meta: serde_json::Value = serde_json::from_str(&meta_content)
.map_err(|e| format!("Failed to parse recording metadata: {e}"))?;
- // Get paths for both audio sources
let base_path = std::path::Path::new(video_path);
let mut audio_sources = Vec::new();
if let Some(segments) = meta["segments"].as_array() {
for segment in segments {
- // Add system audio if available
- if let Some(system_audio) = segment["system_audio"]["path"].as_str() {
- audio_sources.push(base_path.join(system_audio));
- }
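+ // Collect each available audio source once; metadata keys can repeat paths, so skip duplicates.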
+ let mut push_source = |path: Option<&str>| {
+ if let Some(path) = path {
+ let full_path = base_path.join(path);
+ if !audio_sources.contains(&full_path) {
+ audio_sources.push(full_path);
+ }
+ }
+ };
- // Add microphone audio if available
- if let Some(audio) = segment["audio"]["path"].as_str() {
- audio_sources.push(base_path.join(audio));
- }
+ push_source(segment["system_audio"]["path"].as_str());
+ push_source(segment["mic"]["path"].as_str());
+ push_source(segment["audio"]["path"].as_str());
}
}
@@ -103,7 +98,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
log::info!("Found {} audio sources", audio_sources.len());
- // Process each audio source using AudioData
let mut mixed_samples = Vec::new();
let mut channel_count = 0;
@@ -121,7 +115,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
mixed_samples = audio.samples().to_vec();
channel_count = audio.channels() as usize;
} else {
- // Handle potential different channel counts by mixing to mono first if needed
if audio.channels() as usize != channel_count {
log::info!(
"Channel count mismatch: {} vs {}, mixing to mono",
@@ -129,24 +122,20 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
audio.channels()
);
- // If we have mixed samples with multiple channels, convert to mono
if channel_count > 1 {
let mono_samples = convert_to_mono(&mixed_samples, channel_count);
mixed_samples = mono_samples;
channel_count = 1;
}
- // Convert the new audio to mono too if it has multiple channels
let samples = if audio.channels() > 1 {
convert_to_mono(audio.samples(), audio.channels() as usize)
} else {
audio.samples().to_vec()
};
- // Mix mono samples
mix_samples(&mut mixed_samples, &samples);
} else {
- // Same channel count, simple mix
mix_samples(&mut mixed_samples, audio.samples());
}
}
@@ -158,7 +147,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
}
}
- // No matter what, ensure we have mono audio for Whisper
if channel_count > 1 {
log::info!("Converting final mixed audio from {channel_count} channels to mono");
mixed_samples = convert_to_mono(&mixed_samples, channel_count);
@@ -166,10 +154,22 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
}
if mixed_samples.is_empty() {
+ log::error!("No audio samples after processing all sources");
return Err("Failed to process any audio sources".to_string());
}
- // Convert to WAV format with desired sample rate
+ log::info!("Final mixed audio: {} samples", mixed_samples.len());
+ let mix_rms =
+ (mixed_samples.iter().map(|&s| s * s).sum::<f32>() / mixed_samples.len() as f32).sqrt();
+ log::info!("Mixed audio RMS: {:.4}", mix_rms);
+
+ if mix_rms < 0.001 {
+ log::warn!(
+ "WARNING: Mixed audio RMS is very low ({:.6}) - audio may be nearly silent!",
+ mix_rms
+ );
+ }
+
let mut output = avformat::output(&output_path)
.map_err(|e| format!("Failed to create output file: {e}"))?;
@@ -199,7 +199,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
.write_header()
.map_err(|e| format!("Failed to write header: {e}"))?;
- // Create resampler for sample rate conversion
let mut resampler = resampling::Context::get(
avformat::Sample::F32(avformat::sample::Type::Packed),
channel_layout,
@@ -210,9 +209,7 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
)
.map_err(|e| format!("Failed to create resampler: {e}"))?;
- // Process audio in chunks
let frame_size = encoder.frame_size() as usize;
- // Check if frame_size is zero and use a fallback
let frame_size = if frame_size == 0 { 1024 } else { frame_size };
log::info!(
@@ -229,15 +226,12 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
);
frame.set_rate(WHISPER_SAMPLE_RATE);
- // Make sure we have samples and a valid chunk size
if !mixed_samples.is_empty() && frame_size * channel_count > 0 {
- // Process chunks of audio
for (chunk_idx, chunk) in mixed_samples.chunks(frame_size * channel_count).enumerate() {
if chunk_idx % 100 == 0 {
log::info!("Processing chunk {}, size: {}", chunk_idx, chunk.len());
}
- // Create a new input frame with actual data from the chunk
let mut input_frame = ffmpeg::frame::Audio::new(
avformat::Sample::F32(avformat::sample::Type::Packed),
chunk.len() / channel_count,
@@ -245,7 +239,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
);
input_frame.set_rate(AudioData::SAMPLE_RATE);
- // Copy data from chunk to frame
let bytes = unsafe {
std::slice::from_raw_parts(
chunk.as_ptr() as *const u8,
@@ -254,7 +247,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
};
input_frame.data_mut(0)[0..bytes.len()].copy_from_slice(bytes);
- // Create output frame for resampled data
let mut output_frame = ffmpeg::frame::Audio::new(
avformat::Sample::I16(avformat::sample::Type::Packed),
frame_size,
@@ -262,7 +254,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
);
output_frame.set_rate(WHISPER_SAMPLE_RATE);
- // Use the input frame with actual data instead of the empty frame
match resampler.run(&input_frame, &mut output_frame) {
Ok(_) => {
if chunk_idx % 100 == 0 {
@@ -284,7 +275,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
continue;
}
- // Process each encoded packet
loop {
let mut packet = ffmpeg::Packet::empty();
match encoder.receive_packet(&mut packet) {
@@ -299,12 +289,10 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
}
}
- // Flush the encoder
encoder
.send_eof()
.map_err(|e| format!("Failed to send EOF: {e}"))?;
- // Process final packets in a loop with limited borrow scope
loop {
let mut packet = ffmpeg::Packet::empty();
let received = encoder.receive_packet(&mut packet);
@@ -313,7 +301,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
break;
}
- // Use a block to limit the scope of the output borrow
{
if let Err(e) = packet.write_interleaved(&mut output) {
return Err(format!("Failed to write final packet: {e}"));
@@ -325,9 +312,9 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
.write_trailer()
.map_err(|e| format!("Failed to write trailer: {e}"))?;
+ log::info!("=== EXTRACT AUDIO END (from .cap) ===");
Ok(())
} else {
- // Handle regular video file
let mut input =
avformat::input(&video_path).map_err(|e| format!("Failed to open video file: {e}"))?;
@@ -338,25 +325,20 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
let codec_params = stream.parameters();
- // Get decoder parameters first
let decoder_ctx = avcodec::Context::from_parameters(codec_params.clone())
.map_err(|e| format!("Failed to create decoder context: {e}"))?;
- // Create and open the decoder
let mut decoder = decoder_ctx
.decoder()
.audio()
.map_err(|e| format!("Failed to create decoder: {e}"))?;
- // Now we can access audio-specific methods
let decoder_format = decoder.format();
let decoder_channel_layout = decoder.channel_layout();
let decoder_rate = decoder.rate();
- // Set up and prepare encoder and output separately to avoid multiple borrows
let channel_layout = ChannelLayout::MONO;
- // Create encoder first
let mut encoder_ctx = avcodec::Context::new()
.encoder()
.audio()
@@ -373,11 +355,9 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
.open_as(codec)
.map_err(|e| format!("Failed to open encoder: {e}"))?;
- // Create output context separately
let mut output = avformat::output(&output_path)
.map_err(|e| format!("Failed to create output file: {e}"))?;
- // Add stream and get parameters in a block to limit the borrow
let stream_params = {
let mut output_stream = output
.add_stream(codec)
@@ -385,16 +365,13 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
output_stream.set_parameters(&encoder);
- // Store the stream parameters we need for later
(output_stream.index(), output_stream.id())
};
- // Write header
output
.write_header()
.map_err(|e| format!("Failed to write header: {e}"))?;
- // Create resampler
let mut resampler = resampling::Context::get(
decoder_format,
decoder_channel_layout,
@@ -405,7 +382,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
)
.map_err(|e| format!("Failed to create resampler: {e}"))?;
- // Create frames
let mut decoded_frame = ffmpeg::frame::Audio::empty();
let mut resampled_frame = ffmpeg::frame::Audio::new(
avformat::Sample::I16(avformat::sample::Type::Packed),
@@ -413,22 +389,15 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
channel_layout,
);
- // Save the stream index from the original stream (not the output stream)
let input_stream_index = stream.index();
- // Process packets one at a time, cloning what we need from input packets
let mut packet_queue = Vec::new();
- // First collect all the packets we need by cloning the data
{
- // Use a separate block to limit the immutable borrow lifetime
for (stream_idx, packet) in input.packets() {
if stream_idx.index() == input_stream_index {
- // Clone the packet data to avoid borrowing input
if let Some(data) = packet.data() {
- // Copy the packet data to a new packet
let mut cloned_packet = ffmpeg::Packet::copy(data);
- // Copy timing information
if let Some(pts) = packet.pts() {
cloned_packet.set_pts(Some(pts));
}
@@ -441,14 +410,12 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
}
}
- // Then process each cloned packet
for packet_res in packet_queue {
if let Err(e) = decoder.send_packet(&packet_res) {
log::warn!("Failed to send packet to decoder: {e}");
continue;
}
- // Process decoded frames
while decoder.receive_frame(&mut decoded_frame).is_ok() {
if let Err(e) = resampler.run(&decoded_frame, &mut resampled_frame) {
log::warn!("Failed to resample audio: {e}");
@@ -460,12 +427,10 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
continue;
}
- // Process encoded packets
loop {
let mut packet = ffmpeg::Packet::empty();
match encoder.receive_packet(&mut packet) {
Ok(_) => {
- // Set the stream for the output packet
packet.set_stream(stream_params.0);
if let Err(e) = packet.write_interleaved(&mut output) {
@@ -478,7 +443,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
}
}
- // Flush the decoder
decoder
.send_eof()
.map_err(|e| format!("Failed to send EOF to decoder: {e}"))?;
@@ -492,7 +456,6 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
.send_frame(&resampled_frame)
.map_err(|e| format!("Failed to send final frame: {e}"))?;
- // Process final encoded packets
loop {
let mut packet = ffmpeg::Packet::empty();
let received = encoder.receive_packet(&mut packet);
@@ -507,53 +470,73 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re
}
}
- // Close the output file with trailer
output
.write_trailer()
.map_err(|e| format!("Failed to write trailer: {e}"))?;
+ log::info!("=== EXTRACT AUDIO END (from video) ===");
Ok(())
}
}
-/// Load or initialize the WhisperContext
async fn get_whisper_context(model_path: &str) -> Result<Arc<WhisperContext>, String> {
let mut context_guard = WHISPER_CONTEXT.lock().await;
- // Always create a new context to avoid issues with multiple uses
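+ // Reuse the cached context: loading the model is expensive and the context can be shared via Arc.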
+ if let Some(ref existing) = *context_guard {
+ log::info!("Reusing cached Whisper context");
+ return Ok(existing.clone());
+ }
+
log::info!("Initializing Whisper context with model: {model_path}");
let ctx = WhisperContext::new_with_params(model_path, WhisperContextParameters::default())
.map_err(|e| format!("Failed to load Whisper model: {e}"))?;
- *context_guard = Some(ctx);
+ let ctx_arc = Arc::new(ctx);
+ *context_guard = Some(ctx_arc.clone());
+
+ Ok(ctx_arc)
+}
+
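+/// Heuristic filter for Whisper control tokens ("[_BEG_]", "_TT_" timestamps, "<|...|>" markers) that should not appear in captions.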
+fn is_special_token(token_text: &str) -> bool {
+ let trimmed = token_text.trim();
+ if trimmed.is_empty() {
+ return true;
+ }
+
+ let is_special = trimmed.contains('[')
+ || trimmed.contains(']')
+ || trimmed.contains("_TT_")
+ || trimmed.contains("_BEG_")
+ || trimmed.contains("<|");
- // Get a reference to the context and wrap it in an Arc
- let context_ref = context_guard.as_ref().unwrap();
- let context_arc = unsafe { Arc::new(std::ptr::read(context_ref)) };
- Ok(context_arc)
+ if is_special {
+ log::debug!("Filtering special token: {:?}", token_text);
+ }
+
+ is_special
}
-/// Process audio file with Whisper for transcription
fn process_with_whisper(
audio_path: &PathBuf,
context: Arc<WhisperContext>,
language: &str,
) -> Result<CaptionData, String> {
+ log::info!("=== WHISPER TRANSCRIPTION START ===");
log::info!("Processing audio file: {audio_path:?}");
+ log::info!("Language setting: {}", language);
- // Set up parameters for Whisper
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
- // Configure parameters for better caption quality
params.set_translate(false);
params.set_print_special(false);
params.set_print_progress(false);
params.set_print_realtime(false);
- params.set_token_timestamps(true); // Enable timestamps for captions
- params.set_language(Some(if language == "auto" { "auto" } else { language })); // Use selected language or auto-detect
- params.set_max_len(i32::MAX); // No max length for transcription
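+ // token_timestamps must remain enabled; the word-level timing extraction below depends on it.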
+ params.set_token_timestamps(true);
+ params.set_language(Some(if language == "auto" { "auto" } else { language }));
+ params.set_max_len(i32::MAX);
+
+ log::info!("Whisper params - translate: false, token_timestamps: true, max_len: MAX");
- // Load audio file
let mut audio_file = File::open(audio_path)
.map_err(|e| format!("Failed to open audio file: {e} at path: {audio_path:?}"))?;
let mut audio_data = Vec::new();
@@ -563,7 +546,6 @@ fn process_with_whisper(
log::info!("Processing audio file of size: {} bytes", audio_data.len());
- // Convert audio data to the required format (16-bit mono PCM)
let mut audio_data_f32 = Vec::new();
for i in (0..audio_data.len()).step_by(2) {
if i + 1 < audio_data.len() {
@@ -572,24 +554,42 @@ fn process_with_whisper(
}
}
- log::info!("Converted {} samples to f32 format", audio_data_f32.len());
+ let duration_seconds = audio_data_f32.len() as f32 / WHISPER_SAMPLE_RATE as f32;
+ log::info!(
+ "Converted {} samples to f32 format (duration: {:.2}s at {}Hz)",
+ audio_data_f32.len(),
+ duration_seconds,
+ WHISPER_SAMPLE_RATE
+ );
- // Log sample data statistics for debugging
if !audio_data_f32.is_empty() {
let min_sample = audio_data_f32.iter().fold(f32::MAX, |a, &b| a.min(b));
let max_sample = audio_data_f32.iter().fold(f32::MIN, |a, &b| a.max(b));
let avg_sample = audio_data_f32.iter().sum::<f32>() / audio_data_f32.len() as f32;
- log::info!("Audio samples - min: {min_sample}, max: {max_sample}, avg: {avg_sample}");
+ let rms = (audio_data_f32.iter().map(|&s| s * s).sum::<f32>()
+ / audio_data_f32.len() as f32)
+ .sqrt();
+ log::info!(
+ "Audio samples - min: {:.4}, max: {:.4}, avg: {:.6}, RMS: {:.4}",
+ min_sample,
+ max_sample,
+ avg_sample,
+ rms
+ );
+
+ if rms < 0.001 {
+ log::warn!(
+ "WARNING: Audio RMS is very low ({:.6}) - audio may be nearly silent!",
+ rms
+ );
+ }
- // Sample a few values
- let sample_count = audio_data_f32.len().min(10);
- for i in 0..sample_count {
- let idx = i * audio_data_f32.len() / sample_count;
- log::info!("Sample {}: {}", idx, audio_data_f32[idx]);
+ log::info!("First 20 audio samples:");
+ for i in 0..audio_data_f32.len().min(20) {
+ log::info!(" Sample[{}] = {:.6}", i, audio_data_f32[i]);
}
}
- // Run the transcription
let mut state = context
.create_state()
.map_err(|e| format!("Failed to create Whisper state: {e}"))?;
@@ -598,7 +598,6 @@ fn process_with_whisper(
.full(params, &audio_data_f32[..])
.map_err(|e| format!("Failed to run Whisper transcription: {e}"))?;
- // Process results: convert Whisper segments to CaptionSegment
let num_segments = state
.full_n_segments()
.map_err(|e| format!("Failed to get number of segments: {e}"))?;
@@ -608,11 +607,10 @@ fn process_with_whisper(
let mut segments = Vec::new();
for i in 0..num_segments {
- let text = state
+ let raw_text = state
.full_get_segment_text(i)
.map_err(|e| format!("Failed to get segment text: {e}"))?;
- // Properly unwrap the Result first, then convert i64 to f64
let start_i64 = state
.full_get_segment_t0(i)
.map_err(|e| format!("Failed to get segment start time: {e}"))?;
@@ -620,30 +618,180 @@ fn process_with_whisper(
.full_get_segment_t1(i)
.map_err(|e| format!("Failed to get segment end time: {e}"))?;
- // Convert timestamps from centiseconds to seconds (as f32 for CaptionSegment)
let start_time = (start_i64 as f32) / 100.0;
let end_time = (end_i64 as f32) / 100.0;
- // Add debug logging for timestamps
log::info!(
- "Segment {}: start={}, end={}, text='{}'",
+ "=== Segment {}: start={:.2}s, end={:.2}s, raw_text='{}'",
i,
start_time,
end_time,
- text.trim()
+ raw_text.trim()
);
- if !text.trim().is_empty() {
+ let mut words = Vec::new();
+ let num_tokens = state
+ .full_n_tokens(i)
+ .map_err(|e| format!("Failed to get token count: {e}"))?;
+
+ log::info!(" Segment {} has {} tokens", i, num_tokens);
+
+ let mut current_word = String::new();
+ let mut word_start: Option<f32> = None;
+ let mut word_end: f32 = start_time;
+
+ for t in 0..num_tokens {
+ let token_text = state.full_get_token_text(i, t).unwrap_or_default();
+ let token_id = state.full_get_token_id(i, t).unwrap_or(0);
+ let token_prob = state.full_get_token_prob(i, t).unwrap_or(0.0);
+
+ if is_special_token(&token_text) {
+ log::debug!(
+ " Token[{}]: id={}, text={:?} -> SKIPPED (special)",
+ t,
+ token_id,
+ token_text
+ );
+ continue;
+ }
+
+ let token_data = state.full_get_token_data(i, t).ok();
+
+ if let Some(data) = token_data {
+ let token_start = (data.t0 as f32) / 100.0;
+ let token_end = (data.t1 as f32) / 100.0;
+
+ log::info!(
+ " Token[{}]: id={}, text={:?}, t0={:.2}s, t1={:.2}s, prob={:.4}",
+ t,
+ token_id,
+ token_text,
+ token_start,
+ token_end,
+ token_prob
+ );
+
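+ // Whisper emits sub-word tokens; a leading space or newline marks the start of a new word.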
+ if token_text.starts_with(' ') || token_text.starts_with('\n') {
+ if !current_word.is_empty() {
+ if let Some(ws) = word_start {
+ log::info!(
+ " -> Completing word: '{}' ({:.2}s - {:.2}s)",
+ current_word.trim(),
+ ws,
+ word_end
+ );
+ words.push(CaptionWord {
+ text: current_word.trim().to_string(),
+ start: ws,
+ end: word_end,
+ });
+ }
+ }
+ current_word = token_text.trim().to_string();
+ word_start = Some(token_start);
+ log::debug!(
+ " -> Starting new word: '{}' at {:.2}s",
+ current_word,
+ token_start
+ );
+ } else {
+ if word_start.is_none() {
+ word_start = Some(token_start);
+ log::debug!(" -> Word start set to {:.2}s", token_start);
+ }
+ current_word.push_str(&token_text);
+ log::debug!(" -> Appending to word: '{}'", current_word);
+ }
+ word_end = token_end;
+ } else {
+ log::warn!(
+ " Token[{}]: id={}, text={:?} -> NO TIMING DATA",
+ t,
+ token_id,
+ token_text
+ );
+ }
+ }
+
+ if !current_word.trim().is_empty() {
+ if let Some(ws) = word_start {
+ log::info!(
+ " -> Final word: '{}' ({:.2}s - {:.2}s)",
+ current_word.trim(),
+ ws,
+ word_end
+ );
+ words.push(CaptionWord {
+ text: current_word.trim().to_string(),
+ start: ws,
+ end: word_end,
+ });
+ }
+ }
+
+ log::info!(" Segment {} produced {} words", i, words.len());
+ for (w_idx, word) in words.iter().enumerate() {
+ log::info!(
+ " Word[{}]: '{}' ({:.2}s - {:.2}s)",
+ w_idx,
+ word.text,
+ word.start,
+ word.end
+ );
+ }
+
+ if words.is_empty() {
+ log::warn!(" Segment {} has no words, skipping", i);
+ continue;
+ }
+
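+ // Split long Whisper segments into cues of at most six words so captions stay short on screen.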
+ const MAX_WORDS_PER_SEGMENT: usize = 6;
+
+ let word_chunks: Vec<Vec<CaptionWord>> = words
+ .chunks(MAX_WORDS_PER_SEGMENT)
+ .map(|chunk| chunk.to_vec())
+ .collect();
+
+ for (chunk_idx, chunk_words) in word_chunks.into_iter().enumerate() {
+ let segment_text = chunk_words
+ .iter()
+ .map(|word| word.text.clone())
+ .collect::<Vec<_>>()
+ .join(" ");
+
+ let segment_start = chunk_words
+ .first()
+ .map(|word| word.start)
+ .unwrap_or(start_time);
+ let segment_end = chunk_words.last().map(|word| word.end).unwrap_or(end_time);
+
segments.push(CaptionSegment {
- id: format!("segment-{i}"),
- start: start_time,
- end: end_time,
- text: text.trim().to_string(),
+ id: format!("segment-{i}-{chunk_idx}"),
+ start: segment_start,
+ end: segment_end,
+ text: segment_text,
+ words: chunk_words,
});
}
}
- log::info!("Successfully processed {} segments", segments.len());
+ log::info!("=== WHISPER TRANSCRIPTION COMPLETE ===");
+ log::info!("Total segments: {}", segments.len());
+
+ let total_words: usize = segments.iter().map(|s| s.words.len()).sum();
+ log::info!("Total words: {}", total_words);
+
+ log::info!("=== FINAL TRANSCRIPTION SUMMARY ===");
+ for segment in &segments {
+ log::info!(
+ "Segment '{}' ({:.2}s - {:.2}s): {}",
+ segment.id,
+ segment.start,
+ segment.end,
+ segment.text
+ );
+ }
+ log::info!("=== END SUMMARY ===");
Ok(CaptionData {
segments,
@@ -651,7 +799,6 @@ fn process_with_whisper(
})
}
-/// Function to transcribe audio from a video file using Whisper
#[tauri::command]
#[specta::specta]
#[instrument]
@@ -660,20 +807,25 @@ pub async fn transcribe_audio(
model_path: String,
language: String,
) -> Result<CaptionData, String> {
- // Check if files exist with detailed error messages
+ log::info!("=== TRANSCRIBE AUDIO COMMAND START ===");
+ log::info!("Video path: {}", video_path);
+ log::info!("Model path: {}", model_path);
+ log::info!("Language: {}", language);
+
if !std::path::Path::new(&video_path).exists() {
+ log::error!("Video file not found at path: {}", video_path);
return Err(format!("Video file not found at path: {video_path}"));
}
if !std::path::Path::new(&model_path).exists() {
+ log::error!("Model file not found at path: {}", model_path);
return Err(format!("Model file not found at path: {model_path}"));
}
- // Create temp dir with better error handling
let temp_dir = tempdir().map_err(|e| format!("Failed to create temporary directory: {e}"))?;
let audio_path = temp_dir.path().join("audio.wav");
+ log::info!("Temp audio path: {:?}", audio_path);
- // First try the ffmpeg implementation
match extract_audio_from_video(&video_path, &audio_path).await {
Ok(_) => log::info!("Successfully extracted audio to {audio_path:?}"),
Err(e) => {
@@ -682,39 +834,73 @@ pub async fn transcribe_audio(
}
}
- // Verify the audio file was created
if !audio_path.exists() {
+ log::error!("Audio file was not created at {:?}", audio_path);
return Err("Failed to create audio file for transcription".to_string());
}
- log::info!("Audio file created at: {audio_path:?}");
+ let audio_metadata = std::fs::metadata(&audio_path).ok();
+ if let Some(meta) = &audio_metadata {
+ log::info!(
+ "Audio file created at: {:?}, size: {} bytes",
+ audio_path,
+ meta.len()
+ );
+ }
- // Get or initialize Whisper context with detailed error handling
let context = match get_whisper_context(&model_path).await {
- Ok(ctx) => ctx,
+ Ok(ctx) => {
+ log::info!("Whisper context ready");
+ ctx
+ }
Err(e) => {
log::error!("Failed to initialize Whisper context: {e}");
return Err(format!("Failed to initialize transcription model: {e}"));
}
};
- // Process with Whisper and handle errors
- match process_with_whisper(&audio_path, context, &language) {
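+ // Whisper inference is CPU-bound; run it on a blocking thread so the async runtime is not stalled.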
+ let audio_path_clone = audio_path.clone();
+ let language_clone = language.clone();
+ log::info!("Starting Whisper transcription in blocking task...");
+ let whisper_result = tokio::task::spawn_blocking(move || {
+ process_with_whisper(&audio_path_clone, context, &language_clone)
+ })
+ .await
+ .map_err(|e| format!("Whisper task panicked: {e}"))?;
+
+ match whisper_result {
Ok(captions) => {
+ log::info!("=== TRANSCRIBE AUDIO RESULT ===");
+ log::info!(
+ "Transcription produced {} segments",
+ captions.segments.len()
+ );
+
+ for (idx, segment) in captions.segments.iter().enumerate() {
+ log::info!(
+ " Result Segment[{}]: '{}' ({} words)",
+ idx,
+ segment.text,
+ segment.words.len()
+ );
+ }
+
if captions.segments.is_empty() {
log::warn!("No caption segments were generated");
return Err("No speech detected in the audio".to_string());
}
+
+ log::info!("=== TRANSCRIBE AUDIO COMMAND END (success) ===");
Ok(captions)
}
Err(e) => {
log::error!("Failed to process audio with Whisper: {e}");
+ log::info!("=== TRANSCRIBE AUDIO COMMAND END (error) ===");
Err(format!("Failed to transcribe audio: {e}"))
}
}
}
-/// Function to save caption data to a file
#[tauri::command]
#[specta::specta]
#[instrument(skip(app))]
@@ -723,7 +909,30 @@ pub async fn save_captions(
video_id: String,
captions: CaptionData,
) -> Result<(), String> {
+ tracing::info!("=== SAVE CAPTIONS START ===");
tracing::info!("Saving captions for video_id: {}", video_id);
+ tracing::info!("Received {} segments to save", captions.segments.len());
+
+ for (idx, segment) in captions.segments.iter().enumerate() {
+ tracing::info!(
+ " Segment[{}] '{}': '{}' ({} words, {:.2}s - {:.2}s)",
+ idx,
+ segment.id,
+ segment.text,
+ segment.words.len(),
+ segment.start,
+ segment.end
+ );
+ for (w_idx, word) in segment.words.iter().enumerate() {
+ tracing::debug!(
+ " Word[{}]: '{}' ({:.2}s - {:.2}s)",
+ w_idx,
+ word.text,
+ word.start,
+ word.end
+ );
+ }
+ }
let captions_dir = app_captions_dir(&app, &video_id)?;
@@ -739,13 +948,10 @@ pub async fn save_captions(
tracing::info!("Writing captions to: {:?}", captions_path);
- // Ensure settings are included with default values if not provided
let settings = captions.settings.unwrap_or_default();
- // Create a JSON structure manually to ensure field naming consistency
let mut json_obj = serde_json::Map::new();
- // Add segments array
let segments_array = serde_json::to_value(
captions
.segments
@@ -769,6 +975,18 @@ pub async fn save_captions(
"text".to_string(),
serde_json::Value::String(seg.text.clone()),
);
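+ // Persist per-word timings alongside each segment so word-level rendering can use them.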
+ let words_array: Vec<serde_json::Value> = seg
+ .words
+ .iter()
+ .map(|w| {
+ serde_json::json!({
+ "text": w.text,
+ "start": w.start,
+ "end": w.end
+ })
+ })
+ .collect();
+ segment.insert("words".to_string(), serde_json::Value::Array(words_array));
segment
})
.collect::<Vec<_>>(),
@@ -780,7 +998,6 @@ pub async fn save_captions(
json_obj.insert("segments".to_string(), segments_array);
- // Add settings object with camelCase naming
let mut settings_obj = serde_json::Map::new();
settings_obj.insert(
"enabled".to_string(),
@@ -827,13 +1044,22 @@ pub async fn save_captions(
"exportWithSubtitles".to_string(),
serde_json::Value::Bool(settings.export_with_subtitles),
);
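+ // The new highlight settings use the same camelCase keys as the fields above.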
+ settings_obj.insert(
+ "highlightColor".to_string(),
+ serde_json::Value::String(settings.highlight_color.clone()),
+ );
+ settings_obj.insert(
+ "fadeDuration".to_string(),
+ serde_json::Value::Number(
+ serde_json::Number::from_f64(settings.fade_duration as f64).unwrap(),
+ ),
+ );
json_obj.insert(
"settings".to_string(),
serde_json::Value::Object(settings_obj),
);
- // Convert to pretty JSON string
let json = serde_json::to_string_pretty(&json_obj).map_err(|e| {
tracing::error!("Failed to serialize captions: {}", e);
format!("Failed to serialize captions: {e}")
@@ -845,19 +1071,16 @@ pub async fn save_captions(
})?;
tracing::info!("Successfully saved captions");
+ tracing::info!("=== SAVE CAPTIONS END ===");
Ok(())
}
-/// Helper function to parse captions from a JSON string
-/// This can be used by other modules to parse captions without duplicating code
pub fn parse_captions_json(json: &str) -> Result<CaptionData, String> {
- // Use a more flexible parsing approach
match serde_json::from_str::<serde_json::Value>(json) {
Ok(json_value) => {
if let Some(segments_array) = json_value.get("segments").and_then(|v| v.as_array()) {
let mut segments = Vec::new();
- // Process each segment
for segment in segments_array {
if let (Some(id), Some(start), Some(end), Some(text)) = (
segment.get("id").and_then(|v| v.as_str()),
@@ -865,18 +1088,33 @@ pub fn parse_captions_json(json: &str) -> Result<CaptionData, String> {
-
+