From fa020e0b363e7fe127c56ebcaf7aeb6a7cea4071 Mon Sep 17 00:00:00 2001
From: JorySeverijnse <jory@severijnse.eu>
Date: Sun, 18 Jan 2026 19:18:30 +0100
Subject: [PATCH] Add spectrometer. Edit the logic so we don't just create
 pictures which we mash together but instead stream frames for a better
 implementation

---
 Cargo.lock    |  49 +++++++++++
 Cargo.toml    |   3 +
 src/audio.rs  |  72 ++++++----------
 src/lib.rs    |   4 +-
 src/main.rs   |  40 ++++-----
 src/render.rs | 231 ++++++++++++++++++++++++++++++++++++++------------
 src/video.rs  | 107 ++++++++++++-----------
 7 files changed, 330 insertions(+), 176 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index fd3e5aa..2d8255e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -638,6 +638,15 @@ dependencies = [
  "num-traits",
 ]
 
+[[package]]
+name = "num-complex"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
+dependencies = [
+ "num-traits",
+]
+
 [[package]]
 name = "num-derive"
 version = "0.4.2"
@@ -699,6 +708,7 @@ dependencies = [
  "hound",
  "image",
  "rayon",
+ "rustfft",
 ]
 
 [[package]]
@@ -735,6 +745,15 @@ dependencies = [
  "zerocopy",
 ]
 
+[[package]]
+name = "primal-check"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc0d895b311e3af9902528fbb8f928688abbd95872819320517cc24ca6b2bd08"
+dependencies = [
+ "num-integer",
+]
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.105"
@@ -907,6 +926,20 @@ version = "0.8.52"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0c6a884d2998352bb4daf0183589aec883f16a6da1f4dde84d8e2e9a5409a1ce"
 
+[[package]]
+name = "rustfft"
+version = "6.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21db5f9893e91f41798c88680037dba611ca6674703c1a18601b01a72c8adb89"
+dependencies = [
+ "num-complex",
+ "num-integer",
+ "num-traits",
+ "primal-check",
+ "strength_reduce",
+ "transpose",
+]
+
 [[package]]
 name = "rustversion"
 version = "1.0.22"
@@ -946,6 +979,12 @@ version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
 
+[[package]]
+name = "strength_reduce"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82"
+
 [[package]]
 name = "strsim"
 version = "0.11.1"
@@ -997,6 +1036,16 @@ dependencies = [
  "zune-jpeg 0.4.21",
 ]
 
+[[package]]
+name = "transpose"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ad61aed86bc3faea4300c7aee358b4c6d0c8d6ccc36524c96e4c92ccf26e77e"
+dependencies = [
+ "num-integer",
+ "strength_reduce",
+]
+
 [[package]]
 name = "unicode-ident"
 version = "1.0.22"
diff --git a/Cargo.toml b/Cargo.toml
index 9203718..945581c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -28,6 +28,9 @@ clap = { version = "4.4", features = ["derive"] }
 # Error handling
 anyhow = "1.0"
 
+# FFT processing for spectrometer
+rustfft = "6.1"
+
 [profile.release]
 opt-level = 3
 lto = true
diff --git a/src/audio.rs b/src/audio.rs
index 1f6d9e2..e2d5948 100644
--- a/src/audio.rs
+++ b/src/audio.rs
@@ -3,7 +3,6 @@
 //! Handles reading and decoding WAV files into normalized sample data.
 
 use anyhow::{anyhow, Context, Result};
-use hound::WavReader;
 use std::path::Path;
 
 /// Normalized audio sample data.
@@ -20,61 +19,44 @@ pub struct AudioData {
 }
 
 impl AudioData {
-    /// Load and decode a WAV file.
-    ///
-    /// # Arguments
-    ///
-    /// * `file_path` - Path to the WAV file
-    ///
-    /// # Returns
-    ///
-    /// `Result<AudioData>` containing the decoded audio samples
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if:
-    /// - The file cannot be opened
-    /// - Bit depth is not 16-bit
-    /// - Number of channels is not 1 or 2
+    /// Load and decode audio from any supported format using ffmpeg.
     pub fn from_wav(file_path: &Path) -> Result<Self> {
-        let mut reader = WavReader::open(file_path)
-            .with_context(|| format!("Failed to open WAV file: {}", file_path.display()))?;
+        let output = std::process::Command::new("ffmpeg")
+            .arg("-i")
+            .arg(file_path)
+            .arg("-f")
+            .arg("s16le")
+            .arg("-acodec")
+            .arg("pcm_s16le")
+            .arg("-ar")
+            .arg("48000")
+            .arg("-ac")
+            .arg("2")
+            .arg("-")
+            .output()
+            .with_context(|| "Failed to decode audio with ffmpeg")?;
 
-        let spec = reader.spec();
-
-        if spec.bits_per_sample != 16 {
-            return Err(anyhow!(
-                "Unsupported bit depth: {}. Only 16-bit WAV is supported.",
-                spec.bits_per_sample
-            ));
+        if !output.status.success() {
+            return Err(anyhow!("Audio decoding failed: {}", String::from_utf8_lossy(&output.stderr)));
         }
 
-        if spec.channels != 1 && spec.channels != 2 {
-            return Err(anyhow!(
-                "Unsupported number of channels: {}. Only mono and stereo are supported.",
-                spec.channels
-            ));
-        }
-
-        let sample_rate = spec.sample_rate;
-        let samples: Vec<i16> = reader.samples().map(|s| s.unwrap_or(0)).collect();
-        let total_samples = samples.len() / spec.channels as usize;
+        let pcm_data = output.stdout;
+        let sample_rate = 48000;
+        let num_channels = 2;
+        let total_samples = pcm_data.len() / (2 * num_channels);
         let duration = total_samples as f64 / sample_rate as f64;
 
         let mut left_channel = Vec::with_capacity(total_samples);
         let mut right_channel = Vec::with_capacity(total_samples);
 
         for i in 0..total_samples {
-            let offset = i * spec.channels as usize;
-            let left_sample = samples[offset] as f32 / 32768.0;
-            left_channel.push(left_sample);
+            let offset = i * 2 * num_channels;
+            
+            let left_val = i16::from_le_bytes([pcm_data[offset], pcm_data[offset + 1]]);
+            let right_val = i16::from_le_bytes([pcm_data[offset + 2], pcm_data[offset + 3]]);
 
-            if spec.channels >= 2 {
-                let right_sample = samples[offset + 1] as f32 / 32768.0;
-                right_channel.push(right_sample);
-            } else {
-                right_channel.push(left_sample);
-            }
+            left_channel.push(left_val as f32 / 32768.0);
+            right_channel.push(right_val as f32 / 32768.0);
         }
 
         Ok(AudioData {
diff --git a/src/lib.rs b/src/lib.rs
index 2dd8013..d671dde 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -21,5 +21,5 @@ pub mod render;
 pub mod video;
 
 pub use audio::AudioData;
-pub use render::{RenderMode, RenderOptions};
-pub use video::encode_video;
+pub use render::{stream_frames, RenderMode, RenderOptions};
+pub use video::VideoEncoder;
diff --git a/src/main.rs b/src/main.rs
index f207acc..557404b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -11,14 +11,15 @@ use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::Arc;
 
 use oscilloscope_video_gen::audio::AudioData;
-use oscilloscope_video_gen::render::{parse_rgb_hex, render_frames, RenderMode, RenderOptions};
-use oscilloscope_video_gen::video::{encode_video, cleanup_tmp_dir, VideoQuality};
+use oscilloscope_video_gen::render::{parse_rgb_hex, stream_frames, RenderMode, RenderOptions};
+use oscilloscope_video_gen::video::{VideoEncoder, VideoQuality};
 
 #[derive(Debug, Clone, Copy, ValueEnum)]
 enum OutputMode {
     Combined,
     Separate,
     All,
+    Spectrometer,
 }
 
 impl From<OutputMode> for RenderMode {
@@ -27,6 +28,7 @@ impl From<OutputMode> for RenderMode {
             OutputMode::Combined => RenderMode::Combined,
             OutputMode::Separate => RenderMode::Separate,
             OutputMode::All => RenderMode::All,
+            OutputMode::Spectrometer => RenderMode::Spectrometer,
         }
     }
 }
@@ -73,7 +75,7 @@ struct Args {
     #[arg(long, default_value = "30")]
     fps: u32,
 
-    /// Display mode: combined, separate, all
+    /// Display mode: combined, separate, all, spectrometer
     #[arg(long, value_enum, default_value = "all")]
     mode: OutputMode,
 
@@ -196,30 +198,17 @@ fn main() -> Result<()> {
         );
     }
 
-    // Create temp directory
-    let tmp_dir = std::env::temp_dir().join("oscilloscope-render");
-    if tmp_dir.exists() {
-        cleanup_tmp_dir(&tmp_dir);
-    }
-    std::fs::create_dir_all(&tmp_dir)?;
-
     // Progress callback
     let progress = Arc::new(AtomicUsize::new(0));
     let progress_callback = move |percent: f64, current: usize, total: usize| {
         let prev = progress.fetch_add(0, Ordering::SeqCst);
         if current - prev >= 30 || current == total || current == 1 {
             progress.store(current, Ordering::SeqCst);
-            print!("\rRendering: {:.0}% ({}/{})", percent, current, total);
+            print!("\rRendering and Encoding: {:.0}% ({}/{})", percent, current, total);
             let _ = std::io::stdout().flush();
         }
     };
 
-    // Render frames
-    println!("Rendering frames...");
-    let frame_files = render_frames(&audio_data, &options, &tmp_dir, &progress_callback)?;
-    println!();
-    println!("Rendered {} frames", frame_files.len());
-
     // Check if output exists and handle overwrite
     if output.exists() && !args.overwrite {
         return Err(anyhow::anyhow!(
@@ -228,20 +217,21 @@ fn main() -> Result<()> {
         ));
     }
 
-    // Encode video
-    encode_video(
-        &frame_files,
+    let mut encoder = VideoEncoder::new(
         &args.input,
         &output,
+        args.width,
+        args.height,
         args.fps,
         args.quality.into(),
         args.overwrite,
-    )
-    .context("Failed to encode video")?;
+    )?;
 
-    // Cleanup
-    println!("Cleaning up temporary files...");
-    cleanup_tmp_dir(&tmp_dir);
+    println!("Rendering and encoding...");
+    stream_frames(&audio_data, &options, &mut encoder, &progress_callback)?;
+    println!();
+
+    encoder.finish().context("Failed to finish video encoding")?;
 
     let file_size = std::fs::metadata(&output)
         .map(|m| m.len())
diff --git a/src/render.rs b/src/render.rs
index e6e0290..3c16cea 100644
--- a/src/render.rs
+++ b/src/render.rs
@@ -3,9 +3,10 @@
 //! Contains all the logic for drawing oscilloscope visualizations.
 
 use crate::audio::AudioData;
-use anyhow::{anyhow, Context, Result};
+use crate::video::VideoEncoder;
+use anyhow::{anyhow, Result};
 use image::ImageBuffer;
-use std::path::{Path, PathBuf};
+use rustfft::{num_complex::Complex, FftPlanner};
 
 /// Render mode for the oscilloscope visualization.
 #[derive(Debug, Clone, Copy, clap::ValueEnum)]
@@ -16,6 +17,8 @@ pub enum RenderMode {
     Separate,
     /// Left and Right on top row, XY on bottom
     All,
+    /// Frequency spectrum display (spectrometer)
+    Spectrometer,
 }
 
 /// Rendering options for the visualization.
@@ -96,7 +99,7 @@ fn draw_graticule(
 pub fn parse_rgb_hex(hex: &str) -> Result<image::Rgb<u8>> {
     let hex = hex.trim_start_matches('#');
     if hex.len() != 6 {
-        return Err(anyhow::anyhow!("Invalid RGB hex: {}", hex));
+        return Err(anyhow!("Invalid RGB hex: {}", hex));
     }
     let r = u8::from_str_radix(&hex[0..2], 16).map_err(|_| anyhow!("Invalid red component: {}", &hex[0..2]))?;
     let g = u8::from_str_radix(&hex[2..4], 16).map_err(|_| anyhow!("Invalid green component: {}", &hex[2..4]))?;
@@ -104,6 +107,55 @@ pub fn parse_rgb_hex(hex: &str) -> Result<image::Rgb<u8>> {
     Ok(image::Rgb([r, g, b]))
 }
 
+/// Compute frequency spectrum from audio samples using FFT.
+fn compute_spectrum(audio_data: &AudioData, start_sample: usize, window_size: usize) -> Vec<f32> {
+    // Use shorter FFT for better temporal resolution
+    let fft_size = (window_size / 2).next_power_of_two().max(256);
+    let mut planner = FftPlanner::new();
+    let fft = planner.plan_fft_forward(fft_size);
+
+    // Collect audio samples for this window (sum L+R for better bass representation)
+    let mut buffer: Vec<Complex<f32>> = (0..fft_size)
+        .map(|i| {
+            let sample_idx = start_sample + i;
+            if sample_idx < audio_data.left_channel.len() {
+                // Sum channels instead of averaging for stronger bass representation
+                let sample = audio_data.left_channel[sample_idx] + audio_data.right_channel[sample_idx];
+                Complex::new(sample, 0.0)
+            } else {
+                Complex::new(0.0, 0.0)
+            }
+        })
+        .collect();
+
+    // Apply Hann window to reduce spectral leakage
+    for (i, sample) in buffer.iter_mut().enumerate() {
+        let window = 0.5 * (1.0 - (2.0 * std::f32::consts::PI * i as f32 / (fft_size - 1) as f32).cos());
+        sample.re *= window;
+        sample.im *= window;
+    }
+
+    // Apply FFT
+    fft.process(&mut buffer);
+
+    // Compute magnitude spectrum (only positive frequencies, up to Nyquist)
+    let nyquist_bin = fft_size / 2;
+    let mut spectrum: Vec<f32> = buffer[0..nyquist_bin]
+        .iter()
+        .map(|c| c.norm())
+        .collect();
+
+    // Apply gentle (non-logarithmic) compression to preserve dynamics
+    for mag in spectrum.iter_mut() {
+        // Use square root scaling instead of log for better dynamic range
+        *mag = (*mag).sqrt().min(1.0);
+        // NOTE: no frequency-dependent bass boost is applied here; every bin
+        // gets the same square-root scaling and is clamped to 1.0.
+    }
+
+    spectrum
+}
+
 /// Draw a single frame of the visualization.
 pub fn draw_frame(
     audio_data: &AudioData,
@@ -182,12 +234,13 @@ pub fn draw_frame(
             }
         }
         RenderMode::All => {
-            let top_height = height / 2;
-            let bottom_height = height / 2;
+            let half_height = height / 2;
             let half_width = width / 2;
-            let samples_per_pixel = samples_per_frame as f32 / half_width as f32;
+            let quarter_width = width / 4;
+            let samples_per_pixel = samples_per_frame as f32 / quarter_width as f32;
 
-            let left_center_y = top_height / 2;
+            // Top-left: Left channel waveform
+            let left_center_y = half_height / 2;
             let mut prev_y = left_center_y as i32;
             for x in 0..half_width {
                 let sample_index = start_sample + (x as f32 * samples_per_pixel) as usize;
@@ -195,21 +248,22 @@ pub fn draw_frame(
                     break;
                 }
                 let sample = audio_data.left_channel[sample_index];
-                let y = left_center_y as i32 - (sample * (top_height as f32 * 0.35)) as i32;
+                let y = left_center_y as i32 - (sample * (half_height as f32 * 0.35)) as i32;
 
                 draw_line(&mut buffer, x as i32, prev_y, x as i32, y, options.left_color);
                 prev_y = y;
             }
 
-            let right_center_y = top_height / 2;
+            // Top-right: Right channel waveform
+            let right_center_y = half_height / 2;
             let mut prev_y_right = right_center_y as i32;
             for x in 0..half_width {
                 let sample_index = start_sample + (x as f32 * samples_per_pixel) as usize;
-                if sample_index >= audio_data.left_channel.len() {
+                if sample_index >= audio_data.right_channel.len() {
                     break;
                 }
                 let sample = audio_data.right_channel[sample_index];
-                let y = right_center_y as i32 - (sample * (top_height as f32 * 0.35)) as i32;
+                let y = right_center_y as i32 - (sample * (half_height as f32 * 0.35)) as i32;
 
                 draw_line(
                     &mut buffer,
@@ -222,9 +276,10 @@ pub fn draw_frame(
                 prev_y_right = y;
             }
 
-            let xy_center_x = width / 2;
-            let xy_center_y = top_height + bottom_height / 2;
-            let xy_scale = std::cmp::min(half_width, bottom_height) as f32 * 0.35;
+            // Bottom-left: XY pattern
+            let xy_center_x = half_width / 2;
+            let xy_center_y = half_height + half_height / 2;
+            let xy_scale = std::cmp::min(half_width, half_height) as f32 * 0.35;
 
             let xy_samples = (end_sample - start_sample).min(samples_per_frame);
             let mut prev_x = xy_center_x as i32 + (audio_data.left_channel[start_sample] * xy_scale) as i32;
@@ -247,68 +302,134 @@ pub fn draw_frame(
                 prev_y_xy = y;
             }
 
-            for x in 0..width {
-                buffer.put_pixel(x, top_height, image::Rgb([40, 40, 40]));
+            // Bottom-right: Spectrometer
+            let spec_width = half_width;
+            let spec_height = half_height;
+            let spec_x_offset = half_width;
+            let spec_y_offset = half_height;
+
+            let window_size = 512.min(samples_per_frame); // Shorter window for better temporal resolution
+            let spectrum = compute_spectrum(audio_data, start_sample, window_size);
+
+            // Discrete bars with spacing for a proper spectrum analyzer look
+            let num_bars = 32usize; // Fewer bars for better definition
+            let spacing = 1; // 1 pixel spacing between bars
+            let total_spacing = (num_bars - 1) * spacing;
+            let available_width = spec_width - total_spacing as u32;
+            let bar_width = (available_width / num_bars as u32).max(1);
+
+            for i in 0..num_bars {
+                // Map spectrum bins to bars (group multiple bins per bar)
+                let bin_start = (i * spectrum.len() / num_bars).min(spectrum.len());
+                let bin_end = ((i + 1) * spectrum.len() / num_bars).min(spectrum.len());
+                let magnitude = spectrum[bin_start..bin_end].iter().fold(0.0f32, |acc, &x| acc.max(x));
+
+                let bar_height = (magnitude * spec_height as f32 * 0.9) as u32; // Scale to 90% of quadrant height
+                let x = spec_x_offset + (i as u32) * (bar_width + spacing as u32);
+
+                // Draw vertical bar from bottom up within the quadrant
+                for y in 0..bar_height {
+                    let pixel_y = spec_y_offset + spec_height - 1 - y; // Bottom to top in quadrant
+                    for dx in 0..bar_width {
+                        let pixel_x = x + dx;
+                        if pixel_x < width && pixel_y < height && pixel_x >= spec_x_offset {
+                            buffer.put_pixel(pixel_x, pixel_y, options.left_color);
+                        }
+                    }
+                }
             }
-            for y in 0..top_height {
+
+            // Draw grid lines separating quadrants
+            for x in 0..width {
+                buffer.put_pixel(x, half_height, image::Rgb([40, 40, 40]));
+            }
+            for y in 0..height {
                 buffer.put_pixel(half_width, y, image::Rgb([40, 40, 40]));
             }
         }
+        RenderMode::Spectrometer => {
+            // Use a window of samples for FFT
+            let window_size = 512.min(samples_per_frame); // Shorter window for better temporal resolution
+            let spectrum = compute_spectrum(audio_data, start_sample, window_size);
+
+            // Spectrum analyzer style with individual bars and spacing
+            let num_bars = 64usize; // Good number for full screen spectrum analyzer
+            let spacing = 2; // 2 pixel spacing between bars for classic look
+            let total_spacing = (num_bars - 1) * spacing;
+            let available_width = width - total_spacing as u32;
+            let bar_width = (available_width / num_bars as u32).max(1);
+
+            for i in 0..num_bars {
+                // Map spectrum bins to bars (group multiple bins per bar)
+                let bin_start = (i * spectrum.len() / num_bars).min(spectrum.len());
+                let bin_end = ((i + 1) * spectrum.len() / num_bars).min(spectrum.len());
+                let magnitude = spectrum[bin_start..bin_end].iter().fold(0.0f32, |acc, &x| acc.max(x));
+
+                let bar_height = (magnitude * height as f32 * 0.95) as u32; // Scale to 95% of screen height
+                let x = (i as u32) * (bar_width + spacing as u32);
+
+                // Draw vertical bar from bottom up
+                for y in 0..bar_height {
+                    let pixel_y = height - 1 - y; // Bottom to top
+                    for dx in 0..bar_width {
+                        let pixel_x = x + dx;
+                        if pixel_x < width && pixel_y < height {
+                            buffer.put_pixel(pixel_x, pixel_y, options.left_color);
+                        }
+                    }
+                }
+            }
+        }
     }
 
     buffer
 }
 
-/// Render frames to PNG files.
-pub fn render_frames(
+pub fn stream_frames(
     audio_data: &AudioData,
     options: &RenderOptions,
-    tmp_dir: &Path,
+    encoder: &mut VideoEncoder,
     progress_callback: &(impl Fn(f64, usize, usize) + Send + Sync),
-) -> Result<Vec<PathBuf>, anyhow::Error> {
+) -> Result<()> {
     let total_samples = audio_data.left_channel.len();
     let samples_per_frame = (audio_data.sample_rate / options.fps) as usize;
     let total_frames = ((audio_data.duration * options.fps as f64) as usize).max(1);
 
-    let frame_files: Vec<PathBuf> = (0..total_frames)
-        .map(|i| tmp_dir.join(format!("frame_{:06}.png", i)))
-        .collect();
+    let num_threads = rayon::current_num_threads();
+    let chunk_size = num_threads * 2;
 
     use rayon::prelude::*;
-    use std::sync::atomic::{AtomicUsize, Ordering};
-    use std::sync::Arc;
 
-    let progress = Arc::new(AtomicUsize::new(0));
+    for chunk_start in (0..total_frames).step_by(chunk_size) {
+        let chunk_end = (chunk_start + chunk_size).min(total_frames);
+        let frame_indices: Vec<usize> = (chunk_start..chunk_end).collect();
 
-    frame_files
-        .par_iter()
-        .enumerate()
-        .try_for_each(|(frame_idx, frame_file): (usize, &PathBuf)| {
-            let start_sample = std::cmp::min(
-                frame_idx * samples_per_frame,
-                total_samples.saturating_sub(1),
-            );
-
-            let frame = draw_frame(audio_data, start_sample, samples_per_frame, options);
-
-            let file = std::fs::File::create(frame_file)
-                .with_context(|| format!("Failed to create frame file: {}", frame_file.display()))?;
-            let mut writer = std::io::BufWriter::new(file);
-            frame
-                .write_to(&mut writer, image::ImageFormat::Png)
-                .with_context(|| format!("Failed to write frame: {}", frame_file.display()))?;
-
-            let current = progress.fetch_add(1, Ordering::SeqCst) + 1;
-            if current % 30 == 0 || current == total_frames {
-                progress_callback(
-                    current as f64 / total_frames as f64 * 100.0,
-                    current,
-                    total_frames,
+        let frames: Vec<Result<Vec<u8>>> = frame_indices
+            .par_iter()
+            .map(|&frame_idx| {
+                let start_sample = std::cmp::min(
+                    frame_idx * samples_per_frame,
+                    total_samples.saturating_sub(1),
                 );
-            }
 
-            Ok::<_, anyhow::Error>(())
-        })?;
+                let frame = draw_frame(audio_data, start_sample, samples_per_frame, options);
+                Ok(frame.into_raw())
+            })
+            .collect();
 
-    Ok(frame_files)
+        for frame_result in frames {
+            let frame_data = frame_result?;
+            encoder.write_frame(&frame_data)?;
+        }
+
+        let current = chunk_end;
+        progress_callback(
+            current as f64 / total_frames as f64 * 100.0,
+            current,
+            total_frames,
+        );
+    }
+
+    Ok(())
 }
+
diff --git a/src/video.rs b/src/video.rs
index b355107..8d433ac 100644
--- a/src/video.rs
+++ b/src/video.rs
@@ -3,7 +3,9 @@
 //! Handles encoding rendered frames into video files using ffmpeg.
 
 use anyhow::{anyhow, Context, Result};
-use std::path::{Path, PathBuf};
+use std::io::Write;
+use std::path::Path;
+use std::process::{Child, ChildStdin, Command, Stdio};
 
 /// Quality preset for video encoding.
 #[derive(Debug, Clone, Copy, clap::ValueEnum)]
@@ -22,63 +24,70 @@ fn get_quality_settings(quality: VideoQuality) -> (&'static str, &'static str) {
     }
 }
 
-/// Encode video using ffmpeg.
-pub fn encode_video(
-    frame_files: &[PathBuf],
-    audio_file: &Path,
-    output_file: &Path,
-    fps: u32,
-    quality: VideoQuality,
-    overwrite: bool,
-) -> Result<()> {
-    let (video_bitrate, _audio_bitrate) = get_quality_settings(quality);
+pub struct VideoEncoder {
+    child: Child,
+    stdin: ChildStdin,
+}
 
-    let tmp_dir = frame_files
-        .get(0)
-        .and_then(|p| p.parent())
-        .unwrap_or(Path::new("."));
+impl VideoEncoder {
+    pub fn new(
+        audio_file: &Path,
+        output_file: &Path,
+        width: u32,
+        height: u32,
+        fps: u32,
+        quality: VideoQuality,
+        overwrite: bool,
+    ) -> Result<Self> {
+        let (video_bitrate, _) = get_quality_settings(quality);
 
-    let frame_pattern = tmp_dir.join("frame_%06d.png");
+        let mut cmd = Command::new("ffmpeg");
 
-    let mut cmd = std::process::Command::new("ffmpeg");
+        if overwrite {
+            cmd.arg("-y");
+        }
 
-    if overwrite {
-        cmd.arg("-y");
+        cmd.args([
+            "-f", "rawvideo",
+            "-pixel_format", "rgb24",
+            "-video_size", &format!("{}x{}", width, height),
+            "-framerate", &fps.to_string(),
+            "-i", "-",
+            "-i", audio_file.to_str().ok_or_else(|| anyhow!("Invalid audio path"))?,
+            "-c:v", "libx264",
+            "-b:v", video_bitrate,
+            "-c:a", "aac",
+            "-pix_fmt", "yuv420p",
+            "-shortest",
+            output_file.to_str().ok_or_else(|| anyhow!("Invalid output path"))?,
+        ]);
+
+        let mut child = cmd
+            .stdin(Stdio::piped())
+            .stderr(Stdio::piped())
+            .spawn()
+            .with_context(|| "Failed to spawn ffmpeg")?;
+
+        let stdin = child.stdin.take().ok_or_else(|| anyhow!("Failed to open ffmpeg stdin"))?;
+
+        Ok(Self { child, stdin })
     }
 
-    cmd.args([
-        "-framerate",
-        &fps.to_string(),
-        "-i",
-        frame_pattern.to_str().ok_or_else(|| anyhow!("Invalid frame pattern"))?,
-        "-i",
-        audio_file.to_str().ok_or_else(|| anyhow!("Invalid audio file path"))?,
-        "-r",
-        &fps.to_string(),
-        "-c:v",
-        "libx264",
-        "-b:v",
-        video_bitrate,
-        "-c:a",
-        "copy",
-        "-pix_fmt",
-        "yuv420p",
-        "-shortest",
-        output_file.to_str().ok_or_else(|| anyhow!("Invalid output path"))?,
-    ]);
-
-    let output = cmd
-        .output()
-        .with_context(|| "Failed to execute ffmpeg")?;
-
-    if !output.status.success() {
-        let stderr = String::from_utf8_lossy(&output.stderr);
-        return Err(anyhow!("ffmpeg failed: {}", stderr));
+    pub fn write_frame(&mut self, data: &[u8]) -> Result<()> {
+        self.stdin.write_all(data).with_context(|| "Failed to write frame to ffmpeg")
     }
 
-    println!("Video saved to: {}", output_file.display());
+    pub fn finish(self) -> Result<()> {
+        drop(self.stdin);
+        let output = self.child.wait_with_output().context("Failed to wait for ffmpeg")?;
 
-    Ok(())
+        if !output.status.success() {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            return Err(anyhow!("ffmpeg failed: {}", stderr));
+        }
+
+        Ok(())
+    }
 }
 
 /// Clean up temporary files.