From 36a4f4120fac490a58ad54e5fde93d7b2bf0a30f Mon Sep 17 00:00:00 2001 From: RangeyRover Date: Sat, 16 May 2026 18:29:12 +0930 Subject: [PATCH 1/2] feat: implement per-app recording --- .gitignore | 4 + README.md | 2 +- audio_recorder.py | 127 ++++++++++++++--- gui.py | 102 ++++++++++++++ main.py | 6 +- process_utils.py | 82 +++++++++++ requirements.txt | 9 ++ .../checklists/requirements.md | 35 +++++ specs/001-per-app-recording/spec.md | 89 ++++++++++++ specs/001-per-app-recording/tasks.md | 45 ++++++ tests/__init__.py | 1 + tests/conftest.py | 1 + tests/test_audio_recorder.py | 133 ++++++++++++++++++ tests/test_process_utils.py | 68 +++++++++ 14 files changed, 685 insertions(+), 19 deletions(-) create mode 100644 process_utils.py create mode 100644 requirements.txt create mode 100644 specs/001-per-app-recording/checklists/requirements.md create mode 100644 specs/001-per-app-recording/spec.md create mode 100644 specs/001-per-app-recording/tasks.md create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/test_audio_recorder.py create mode 100644 tests/test_process_utils.py diff --git a/.gitignore b/.gitignore index f5cc44a..4e75766 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,7 @@ settings.json # IDE .vscode/ .idea/ +*.mp3 +*.wav +*.png +debug_com.py diff --git a/README.md b/README.md index 4c972b7..56163a5 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ It sits quietly in your system tray and is always ready with a single click or g ### Requirements - Python 3.12+ -- `pip install PyQt6 soundcard soundfile numpy lameenc keyboard` +- `pip install -r requirements.txt` ### Build from Source To create the standalone executable: diff --git a/audio_recorder.py b/audio_recorder.py index a7c67b4..774663b 100644 --- a/audio_recorder.py +++ b/audio_recorder.py @@ -35,18 +35,71 @@ def stop(self): self.stop_event.set() self.join() +class ProcTapRecorder(threading.Thread): + """ + Helper thread to record a specific 
application's audio to a WAV file using ProcTap. + """ + def __init__(self, pid, filepath): + super().__init__() + self.pid = pid + self.filepath = filepath + self.samplerate = 48000 + self.channels = 2 + self.stop_event = threading.Event() + self.error = None + self.max_amp_seen = 0.0 + self.chunks_read = 0 + + def run(self): + try: + import proctap + import soundfile as sf + import numpy as np + + print(f"[ProcTapRecorder] Starting capture for PID {self.pid}...") + capture = proctap.ProcessAudioCapture(self.pid) + capture.start() + + with sf.SoundFile(self.filepath, mode='w', samplerate=self.samplerate, channels=self.channels) as f_wav: + while not self.stop_event.is_set(): + data = capture.read(timeout=0.1) + if data: + np_data = np.frombuffer(data, dtype=np.float32).reshape(-1, self.channels) + f_wav.write(np_data) + + # Debugging amplitude + amp = np.max(np.abs(np_data)) + if amp > self.max_amp_seen: + self.max_amp_seen = amp + self.chunks_read += 1 + + capture.stop() + capture.close() + print(f"[ProcTapRecorder] Finished PID {self.pid}. Chunks: {self.chunks_read}, Max Amp: {self.max_amp_seen:.4f}") + if self.max_amp_seen == 0.0 and self.chunks_read > 0: + print(f"[ProcTapRecorder] WARNING: All captured chunks were perfect silence (0.0). Application may be bypassing WASAPI loopback.") + except Exception as e: + print(f"[ProcTapRecorder] Error: {e}") + self.error = str(e) + + def stop(self): + self.stop_event.set() + self.join() + class AudioRecorder(threading.Thread): """ Orchestrates recording from Microphone, Loopback, or Both. 
""" def __init__(self, mic_id, source_mode, output_folder, output_format="mp3", - normalize=False, on_finish_callback=None): + normalize=False, target_pid=None, speaker_id=None, on_finish_callback=None): super().__init__() self.mic_id = mic_id self.source_mode = source_mode # "mic", "loopback", "both" self.output_folder = output_folder self.output_format = output_format.lower() self.normalize = normalize + self.target_pid = target_pid + self.speaker_id = speaker_id self.callback = on_finish_callback self.recording = False @@ -58,19 +111,27 @@ def __init__(self, mic_id, source_mode, output_folder, output_format="mp3", self.temp_files = [] self.recorders = [] - def _get_device(self, is_loopback): + def _get_device(self, is_loopback=False): if is_loopback: - # For loopback, we try to find the default speaker's loopback - default_speaker = sc.default_speaker() + target_speaker_name = None + if self.speaker_id: + for speaker in sc.all_speakers(): + if speaker.id == self.speaker_id: + target_speaker_name = speaker.name + break + + if target_speaker_name is None: + target_speaker_name = sc.default_speaker().name + mics = sc.all_microphones(include_loopback=True) # Try exact name match - loopback_mic = next((m for m in mics if m.name == default_speaker.name), None) + loopback_mic = next((m for m in mics if m.name == target_speaker_name and m.isloopback), None) # Try fuzzy match if not loopback_mic: - loopback_mic = next((m for m in mics if default_speaker.name in m.name), None) + loopback_mic = next((m for m in mics if target_speaker_name in m.name and m.isloopback), None) if not loopback_mic: - raise Exception("Could not detect System Audio loopback device.") + raise Exception(f"Could not detect System Audio loopback device for '{target_speaker_name}'.") return loopback_mic else: return sc.get_microphone(self.mic_id, include_loopback=False) @@ -83,29 +144,53 @@ def run(self): try: # 1. 
Setup Recorders + is_per_app = self.target_pid is not None + + # Use root PID for the target to ensure we capture the whole tree (browser sandboxes) + actual_pid = self.target_pid + if is_per_app: + try: + from process_utils import get_root_pid + actual_pid = get_root_pid(self.target_pid) + print(f"Targeting root PID {actual_pid} (derived from selected {self.target_pid})") + except Exception as e: + print(f"Error resolving root PID: {e}") + + # If using ProcTap, its fixed sample rate is 48000. + # We must match this for hardware mics to avoid mixing different sample rates. + target_sr = 48000 if is_per_app else 44100 + if self.source_mode == "both": # Need two recorders dev_mic = self._get_device(is_loopback=False) - dev_loop = self._get_device(is_loopback=True) t1 = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name t2 = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name self.temp_files = [t1, t2] - self.recorders.append(RawRecorder(dev_mic, t1)) - self.recorders.append(RawRecorder(dev_loop, t2)) + self.recorders.append(RawRecorder(dev_mic, t1, samplerate=target_sr)) + + if is_per_app: + self.recorders.append(ProcTapRecorder(actual_pid, t2)) + else: + dev_loop = self._get_device(is_loopback=True) + self.recorders.append(RawRecorder(dev_loop, t2, samplerate=target_sr)) elif self.source_mode == "loopback": - dev = self._get_device(is_loopback=True) t1 = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name self.temp_files = [t1] - self.recorders.append(RawRecorder(dev, t1)) + + if is_per_app: + self.recorders.append(ProcTapRecorder(actual_pid, t1)) + else: + dev = self._get_device(is_loopback=True) + self.recorders.append(RawRecorder(dev, t1, samplerate=target_sr)) else: # mic dev = self._get_device(is_loopback=False) t1 = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name self.temp_files = [t1] - self.recorders.append(RawRecorder(dev, t1)) + self.recorders.append(RawRecorder(dev, t1, samplerate=target_sr)) print(f"Starting 
recording mode: {self.source_mode}") @@ -113,14 +198,22 @@ def run(self): for r in self.recorders: r.start() - # Wait for stop signal - self.stop_event.wait() + # Wait for stop signal or for a recorder to crash + while not self.stop_event.is_set(): + for r in self.recorders: + if not r.is_alive(): + # A recorder thread died prematurely + self.stop_event.set() + if r.error: + self.error_message = f"Recording stopped because the application closed. ({r.error})" + break + self.stop_event.wait(0.5) # 3. Stop Recording for r in self.recorders: r.stop() - if r.error: - raise Exception(f"Recorder error: {r.error}") + if r.error and not self.error_message: + self.error_message = f"Recording stopped because the application closed. ({r.error})" # 4. Mix/Process if len(self.temp_files) == 2: diff --git a/gui.py b/gui.py index 5c0b485..1d55541 100644 --- a/gui.py +++ b/gui.py @@ -11,8 +11,10 @@ from PyQt6.QtCore import pyqtSignal, QObject, Qt, QUrl, QMimeData, QDir import soundcard as sc import keyboard +import psutil from audio_recorder import AudioRecorder, get_devices from clipboard_utils import copy_file_to_clipboard +from process_utils import get_active_applications CONFIG_FILE = "settings.json" @@ -108,6 +110,36 @@ def init_ui(self): container.setLayout(layout) self.setCentralWidget(container) + # Capture Target Selection + group_source = QGroupBox("Capture Target (Output Source)") + layout_source = QVBoxLayout() + + self.combo_source = QComboBox() + self.combo_source.addItems(["Hardware Output Device", "Specific App"]) + self.combo_source.currentIndexChanged.connect(self.on_source_changed) + layout_source.addWidget(self.combo_source) + + layout_hw = QHBoxLayout() + self.combo_hardware = QComboBox() + self.btn_refresh_hw = QPushButton("Refresh Devices") + self.btn_refresh_hw.clicked.connect(self.refresh_hw) + layout_hw.addWidget(self.combo_hardware) + layout_hw.addWidget(self.btn_refresh_hw) + layout_source.addLayout(layout_hw) + + layout_app = QHBoxLayout() + 
self.combo_app = QComboBox() + self.combo_app.setEnabled(False) + self.btn_refresh_app = QPushButton("Refresh Apps") + self.btn_refresh_app.setEnabled(False) + self.btn_refresh_app.clicked.connect(self.refresh_apps) + layout_app.addWidget(self.combo_app) + layout_app.addWidget(self.btn_refresh_app) + layout_source.addLayout(layout_app) + + group_source.setLayout(layout_source) + layout.addWidget(group_source) + # Microphone group_mic = QGroupBox("Input Device") layout_mic = QVBoxLayout() @@ -181,6 +213,7 @@ def init_ui(self): btn_save.clicked.connect(self.save_settings) layout.addWidget(btn_save) + self.refresh_hw() self.refresh_devices() def refresh_devices(self): @@ -197,6 +230,42 @@ def refresh_devices(self): except Exception as e: print(f"Error refreshing devices: {e}") + def refresh_hw(self): + self.combo_hardware.clear() + try: + speakers = get_devices(include_loopback=True) + default_spk = sc.default_speaker() + default_index = 0 + for i, s in enumerate(speakers): + self.combo_hardware.addItem(f"{s['name']}", s['id']) + if s['id'] == default_spk.id: + default_index = i + if self.combo_hardware.count() > 0: + self.combo_hardware.setCurrentIndex(default_index) + except Exception as e: + print(f"Error refreshing hardware devices: {e}") + + def on_source_changed(self, index): + is_app = (self.combo_source.currentText() == "Specific App") + self.combo_app.setEnabled(is_app) + self.btn_refresh_app.setEnabled(is_app) + self.combo_hardware.setEnabled(not is_app) + self.btn_refresh_hw.setEnabled(not is_app) + + if is_app and self.combo_app.count() == 0: + self.refresh_apps() + if not is_app and self.combo_hardware.count() == 0: + self.refresh_hw() + + def refresh_apps(self): + self.combo_app.clear() + try: + apps = get_active_applications() + for app in apps: + self.combo_app.addItem(f"{app['title']} ({app['name']})", app['pid']) + except Exception as e: + print(f"Error refreshing apps: {e}") + def browse_folder(self): folder = QFileDialog.getExistingDirectory(self, 
"Select Output Folder", options=QFileDialog.Option.DontUseNativeDialog) if folder: @@ -217,6 +286,22 @@ def load_settings(self): idx = self.combo_mic.findData(saved_id) if idx >= 0: self.combo_mic.setCurrentIndex(idx) + source_mode = data.get("source", "Hardware Output Device") + if source_mode == "System-Wide": source_mode = "Hardware Output Device" + source_idx = self.combo_source.findText(source_mode) + if source_idx >= 0: self.combo_source.setCurrentIndex(source_idx) + + saved_hw_id = data.get("speaker_id") + if saved_hw_id and source_mode == "Hardware Output Device": + idx = self.combo_hardware.findData(saved_hw_id) + if idx >= 0: self.combo_hardware.setCurrentIndex(idx) + + saved_pid = data.get("target_pid") + if saved_pid and source_mode == "Specific App": + self.refresh_apps() + pid_idx = self.combo_app.findData(saved_pid) + if pid_idx >= 0: self.combo_app.setCurrentIndex(pid_idx) + mode = data.get("tray_click_mode", "Last Used") mode_idx = self.combo_left_click.findText(mode) if mode_idx >= 0: self.combo_left_click.setCurrentIndex(mode_idx) @@ -245,6 +330,9 @@ def save_settings(self): def get_settings(self): return { + "source": self.combo_source.currentText(), + "target_pid": self.combo_app.currentData(), + "speaker_id": self.combo_hardware.currentData(), "device_id": self.combo_mic.currentData(), "output_folder": self.lbl_folder.text(), "format": self.combo_fmt.currentText(), @@ -284,6 +372,9 @@ def __init__(self, app): self.tray_icon.showMessage("Ready", "Left-click to toggle recording.", QSystemTrayIcon.MessageIcon.Information, 2000) self.register_hotkeys() + + # Open settings on startup + self.open_settings() def generate_icons(self): if not os.path.exists(self.icon_idle_path): @@ -374,6 +465,15 @@ def start_recording(self, mode="mic"): settings = self.settings_window.get_settings() target_id = settings['device_id'] + is_app = settings.get('source') == "Specific App" + target_pid = settings.get('target_pid') if is_app else None + speaker_id = 
settings.get('speaker_id') if not is_app else None + + if is_app: + if not target_pid or not psutil.pid_exists(target_pid): + self.tray_icon.showMessage("Error", "Application not found. Please refresh the list.", QSystemTrayIcon.MessageIcon.Warning, 3000) + return + def finish_callback(path, error): self.signals.recording_finished.emit(path if path else "", error if error else "") @@ -383,6 +483,8 @@ def finish_callback(path, error): output_folder=settings['output_folder'], output_format=settings['format'], normalize=settings['normalize'], + target_pid=target_pid, + speaker_id=speaker_id, on_finish_callback=finish_callback ) self.recorder.start() diff --git a/main.py b/main.py index 1b8f8a1..7717aa3 100644 --- a/main.py +++ b/main.py @@ -1,11 +1,15 @@ import sys +# Ensure COM is initialized as Single-Threaded Apartment (STA) +# BEFORE any PyQt or win32 modules attempt to initialize it as MTA. +sys.coinit_flags = 2 + from PyQt6.QtWidgets import QApplication -from gui import TrayApplication def main(): app = QApplication(sys.argv) app.setQuitOnLastWindowClosed(False) + from gui import TrayApplication tray = TrayApplication(app) sys.exit(app.exec()) diff --git a/process_utils.py b/process_utils.py new file mode 100644 index 0000000..28929e6 --- /dev/null +++ b/process_utils.py @@ -0,0 +1,82 @@ +import psutil + +def get_root_pid(pid): + """ + Traverses the process tree upwards to find the highest-level parent process + with the same executable name. This is critical for browsers (Firefox/Chrome) + to ensure the WASAPI loopback captures the entire process tree including + sandboxed audio utility processes. 
+ """ + try: + p = psutil.Process(pid) + target_name = p.name().lower() + while True: + parent = p.parent() + if parent is None: + break + if parent.name().lower() != target_name: + break + p = parent + return p.pid + except Exception: + return pid + +def get_active_applications(): + """ + Returns a list of dictionaries with 'pid', 'name', and 'title' of all + currently running user-facing applications. + """ + try: + import win32gui + import win32process + except ImportError: + win32gui = None + win32process = None + + if win32gui is None: + # Fallback if pywin32 is not installed, though we lose window title / visibility checks + apps = [] + for p in psutil.process_iter(['pid', 'name']): + try: + if p.info['name'] not in ('svchost.exe', 'System Idle Process', 'System'): + apps.append({ + 'pid': p.info['pid'], + 'name': p.info['name'], + 'title': p.info['name'] + }) + except (psutil.NoSuchProcess, psutil.AccessDenied): + pass + return apps + + # Using pywin32 for accurate visible window detection + visible_hwnds = [] + + def enum_windows_callback(hwnd, extra): + if win32gui.IsWindowVisible(hwnd) and win32gui.GetWindowText(hwnd): + visible_hwnds.append(hwnd) + + win32gui.EnumWindows(enum_windows_callback, None) + + # Map HWND to PID + pid_to_hwnd = {} + for hwnd in visible_hwnds: + _, pid = win32process.GetWindowThreadProcessId(hwnd) + # Store the first window title found for this PID + if pid not in pid_to_hwnd: + pid_to_hwnd[pid] = hwnd + + apps = [] + for pid, hwnd in pid_to_hwnd.items(): + try: + p = psutil.Process(pid) + name = p.name() + title = win32gui.GetWindowText(hwnd) + apps.append({ + 'pid': pid, + 'name': name, + 'title': title + }) + except (psutil.NoSuchProcess, psutil.AccessDenied): + pass + + return apps diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9a1b13a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +PyQt6 +soundcard +soundfile +numpy +lameenc +keyboard +proc-tap +psutil +pytest diff --git 
a/specs/001-per-app-recording/checklists/requirements.md b/specs/001-per-app-recording/checklists/requirements.md new file mode 100644 index 0000000..655cfea --- /dev/null +++ b/specs/001-per-app-recording/checklists/requirements.md @@ -0,0 +1,35 @@ +# Specification Quality Checklist: Per-Application Audio Recording + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-05-16 +**Feature**: [spec.md](./spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- All checklist items pass on the first iteration. The specification successfully balances providing clear expectations and acceptance boundaries without specifying exact API methods or logic implementation. +- No clarifications were needed as the prompt and general scope provided a clear pathway to an MVP per-application recording feature. 
diff --git a/specs/001-per-app-recording/spec.md b/specs/001-per-app-recording/spec.md new file mode 100644 index 0000000..234387b --- /dev/null +++ b/specs/001-per-app-recording/spec.md @@ -0,0 +1,89 @@ +# Feature Specification: Per-Application Audio Recording + +**Feature Branch**: `001-per-app-recording` +**Created**: 2026-05-16 +**Status**: Draft +**Input**: User description: "Change audio recording to target specific applications (per-app loopback) rather than system-wide audio endpoints." + +## Clarifications + +### Session 2026-05-16 +- Q: How should the choice between "System-Wide" and "Per-Application" be presented in the user interface to keep it clean and intuitive? → A: Add a new "Source" toggle (System-Wide vs. Specific App). When "Specific App" is selected, the app dropdown appears. The existing modes (Mic, Audio, Both) stay the same but now apply to the selected Source. +- Q: How should the system handle attempting to start a recording when the targeted application is no longer running? → A: Show an error prompt. When "Record" is clicked, check if the PID is still alive. If not, show an error ("Application not found") and do not start recording. + +### Session 2026-05-16 (UI Refinement) +- Q: How should the UI be structured for selecting the output source and input source? → A: The Settings UI should open automatically on startup. The "Capture Target" (Output Source) should be the first section, allowing a choice between "Hardware Output Device" (with a dropdown of physical speakers) or "Specific Application". The "Input Device" (Microphone) should follow below it. 
+ +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Record Specific Application Communications (Two-Way) (Priority: P1) + +As a user, I want to select a specific application (like Discord or a game) and record BOTH what the application is playing (its output) and what it is receiving (its microphone input), so that I can capture a complete two-way conversation or session without unwanted background system noises. + +**Why this priority**: The core goal is to isolate a specific app's entire audio context (both sending and receiving) from the rest of the system's global sound devices. + +**Independent Test**: Can be tested by selecting a voice chat app, having a conversation, playing background music in another app, and verifying the recording contains *only* the chat app's incoming voices and your outgoing voice, with no background music. + +**Acceptance Scenarios**: + +1. **Given** the user opens the QuickAudioRecorder settings, **When** they look at the input options, **Then** they see a list of currently running applications. +2. **Given** the user has selected a specific application, **When** they start recording in "Application Both" mode, **Then** the app captures both the audio the application outputs and the audio the application receives. + +--- + +### User Story 2 - Refresh Running Applications List (Priority: P2) + +As a user, I want to be able to refresh the list of running applications so that I can select an app I just opened without restarting QuickAudioRecorder. + +**Why this priority**: Users frequently open the app they want to record *after* starting their recording tools. + +**Independent Test**: Can be tested by opening the settings, noting the list, opening a new app, clicking refresh, and verifying the new app appears. + +**Acceptance Scenarios**: + +1. **Given** the settings window is open, **When** the user clicks a "Refresh" button next to the application list, **Then** the list updates with the latest running applications. 
+ +--- + +### Edge Cases + +- What happens when the selected application is closed while recording is active? + - System handles this by safely stopping the recording, saving the file up to that point, and displaying a notification to the user declaring that the application has been closed and the recording was saved. +- What happens if the selected application is closed *before* the recording starts? + - System handles this by validating the target Process ID when the "Record" button is clicked. If invalid, the system halts the recording attempt and displays an error message prompting the user to refresh the list. +- What happens if the selected application spawns child processes for audio (e.g., browsers)? + - System handles this by capturing audio from the main process tree (if supported by the underlying API). +- What happens if the user selects an application that doesn't produce audio? + - System records silence. + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: System MUST provide a UI element to list currently active applications that the user can select from. +- **FR-001a**: System MUST present a "Capture Target" choice (Hardware Output Device vs. Specific App). The hardware output dropdown MUST populate with available system speakers. The application list dropdown MUST only be visible/enabled when "Specific App" is selected. +- **FR-002**: System MUST filter the application list to show meaningful user-facing applications (e.g., processes with visible windows) to avoid cluttering the list with background services. +- **FR-003**: System MUST provide a way to refresh the application list. +- **FR-004**: System MUST capture the audio output (what the app plays) exclusively from the Process ID (PID) of the selected application when the per-app mode is active. 
+- **FR-004a**: System MUST retain the existing system-wide audio capture methods as selectable options, extending the application's functionality rather than replacing the current feature set. +- **FR-005**: System MUST capture the audio input (what the app hears/records) associated with the selected application. +- **FR-006**: If the operating system restricts isolating microphone inputs per-application, the System MUST fall back to capturing the physical microphone device selected by the user, while still isolating the application's audio output. +- **FR-007**: System MUST mix the isolated application output and the application input synchronously into a single output file. +- **FR-008**: System MUST safely terminate and save the recording if the target application's process terminates unexpectedly, and clearly notify the user that the recording was halted due to the application closing. +- **FR-009**: System MUST maintain the existing ability to save the output as an MP3 and copy the path to the clipboard. +- **FR-010**: System MUST open the Settings control window immediately on startup; the window hides to the system tray when closed. + +### Key Entities + +- **ApplicationTarget**: Represents a selectable running process, containing attributes like Process Name, Window Title, and Process ID (PID). +- **AudioStream**: The per-process audio capture stream that yields PCM chunks. + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: User can successfully record audio from a target application while simultaneously playing audio from a different application, and the resulting file contains *only* the target audio (100% isolation). +- **SC-002**: User can record their microphone and a target application simultaneously without noticeable audio drift or desynchronization. +- **SC-003**: The application list populates in under 1 second. 
+- **SC-004**: No regressions in CPU usage compared to the system-wide recording method (CPU usage remains below 5% on average). +- **SC-005**: The system successfully handles the target application closing during recording 100% of the time without corrupting the output file or crashing the recorder. diff --git a/specs/001-per-app-recording/tasks.md b/specs/001-per-app-recording/tasks.md new file mode 100644 index 0000000..890861c --- /dev/null +++ b/specs/001-per-app-recording/tasks.md @@ -0,0 +1,45 @@ +# Execution Plan: Per-Application Audio Recording + +**Feature Branch**: `001-per-app-recording` +**Created**: 2026-05-16 + +## Implementation Strategy +This plan extends the current `QuickAudioRecorder` to support per-application audio recording using the `proc-tap` Python library. Following the user's preference for TDD (Test-Driven Development) and SDD, we will write tests for all non-GUI components *before* implementing the business logic. GUI elements are explicitly excluded from test coverage. + +### Phase 1: Setup & Dependencies +- [x] T001 Update project dependencies to include `proc-tap`, `psutil` (for process discovery), and `pytest` (for our testing framework). +- [x] T002 Update `README.md` to reflect the new dependencies and testing instructions. +- [x] T003 Initialize the `tests/` directory and any basic test configuration (e.g., `conftest.py`) if required. + +### Phase 2: Foundational Components (Process Discovery) +- [x] T004 [P] Create TDD tests in `tests/test_process_utils.py` for a future `process_utils.py` module, defining expected behaviors for listing active applications and filtering out background services. +- [x] T005 Implement `process_utils.py` to fetch a list of active applications with visible windows using `psutil`, ensuring it passes the tests established in T004. 
+ +### Phase 3: User Story 1 - Record Specific Application Communications (Two-Way) +- [x] T006 [P] [US1] Create TDD tests in `tests/test_audio_recorder.py` that mock the `ProcTap` and `soundcard` APIs. The tests must define the expected behavior for capturing app output individually, and mixing the app output with hardware microphone input. +- [x] T007 [US1] Update `audio_recorder.py` to import `ProcTap` and modify the capture engine to instantiate `ProcTap(pid)` when the per-app mode is selected for output capture, passing the relevant tests. +- [x] T008 [US1] Update `audio_recorder.py` to capture the selected hardware microphone and synchronize/mix it with the `ProcTap` application output stream when two-way communication recording is active, passing the relevant tests. +- [x] T009 [US1] Update `gui.py` to add a new "Recording Mode" selection (System-wide vs Per-Application) to the Settings UI. *(No tests required)* +- [x] T010 [P] [US1] Update `gui.py` to include a dropdown list for "Target Application" that populates using `process_utils.py`. *(No tests required)* +- [x] T011 [P] [US1] Update `gui.py` to add a "Refresh" button next to the "Target Application" dropdown. *(No tests required)* +- [x] T012 [US1] Update the integration between `gui.py` and `audio_recorder.py` to pass the selected target PID and recording mode correctly. *(No tests required)* + +### Phase 4: Polish & Error Handling +- [x] T013 Create TDD tests in `tests/test_audio_recorder.py` defining the expected behavior when a target application's process terminates unexpectedly mid-recording. +- [x] T014 Implement exception handling in `audio_recorder.py` for cases where the target application terminates mid-recording, ensuring the existing recorded chunks are safely finalized and saved to pass T013 tests. +- [x] T015 Verify UI states in `gui.py` (e.g., disabling the application dropdown if "System-wide" mode is selected). 
*(No tests required)* + +## Dependencies +- Phase 2 (Process Discovery) must be completed before Phase 3 UI updates can be fully integrated. +- Test tasks (T004, T006, T013) MUST be executed immediately prior to their respective implementation tasks. +- T009-T011 (UI updates) can happen in parallel with T007-T008 (Core audio logic updates). + +## Parallel Execution Opportunities +- The UI enhancements in `gui.py` (T009 - T011) can be developed independently of the core audio engine logic and tests in `audio_recorder.py` (T006 - T008). +- Foundational process discovery tests (T004) can be built simultaneously with audio recording tests (T006). + +### Phase 5: UI Refinements & Output Selection +- [x] T016 [P] Create TDD tests in `tests/test_audio_recorder.py` defining expected behavior when a specific hardware speaker is selected for system-wide loopback recording instead of the default speaker. +- [x] T017 Update `audio_recorder.py` to accept a `speaker_id` parameter and utilize it when acquiring the loopback device via `soundcard`. Ensure tests pass. +- [x] T018 Update `gui.py` to restructure the Settings window: Add "Capture Target" (Hardware Output Device vs Specific App) at the top, followed by "Input Device" (Microphone). Populate the Hardware Output Device dropdown with a list of system speakers. *(No tests required)* +- [x] T019 Update `main.py` and `gui.py` so the Settings control window opens automatically on application startup, allowing minimization to the system tray. 
*(No tests required)* diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..11754ee --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +# Tests module diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..5871ed8 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1 @@ +import pytest diff --git a/tests/test_audio_recorder.py b/tests/test_audio_recorder.py new file mode 100644 index 0000000..9702785 --- /dev/null +++ b/tests/test_audio_recorder.py @@ -0,0 +1,133 @@ +import pytest +from unittest.mock import patch, MagicMock +import os +import threading + +import audio_recorder + +@patch('audio_recorder.sc') +@patch('audio_recorder.ProcTapRecorder') +def test_audio_recorder_per_app_output(mock_proctap_recorder, mock_sc, tmp_path): + # Test recording app output only + out_dir = str(tmp_path) + recorder = audio_recorder.AudioRecorder( + mic_id="default", + source_mode="loopback", + output_folder=out_dir, + output_format="wav", + target_pid=1234 + ) + + mock_instance = MagicMock() + mock_proctap_recorder.return_value = mock_instance + + # We mock stop_event.wait to just stop immediately + recorder.stop_event.wait = lambda: None + + recorder.run() + + # Assert ProcTapRecorder was used + mock_proctap_recorder.assert_called_once() + args, kwargs = mock_proctap_recorder.call_args + assert args[0] == 1234 # PID + assert len(recorder.recorders) == 1 + assert recorder.recorders[0] == mock_instance + +@patch('audio_recorder.sc') +@patch('audio_recorder.ProcTapRecorder') +@patch('audio_recorder.RawRecorder') +def test_audio_recorder_per_app_both(mock_raw_recorder, mock_proctap_recorder, mock_sc, tmp_path): + # Test recording app output + mic + out_dir = str(tmp_path) + recorder = audio_recorder.AudioRecorder( + mic_id="mic1", + source_mode="both", + output_folder=out_dir, + output_format="wav", + target_pid=5678 + ) + + mock_mic_instance = MagicMock() + mock_raw_recorder.return_value = mock_mic_instance + + 
mock_app_instance = MagicMock() + mock_proctap_recorder.return_value = mock_app_instance + + # Mock sc to return a mock mic + mock_sc.get_microphone.return_value = MagicMock() + + # We mock stop_event.wait to just stop immediately + recorder.stop_event.wait = lambda: None + + # We need to mock mix_audio so it doesn't try to read fake wavs + recorder._mix_audio = MagicMock() + + recorder.run() + + # Both recorders should be initialized + mock_proctap_recorder.assert_called_once() + mock_raw_recorder.assert_called_once() + + assert args_pid_check(mock_proctap_recorder, 5678) + +def args_pid_check(mock_call, expected_pid): + args, kwargs = mock_call.call_args + return args[0] == expected_pid + +@patch('audio_recorder.sc') +@patch('audio_recorder.ProcTapRecorder') +def test_audio_recorder_app_crash(mock_proctap_recorder, mock_sc, tmp_path): + # Test that AudioRecorder finishes gracefully if ProcTapRecorder dies + out_dir = str(tmp_path) + recorder = audio_recorder.AudioRecorder( + mic_id="default", + source_mode="loopback", + output_folder=out_dir, + output_format="wav", + target_pid=9999 + ) + + mock_instance = MagicMock() + # Simulate the thread dying immediately with an error + mock_instance.is_alive.return_value = False + mock_instance.error = "Process terminated" + mock_proctap_recorder.return_value = mock_instance + + # Run should detect the dead thread and break out of its wait loop + recorder.run() + + # Ensure it still tried to mix/finalize what it had + assert recorder.final_filepath is not None + # Ensure error message reflects the crash + assert "terminated" in str(recorder.error_message).lower() or "closed" in str(recorder.error_message).lower() + +@patch('audio_recorder.sc') +def test_audio_recorder_custom_speaker(mock_sc, tmp_path): + # Test that providing a speaker_id uses that specific speaker for loopback instead of default + out_dir = str(tmp_path) + + mock_speaker1 = MagicMock() + mock_speaker1.id = "speaker1_id" + mock_speaker2 = MagicMock() + 
mock_speaker2.id = "speaker2_id" + mock_speaker2.name = "Speaker 2 Name" + + mock_loopback_mic = MagicMock() + mock_loopback_mic.name = "Speaker 2 Name Loopback" + mock_loopback_mic.isloopback = True + + mock_sc.all_speakers.return_value = [mock_speaker1, mock_speaker2] + mock_sc.default_speaker.return_value = mock_speaker1 + mock_sc.all_microphones.return_value = [mock_loopback_mic] + + recorder = audio_recorder.AudioRecorder( + mic_id="default", + source_mode="loopback", + output_folder=out_dir, + output_format="wav", + speaker_id="speaker2_id" + ) + + dev = recorder._get_device(is_loopback=True) + assert dev == mock_loopback_mic + mock_sc.default_speaker.assert_not_called() diff --git a/tests/test_process_utils.py b/tests/test_process_utils.py new file mode 100644 index 0000000..28e42f4 --- /dev/null +++ b/tests/test_process_utils.py @@ -0,0 +1,68 @@ +import pytest +from unittest.mock import patch, MagicMock + +import process_utils + +@patch('psutil.process_iter') +@patch('process_utils.win32gui') +def test_get_active_applications(mock_win32gui, mock_process_iter): + # Mock some processes + p1 = MagicMock() + p1.info = {'pid': 1000, 'name': 'explorer.exe'} + + p2 = MagicMock() + p2.info = {'pid': 2000, 'name': 'svchost.exe'} + + p3 = MagicMock() + p3.info = {'pid': 3000, 'name': 'Discord.exe'} + + mock_process_iter.return_value = [p1, p2, p3] + + # Mock window visibility checks + # Let's say win32gui.IsWindowVisible returns True for explorer and Discord, False for svchost + # And we also mock EnumWindows to return the HWNDs, which map to PIDs via GetWindowThreadProcessId + + def mock_enum_windows(callback, extra): + # Pass HWNDs to callback + callback(1, None) # Explorer + callback(2, None) # Discord + + mock_win32gui.EnumWindows = mock_enum_windows + mock_win32gui.IsWindowVisible.side_effect = lambda hwnd: True + mock_win32gui.GetWindowText.side_effect = lambda hwnd: "Window" if hwnd == 1 else "Discord - General" + + # We need to mock win32process as well if 
process_utils uses it + with patch('process_utils.win32process') as mock_win32process: + # HWND 1 -> PID 1000, HWND 2 -> PID 3000 + def mock_get_thread_process_id(hwnd): + if hwnd == 1: + return (0, 1000) + elif hwnd == 2: + return (0, 3000) + return (0, 0) + + mock_win32process.GetWindowThreadProcessId = mock_get_thread_process_id + + with patch('process_utils.psutil.Process') as mock_psutil_process: + def mock_process(pid): + p = MagicMock() + if pid == 1000: + p.name.return_value = 'explorer.exe' + elif pid == 3000: + p.name.return_value = 'Discord.exe' + else: + raise psutil.NoSuchProcess(pid) + return p + mock_psutil_process.side_effect = mock_process + + apps = process_utils.get_active_applications() + + assert len(apps) == 2 + pids = [app['pid'] for app in apps] + assert 1000 in pids + assert 3000 in pids + assert 2000 not in pids # svchost has no visible window + + discord_app = next(app for app in apps if app['pid'] == 3000) + assert discord_app['name'] == 'Discord.exe' + assert discord_app['title'] == 'Discord - General' From 89a5243d86cb2c48ef6e8959a9516411e0ca80ce Mon Sep 17 00:00:00 2001 From: RangeyRover Date: Sat, 16 May 2026 18:49:10 +0930 Subject: [PATCH 2/2] docs: update README with per-app features and test info --- README.md | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 56163a5..db93469 100644 --- a/README.md +++ b/README.md @@ -13,10 +13,13 @@ It sits quietly in your system tray and is always ready with a single click or g image +- **Capture Targets:** + - 🖥️ **Hardware Output Device:** Record system-wide audio directly from your chosen physical speakers. + - 🎯 **Specific Application:** Isolate and record audio *only* from a selected application (e.g., Discord or Firefox) without capturing background system noises, powered by WASAPI Process Loopback. - **Modes:** - 🎤 **Microphone:** Record your voice. - - 🔊 **System Audio:** Record what you hear (Loopback). 
- - 🎙️+🔊 **Both:** Record both tracks simultaneously (mixed). + - 🔊 **Loopback:** Record what you hear (or what the app is playing). + - 🎙️+🔊 **Both:** Record both tracks simultaneously (mixed into one file). - **Post-Processing:** - **Auto-Normalize:** Automatically adjusts volume to optimal levels after recording. - **Clipboard Integration:** Automatically copies the file (or file path) to your clipboard. @@ -34,17 +37,25 @@ It sits quietly in your system tray and is always ready with a single click or g ## Usage -1. **Right-click** the tray icon to open **Settings**. -2. Select your **Microphone** and **Output Folder**. -3. Set your **Hotkeys** (optional). -4. **Left-click** the tray icon or use a hotkey to start recording. -5. Click again to stop. The file is saved and ready to use! +1. The **Settings** window opens immediately upon launch. +2. Choose your **Capture Target** (Hardware Output or Specific Application). +3. Select your **Microphone** and **Output Folder**. +4. Set your **Hotkeys** and **Tray Icon Behavior** (optional). +5. Close the settings window to minimize to the tray. +6. **Left-click** the tray icon or use your configured hotkey to start recording. Click again to stop. ## Development ### Requirements - Python 3.12+ -- `pip install -r requirements.txt` +- Install dependencies: `pip install -r requirements.txt` +- Core libraries include `soundcard`, `soundfile`, `proctap` (for WASAPI loopback isolation), `psutil`, and `PyQt6`. + +### Testing +This project follows Test-Driven Development (TDD) for core audio logic. To run the test suite: +```bash +pytest tests/ +``` ### Build from Source To create the standalone executable: