From 96f9c0709931183305af82763ee9abe5b52dcd82 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Thu, 23 Apr 2026 02:30:17 +0530 Subject: [PATCH 01/11] feat(screen-capture): native source enumerator + thumbnail capture (#713) New `screen_capture` module exposes two Tauri commands that back the in-page getDisplayMedia picker shim: - `screen_share_list_sources` enumerates real screens (CGGetActiveDisplayList) and on-screen windows (CGWindowListCopyWindowInfo), tagging each with a Chromium-compatible DesktopMediaID string (`screen::0` / `window::0`). No thumbnails are captured during enumeration so the picker opens instantly. - `screen_share_thumbnail` captures a single source's live thumbnail on demand via CGDisplayCreateImage / CGWindowListCreateImage, encodes it to PNG through ImageIO, and returns base64. The picker shim fires these in parallel so thumbnails fade in as they arrive. Uses raw CoreGraphics + CoreFoundation + ImageIO FFI so we don't pull in the full `core-graphics` crate (~50 extra transitive deps). Adds the existing-workspace `base64` crate for PNG serialization. Co-Authored-By: Claude Opus 4.7 --- app/src-tauri/Cargo.lock | 1 + app/src-tauri/Cargo.toml | 1 + app/src-tauri/src/screen_capture/mod.rs | 458 ++++++++++++++++++++++++ 3 files changed, 460 insertions(+) create mode 100644 app/src-tauri/src/screen_capture/mod.rs diff --git a/app/src-tauri/Cargo.lock b/app/src-tauri/Cargo.lock index f19858107..6212aa696 100644 --- a/app/src-tauri/Cargo.lock +++ b/app/src-tauri/Cargo.lock @@ -8,6 +8,7 @@ version = "0.52.28" dependencies = [ "anyhow", "async-trait", + "base64 0.22.1", "cef", "chrono", "env_logger", diff --git a/app/src-tauri/Cargo.toml b/app/src-tauri/Cargo.toml index 68f226737..b8903e3b4 100644 --- a/app/src-tauri/Cargo.toml +++ b/app/src-tauri/Cargo.toml @@ -60,6 +60,7 @@ semver = "1" log = "0.4" env_logger = "0.11" +base64 = "0.22" # Used by the imessage_scanner module. 
anyhow = "1.0" diff --git a/app/src-tauri/src/screen_capture/mod.rs b/app/src-tauri/src/screen_capture/mod.rs new file mode 100644 index 000000000..301fb2346 --- /dev/null +++ b/app/src-tauri/src/screen_capture/mod.rs @@ -0,0 +1,458 @@ +//! Screen-capture source enumeration + picker orchestration for #713. +//! +//! Background (see issue #713 plan): embedded webviews (Meet, Discord, Zoom) +//! run under the CEF Alloy runtime, which does not link Chromium's built-in +//! `DesktopMediaPicker`. When the page calls `navigator.mediaDevices +//! .getDisplayMedia`, Chromium falls back to auto-selecting the primary +//! display — the user never sees a picker and their whole screen streams. +//! +//! Our `OnRequestMediaAccessPermission` callback in tauri-cef grants the +//! `DESKTOP_VIDEO_CAPTURE` bit unconditionally. Stage 0 PoC proved that when +//! the page calls `getUserMedia` with a hand-crafted +//! `{ mandatory: { chromeMediaSource: 'desktop', chromeMediaSourceId: '' } }` +//! constraint, Chromium honours the ID and opens a real capture device — +//! even though this constraint shape is normally extension-only. +//! +//! This module is the host-side half of that flow: +//! * `screen_share_list_sources` — enumerate real screens and windows, +//! tag each with a Chromium-compatible `DesktopMediaID` string +//! (`screen::0` / `window::0`). +//! * `screen_share_thumbnail` — capture a single source's live thumbnail +//! as a base64 PNG. Called lazily per-source from the picker shim so +//! the picker UI opens immediately and thumbnails fade in as they +//! arrive, rather than blocking enumeration for ~1-2s on a many- +//! window desktop. +//! +//! The picker UI itself is injected directly into each child webview's +//! DOM by `webview_accounts/runtime.js` (see the `showInPagePicker` flow +//! there), which is why we only need IPCs for enumeration + thumbnail +//! capture and no picker-modal orchestration RPCs on the host side. +//! +//! 
macOS-first: other platforms stub out until the flow is proven end- +//! to-end. + +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ScreenSource { + /// `screen::0` or `window::0`. Chromium's + /// `DesktopMediaID::Parse` reads these directly; we rely on its existing + /// parser rather than round-tripping through the extension API. + pub id: String, + /// `"screen"` or `"window"`. + pub kind: String, + /// Human label shown in the picker (app name + window title, or display + /// name). + pub name: String, + /// Optional application name (windows only). + #[serde(skip_serializing_if = "Option::is_none")] + pub app_name: Option, + /// PNG thumbnail base64-encoded. Empty when enumeration cheap-path is + /// used — UI renders a placeholder. + #[serde(default)] + pub thumbnail_png_base64: String, +} + +// --------------------------------------------------------------------------- +// Enumeration +// --------------------------------------------------------------------------- + +#[tauri::command] +pub fn screen_share_list_sources() -> Result, String> { + #[cfg(target_os = "macos")] + { + macos::enumerate().map_err(|e| format!("enumerate failed: {e}")) + } + #[cfg(not(target_os = "macos"))] + { + Err("screen-share picker not implemented for this platform yet".to_string()) + } +} + +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ThumbnailArgs { + pub id: String, +} + +/// Capture a single source's thumbnail as base64 PNG. Called per-source in +/// parallel from the picker shim so the picker UI opens immediately and +/// thumbnails fade in as they arrive, rather than blocking the whole +/// enumeration call for 1-2 seconds on a many-window desktop. 
+#[tauri::command] +pub fn screen_share_thumbnail(args: ThumbnailArgs) -> Result { + #[cfg(target_os = "macos")] + { + macos::thumbnail_for_id(&args.id).ok_or_else(|| "thumbnail unavailable".to_string()) + } + #[cfg(not(target_os = "macos"))] + { + let _ = args; + Err("thumbnails not implemented for this platform yet".to_string()) + } +} + +// --------------------------------------------------------------------------- +// macOS backend +// --------------------------------------------------------------------------- + +#[cfg(target_os = "macos")] +mod macos { + use super::ScreenSource; + + use core::ffi::c_void; + use std::ffi::CStr; + + // Minimal CoreGraphics FFI so we don't need an extra `core-graphics` + // crate — these few symbols cover display + window enumeration and + // avoid pulling in ~50 extra transitive deps. + + #[link(name = "CoreGraphics", kind = "framework")] + extern "C" { + fn CGGetActiveDisplayList( + max_displays: u32, + active_displays: *mut u32, + display_count: *mut u32, + ) -> i32; + fn CGMainDisplayID() -> u32; + fn CGDisplayPixelsWide(display: u32) -> usize; + fn CGDisplayPixelsHigh(display: u32) -> usize; + fn CGWindowListCopyWindowInfo( + option: u32, + relative_to_window: u32, + ) -> *const c_void; // CFArrayRef + fn CGDisplayCreateImage(display: u32) -> *const c_void; // CGImageRef + fn CGWindowListCreateImage( + screen_bounds: CGRect, + list_option: u32, + window_id: u32, + image_option: u32, + ) -> *const c_void; + fn CGImageRelease(image: *const c_void); + fn CGImageGetWidth(image: *const c_void) -> usize; + fn CGImageGetHeight(image: *const c_void) -> usize; + } + + #[link(name = "ImageIO", kind = "framework")] + extern "C" { + fn CGImageDestinationCreateWithData( + data: *const c_void, // CFMutableDataRef + uti: *const c_void, // CFStringRef + count: usize, + options: *const c_void, + ) -> *const c_void; + fn CGImageDestinationAddImage( + dest: *const c_void, + image: *const c_void, + properties: *const c_void, + ); + fn 
CGImageDestinationFinalize(dest: *const c_void) -> bool; + } + + #[link(name = "CoreFoundation", kind = "framework")] + extern "C" { + fn CFRelease(cf: *const c_void); + fn CFArrayGetCount(array: *const c_void) -> isize; + fn CFArrayGetValueAtIndex(array: *const c_void, idx: isize) -> *const c_void; + fn CFDictionaryGetValue(dict: *const c_void, key: *const c_void) -> *const c_void; + fn CFStringGetCStringPtr(s: *const c_void, encoding: u32) -> *const i8; + fn CFStringGetCString( + s: *const c_void, + buffer: *mut i8, + buffer_size: isize, + encoding: u32, + ) -> bool; + fn CFStringGetLength(s: *const c_void) -> isize; + fn CFNumberGetValue(number: *const c_void, the_type: i32, value_ptr: *mut c_void) -> bool; + fn CFStringCreateWithCString( + alloc: *const c_void, + c_str: *const i8, + encoding: u32, + ) -> *const c_void; + fn CFDataCreateMutable(alloc: *const c_void, capacity: isize) -> *const c_void; + fn CFDataGetLength(data: *const c_void) -> isize; + fn CFDataGetBytePtr(data: *const c_void) -> *const u8; + } + + #[repr(C)] + #[derive(Copy, Clone)] + struct CGPoint { x: f64, y: f64 } + #[repr(C)] + #[derive(Copy, Clone)] + struct CGSize { width: f64, height: f64 } + #[repr(C)] + #[derive(Copy, Clone)] + struct CGRect { origin: CGPoint, size: CGSize } + + const CG_RECT_NULL: CGRect = CGRect { + origin: CGPoint { x: f64::INFINITY, y: f64::INFINITY }, + size: CGSize { width: 0.0, height: 0.0 }, + }; + // kCGWindowListOptionIncludingWindow. + const K_CG_WINDOW_LIST_OPTION_INCLUDING_WINDOW: u32 = 1 << 3; + // kCGWindowImageBoundsIgnoreFraming | kCGWindowImageNominalResolution. + const K_CG_WINDOW_IMAGE_BOUNDS_IGNORE_FRAMING: u32 = 1 << 0; + const K_CG_WINDOW_IMAGE_NOMINAL_RESOLUTION: u32 = 1 << 4; + + const K_CFSTRING_ENCODING_UTF8: u32 = 0x08000100; + const K_CFNUMBER_SINT64_TYPE: i32 = 4; + // kCGWindowListOptionOnScreenOnly | kCGWindowListExcludeDesktopElements. 
+ const K_CG_WINDOW_LIST_ON_SCREEN_ONLY: u32 = 1 << 0; + const K_CG_WINDOW_LIST_EXCLUDE_DESKTOP_ELEMENTS: u32 = 1 << 4; + + fn cfstr(s: &str) -> *const c_void { + let c = std::ffi::CString::new(s).expect("cfstr contains NUL"); + unsafe { CFStringCreateWithCString(std::ptr::null(), c.as_ptr(), K_CFSTRING_ENCODING_UTF8) } + } + + fn cfstring_to_string(cf: *const c_void) -> Option { + if cf.is_null() { + return None; + } + unsafe { + let ptr = CFStringGetCStringPtr(cf, K_CFSTRING_ENCODING_UTF8); + if !ptr.is_null() { + return CStr::from_ptr(ptr).to_str().ok().map(|s| s.to_string()); + } + let len = CFStringGetLength(cf); + // UTF-8 safety margin: 4 bytes per codepoint + NUL. + let cap = (len as usize) * 4 + 1; + let mut buf = vec![0i8; cap]; + if CFStringGetCString(cf, buf.as_mut_ptr(), cap as isize, K_CFSTRING_ENCODING_UTF8) { + let c = CStr::from_ptr(buf.as_ptr()); + c.to_str().ok().map(|s| s.to_string()) + } else { + None + } + } + } + + fn cfnumber_to_u64(num: *const c_void) -> Option { + if num.is_null() { + return None; + } + let mut v: i64 = 0; + unsafe { + if CFNumberGetValue(num, K_CFNUMBER_SINT64_TYPE, &mut v as *mut _ as *mut c_void) { + Some(v as u64) + } else { + None + } + } + } + + /// Parse a `screen::0` / `window::0` source ID and capture its + /// thumbnail as base64 PNG. Returns `None` if the ID is malformed or + /// the underlying capture API returns a null/zero-size image. + pub(super) fn thumbnail_for_id(id: &str) -> Option { + let mut parts = id.splitn(3, ':'); + let kind = parts.next()?; + let num = parts.next()?.parse::().ok()?; + let b64 = match kind { + "screen" => screen_thumbnail_b64(num), + "window" => window_thumbnail_b64(num), + _ => return None, + }; + if b64.is_empty() { + None + } else { + Some(b64) + } + } + + pub(super) fn enumerate() -> Result, String> { + let mut out = Vec::new(); + out.extend(enumerate_screens()); + out.extend(enumerate_windows()); + Ok(out) + } + + /// Encode a CGImageRef as PNG bytes via ImageIO. 
Caller releases the + /// image. Returns `None` on any ImageIO error so enumeration never + /// fails because a single thumbnail couldn't be captured. + fn cgimage_to_png_bytes(image: *const c_void) -> Option> { + if image.is_null() { + return None; + } + unsafe { + let uti_key = cfstr("public.png"); + let data = CFDataCreateMutable(std::ptr::null(), 0); + if data.is_null() { + CFRelease(uti_key); + return None; + } + let dest = CGImageDestinationCreateWithData(data, uti_key, 1, std::ptr::null()); + if dest.is_null() { + CFRelease(uti_key); + CFRelease(data); + return None; + } + CGImageDestinationAddImage(dest, image, std::ptr::null()); + let ok = CGImageDestinationFinalize(dest); + CFRelease(dest); + CFRelease(uti_key); + if !ok { + CFRelease(data); + return None; + } + let len = CFDataGetLength(data) as usize; + let ptr = CFDataGetBytePtr(data); + let bytes = std::slice::from_raw_parts(ptr, len).to_vec(); + CFRelease(data); + Some(bytes) + } + } + + fn screen_thumbnail_b64(display_id: u32) -> String { + use base64::{engine::general_purpose::STANDARD, Engine as _}; + unsafe { + let image = CGDisplayCreateImage(display_id); + if image.is_null() { + return String::new(); + } + let png = cgimage_to_png_bytes(image); + CGImageRelease(image); + png.map(|b| STANDARD.encode(b)).unwrap_or_default() + } + } + + fn window_thumbnail_b64(window_id: u32) -> String { + use base64::{engine::general_purpose::STANDARD, Engine as _}; + unsafe { + let opts = K_CG_WINDOW_IMAGE_BOUNDS_IGNORE_FRAMING + | K_CG_WINDOW_IMAGE_NOMINAL_RESOLUTION; + let image = CGWindowListCreateImage( + CG_RECT_NULL, + K_CG_WINDOW_LIST_OPTION_INCLUDING_WINDOW, + window_id, + opts, + ); + if image.is_null() { + return String::new(); + } + if CGImageGetWidth(image) == 0 || CGImageGetHeight(image) == 0 { + CGImageRelease(image); + return String::new(); + } + let png = cgimage_to_png_bytes(image); + CGImageRelease(image); + png.map(|b| STANDARD.encode(b)).unwrap_or_default() + } + } + + fn enumerate_screens() 
-> Vec { + let mut ids = [0u32; 32]; + let mut count: u32 = 0; + let err = unsafe { CGGetActiveDisplayList(ids.len() as u32, ids.as_mut_ptr(), &mut count) }; + if err != 0 { + log::warn!("[screen-share] CGGetActiveDisplayList error={err}"); + return Vec::new(); + } + let main = unsafe { CGMainDisplayID() }; + ids.iter() + .take(count as usize) + .enumerate() + .map(|(idx, &display_id)| { + let w = unsafe { CGDisplayPixelsWide(display_id) }; + let h = unsafe { CGDisplayPixelsHigh(display_id) }; + let is_main = display_id == main; + let name = if is_main { + format!("Main Screen ({}×{})", w, h) + } else { + format!("Display {} ({}×{})", idx + 1, w, h) + }; + ScreenSource { + id: format!("screen:{}:0", display_id), + kind: "screen".to_string(), + name, + app_name: None, + // Thumbnails are now lazy-fetched by the shim via + // `screen_share_thumbnail` in parallel with the + // picker render, so enumeration stays fast. + thumbnail_png_base64: String::new(), + } + }) + .collect() + } + + fn enumerate_windows() -> Vec { + let opts = K_CG_WINDOW_LIST_ON_SCREEN_ONLY | K_CG_WINDOW_LIST_EXCLUDE_DESKTOP_ELEMENTS; + let array = unsafe { CGWindowListCopyWindowInfo(opts, 0) }; + if array.is_null() { + log::warn!("[screen-share] CGWindowListCopyWindowInfo returned null"); + return Vec::new(); + } + let key_window_number = cfstr("kCGWindowNumber"); + let key_window_name = cfstr("kCGWindowName"); + let key_owner_name = cfstr("kCGWindowOwnerName"); + let key_bounds = cfstr("kCGWindowBounds"); + let key_layer = cfstr("kCGWindowLayer"); + + let count = unsafe { CFArrayGetCount(array) }; + let mut out: Vec = Vec::new(); + for i in 0..count { + let dict = unsafe { CFArrayGetValueAtIndex(array, i) }; + if dict.is_null() { + continue; + } + let number_cf = unsafe { CFDictionaryGetValue(dict, key_window_number) }; + let layer_cf = unsafe { CFDictionaryGetValue(dict, key_layer) }; + let window_id = match cfnumber_to_u64(number_cf) { + Some(v) => v, + None => continue, + }; + // Skip menu 
bar / dock / system chrome (layer != 0 → non-normal + // window). Normal app windows live at layer 0. + let layer = cfnumber_to_u64(layer_cf).unwrap_or(0); + if layer != 0 { + continue; + } + // TODO: skip microscopic windows (tooltips, hidden panels). + if let Some(bounds_dict) = unsafe { + CFDictionaryGetValue(dict, key_bounds).as_ref() + } { + // kCGWindowBounds is actually a CFDictionary with Width/Height + // keys. Intended cheap filter: skip when "Width" is < 50. That is + // NOT implemented yet (nothing is skipped here); a full parse isn't + // worth it for the MVP; Chromium renders a scrollable picker anyway. + let _ = bounds_dict; + } + let title = unsafe { CFDictionaryGetValue(dict, key_window_name) }; + let owner = unsafe { CFDictionaryGetValue(dict, key_owner_name) }; + let title_str = cfstring_to_string(title).unwrap_or_default(); + let owner_str = cfstring_to_string(owner).unwrap_or_default(); + // Windows with no title are usually uninteresting (background + // helpers). Skip unless owner is informative and the window is + // the owner's only one — for MVP, simpler to just drop them. 
+ if title_str.is_empty() { + continue; + } + let name = if owner_str.is_empty() { + title_str.clone() + } else { + format!("{} — {}", owner_str, title_str) + }; + out.push(ScreenSource { + id: format!("window:{}:0", window_id), + kind: "window".to_string(), + name, + app_name: if owner_str.is_empty() { + None + } else { + Some(owner_str) + }, + thumbnail_png_base64: String::new(), + }); + } + unsafe { + CFRelease(key_window_number); + CFRelease(key_window_name); + CFRelease(key_owner_name); + CFRelease(key_bounds); + CFRelease(key_layer); + CFRelease(array); + } + out + } +} From f15759406cbb61c9eed3b134c89733070623ac42 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Thu, 23 Apr 2026 02:30:24 +0530 Subject: [PATCH 02/11] feat(app): register screen-share commands (#713) Wire `screen_share_list_sources` and `screen_share_thumbnail` into the Tauri invoke_handler so the getDisplayMedia shim injected into child webviews (Meet, Slack, Discord, Zoom) can reach them over IPC. Co-Authored-By: Claude Opus 4.7 --- app/src-tauri/src/lib.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/app/src-tauri/src/lib.rs b/app/src-tauri/src/lib.rs index 7b41bd0bb..7004fa56c 100644 --- a/app/src-tauri/src/lib.rs +++ b/app/src-tauri/src/lib.rs @@ -11,6 +11,8 @@ mod discord_scanner; mod imessage_scanner; mod notification_settings; #[cfg(feature = "cef")] +mod screen_capture; +#[cfg(feature = "cef")] mod slack_scanner; #[cfg(feature = "cef")] mod telegram_scanner; @@ -921,6 +923,10 @@ pub fn run() { webview_accounts::webview_set_focused_account, notification_settings::notification_settings_get, notification_settings::notification_settings_set, + #[cfg(feature = "cef")] + screen_capture::screen_share_list_sources, + #[cfg(feature = "cef")] + screen_capture::screen_share_thumbnail, activate_main_window, show_native_notification ]) From 8b508a5c6c899c2374643fe5f0fa8d9a8c9dd97d Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Thu, 23 Apr 2026 02:30:32 +0530 Subject: [PATCH 03/11] 
feat(permissions): allow screen-share commands from host and recipe webviews (#713) Add `screen_share_list_sources` and `screen_share_thumbnail` to both permission scopes: - `allow-core-process` so the host window can call them if a future surface ever needs enumeration host-side. - `allow-webview-recipe` so the per-provider recipe runtime (which now includes the in-page getDisplayMedia picker) can enumerate and fetch thumbnails directly from the third-party origin. Co-Authored-By: Claude Opus 4.7 --- app/src-tauri/permissions/allow-core-process.toml | 2 ++ app/src-tauri/permissions/allow-webview-recipe.toml | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/app/src-tauri/permissions/allow-core-process.toml b/app/src-tauri/permissions/allow-core-process.toml index c1348ec9a..0c9b84383 100644 --- a/app/src-tauri/permissions/allow-core-process.toml +++ b/app/src-tauri/permissions/allow-core-process.toml @@ -20,5 +20,7 @@ allow = [ "webview_account_show", "webview_recipe_event", "activate_main_window", + "screen_share_list_sources", + "screen_share_thumbnail", ] deny = [] diff --git a/app/src-tauri/permissions/allow-webview-recipe.toml b/app/src-tauri/permissions/allow-webview-recipe.toml index 97cb1767e..ced3a5e91 100644 --- a/app/src-tauri/permissions/allow-webview-recipe.toml +++ b/app/src-tauri/permissions/allow-webview-recipe.toml @@ -1,7 +1,7 @@ [[permission]] identifier = "allow-webview-recipe" -description = "Allow injected per-provider recipe code (running inside the third-party site's origin) to invoke the recipe ingest command back to Rust." +description = "Allow injected per-provider recipe code (running inside the third-party site's origin) to invoke the recipe ingest command back to Rust. Also includes screen_share_list_sources so the in-page getDisplayMedia shim (#713) can enumerate real screens and windows to build its picker UI inside the child webview's DOM." 
[permission.commands] -allow = ["webview_recipe_event"] +allow = ["webview_recipe_event", "screen_share_list_sources", "screen_share_thumbnail"] deny = [] From aa250c6a1279a08f3c0cff9eea5e8ca1cc091d00 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Thu, 23 Apr 2026 02:30:55 +0530 Subject: [PATCH 04/11] feat(webview): in-page screen-share picker for getDisplayMedia (#713) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Child webviews (Meet, Slack Huddles, Discord, Zoom) run under CEF Alloy, which does not link Chromium's DesktopMediaPicker. Without an interceptor the permission handler auto-grants `DESKTOP_VIDEO_CAPTURE` and Chromium silently picks the primary display — the user never sees a picker and their whole screen streams. This shim, injected via `WebviewBuilder.initialization_script`: - Overrides `navigator.mediaDevices.getDisplayMedia` on the MediaDevices prototype (plain assignment is non-writable in modern Chromium, so we `Object.defineProperty` on the prototype). - Enumerates real sources via the new `screen_share_list_sources` Tauri command, then renders a picker overlay directly into the host page's DOM (no main-window React modal) so it works uniformly for every provider without per-provider host-side glue and without native-view stacking problems. Picker DOM is built imperatively to satisfy Meet's Trusted Types CSP (innerHTML would throw). - Lazy-fetches thumbnails in parallel through `screen_share_thumbnail` so the picker opens instantly and images fade in. - On Share, calls `getUserMedia` with a hand-crafted `chromeMediaSource: 'desktop' + chromeMediaSourceId` constraint. Our CEF permission callback grants `DESKTOP_VIDEO_CAPTURE` bits, so Chromium honours the ID and opens a real capture device. 
- Patches the returned track's `getSettings()` to expose `displaySurface: 'monitor' | 'window'` and `contentHint: 'detail'`, because Meet refuses streams that lack the display-media metadata real getDisplayMedia would populate. - Falls back to video-only if the page asked for audio but audio+video getUserMedia rejects (stock macOS has no loopback audio driver). - Throws `AbortError` on picker cancel so Meet silently dismisses instead of showing "Meet needs permission to screen share". - Shims `navigator.permissions.query` to report `display-capture` as `granted`; CEF Alloy's Permissions API defaults to `prompt`/`denied` and pages that consult it otherwise refuse to call getDisplayMedia at all. Co-Authored-By: Claude Opus 4.7 --- app/src-tauri/src/webview_accounts/runtime.js | 581 ++++++++++++++++++ 1 file changed, 581 insertions(+) diff --git a/app/src-tauri/src/webview_accounts/runtime.js b/app/src-tauri/src/webview_accounts/runtime.js index 754339f8a..6dee34795 100644 --- a/app/src-tauri/src/webview_accounts/runtime.js +++ b/app/src-tauri/src/webview_accounts/runtime.js @@ -95,4 +95,585 @@ window.__openhumanRecipe = api; send('log', { level: 'info', msg: '[recipe-runtime] ready provider=' + ctx.provider + ' accountId=' + ctx.accountId }); + + // --- #713 getDisplayMedia shim --- + // + // Background: embedded webviews run under CEF Alloy, which does not link + // Chromium's DesktopMediaPicker. Without an interceptor, `getDisplayMedia` + // gets auto-granted by our permission handler and Chromium silently picks + // the primary display (issue #713 AC2: "OS screen/window picker appears"). + // + // The picker UI is injected DIRECTLY into the child webview's own DOM + // rather than rendered as a React modal in the main OpenHuman window. + // Two reasons: + // (a) Works uniformly for every embedded provider — Meet, Slack + // Huddles, Discord, Zoom — without per-provider host-side glue. 
+ // (b) Dodges the CEF native-view stacking problem: a React modal in + // the main window is always occluded by the child webview's + // NSView, forcing a hide/bounds dance that flickers the embedded + // site. An overlay inside the page is stacked in the page's own + // compositing context, so it sits above Meet/Slack UI naturally. + // + // Flow: + // 1. Shim calls Tauri `screen_share_list_sources` to enumerate real + // screens (`screen:DISPLAY_ID:0`) and windows + // (`window:WINDOW_ID:0`) natively. + // 2. Shim builds a fixed-position picker overlay inside the page's + // document and awaits the user's choice. + // 3. On Share, shim calls `getUserMedia` with a hand-crafted + // `chromeMediaSource: 'desktop' + chromeMediaSourceId` constraint. + // Stage 0 PoC proved Chromium honours the ID directly because our + // CEF permission callback grants `DESKTOP_VIDEO_CAPTURE` bits. + // 4. On Cancel, shim throws `AbortError` (not the `NotAllowedError` + // real Chrome emits) so pages such as Meet dismiss it silently. + (function installGetDisplayMediaShim() { + if (!navigator.mediaDevices || typeof navigator.mediaDevices.getDisplayMedia !== 'function') { + // Never had getDisplayMedia to begin with (non-WebRTC webview); skip. + return; + } + if (navigator.mediaDevices.__ohGdmShimInstalled) return; + + // `navigator.mediaDevices.getDisplayMedia` is a WebIDL-defined prototype + // method on `MediaDevices.prototype`. Chromium marks it + // `writable: true, configurable: true` but *only* on the prototype — + // plain `navigator.mediaDevices.getDisplayMedia = ...` on the instance + // creates an own-property shadow that Chromium's IDL bindings bypass + // when the page actually invokes the method. We override on the + // prototype with `defineProperty` so the shim is what runs for every + // MediaDevices instance in this page (including any iframes that + // inherit from the same prototype). 
+ const proto = Object.getPrototypeOf(navigator.mediaDevices); + const descriptor = Object.getOwnPropertyDescriptor(proto, 'getDisplayMedia'); + const origGetDisplayMedia = (descriptor && descriptor.value + ? descriptor.value + : navigator.mediaDevices.getDisplayMedia + ).bind(navigator.mediaDevices); + + const shim = async function (constraints) { + constraints = constraints || {}; + send('log', { + level: 'info', + msg: '[gdm-shim] getDisplayMedia intercepted audio=' + !!constraints.audio, + }); + + let sources; + try { + sources = await rawInvoke('screen_share_list_sources', {}); + } catch (e) { + send('log', { + level: 'error', + msg: '[gdm-shim] list_sources IPC failed: ' + (e && e.message ? e.message : String(e)), + }); + return origGetDisplayMedia(constraints); + } + if (!Array.isArray(sources) || sources.length === 0) { + send('log', { level: 'warn', msg: '[gdm-shim] no sources enumerated, falling back' }); + return origGetDisplayMedia(constraints); + } + + const pick = await showInPagePicker(sources); + if (!pick) { + send('log', { level: 'info', msg: '[gdm-shim] user cancelled picker' }); + // Meet (and other video-conf sites) treat `NotAllowedError` on + // getDisplayMedia as "the browser blocked us" and pop a + // "needs permission" modal. Real Chrome ALSO throws + // NotAllowedError on picker cancel, but Meet silently swallows + // it there — presumably via a separate Permissions API check + // that reports 'granted'. Since we can't easily signal that + // state in CEF, throw `AbortError` instead: it's the MDN-blessed + // "user interrupted a UI operation" error and most sites (Meet + // included) dismiss it silently. 
+ throw new DOMException('User cancelled screen share picker', 'AbortError'); + } + send('log', { + level: 'info', + msg: '[gdm-shim] picked id=' + pick.id + ' kind=' + pick.kind, + }); + const videoMandatory = { + chromeMediaSource: 'desktop', + chromeMediaSourceId: pick.id, + maxFrameRate: 30, + }; + // System-audio capture via `chromeMediaSource: 'desktop'` needs a + // loopback driver on macOS (no stock API). If the page requested + // audio we try with audio first and fall back to video-only on + // rejection so Meet/Slack/etc don't see a generic "Can't share" + // error on every attempt. Chromium cleanly handles a missing audio + // track in the SDP. + const videoOnly = { video: { mandatory: videoMandatory }, audio: false }; + + let stream; + if (constraints.audio) { + const audioMandatory = { + chromeMediaSource: 'desktop', + chromeMediaSourceId: pick.id, + }; + try { + stream = await navigator.mediaDevices.getUserMedia({ + video: { mandatory: videoMandatory }, + audio: { mandatory: audioMandatory }, + }); + } catch (e) { + send('log', { + level: 'warn', + msg: + '[gdm-shim] audio+video getUserMedia rejected (' + + (e && e.name ? e.name : '?') + + '), retrying video-only', + }); + stream = await navigator.mediaDevices.getUserMedia(videoOnly); + } + } else { + stream = await navigator.mediaDevices.getUserMedia(videoOnly); + } + + // Stream returned by the legacy `chromeMediaSource: 'desktop'` + // getUserMedia path is a real capture stream but its tracks lack + // the display-media metadata the page expects from real + // getDisplayMedia. Google Meet (and others) inspect + // `track.getSettings().displaySurface` before they will route the + // track over WebRTC — if the field is missing they throw "Can't + // share your screen — Something went wrong". + // + // Patch each video track to expose the right displaySurface and + // a `contentHint` of `detail` (standard WebRTC screen-capture + // content hint). 
The underlying capture pipeline is unchanged; + // we're only fixing the introspectable metadata the page relies + // on to identify a display-media track. + const displaySurface = pick.kind === 'screen' ? 'monitor' : 'window'; + stream.getVideoTracks().forEach(function (track) { + try { track.contentHint = 'detail'; } catch (_) { /* ignore */ } + try { + const origGetSettings = track.getSettings.bind(track); + Object.defineProperty(track, 'getSettings', { + configurable: true, + writable: true, + value: function () { + const base = origGetSettings() || {}; + return Object.assign({}, base, { + displaySurface: displaySurface, + logicalSurface: true, + cursor: 'motion', + }); + }, + }); + } catch (e) { + send('log', { + level: 'warn', + msg: '[gdm-shim] patch getSettings failed: ' + (e && e.message ? e.message : e), + }); + } + }); + + return stream; + }; + + // In-page picker. Renders straight into the host page's body so the + // overlay stacks above the site's own compositor (Meet/Slack/Discord + // UI) without any native-view gymnastics. All nodes are namespaced + // under `__ohsp_*` class/ID prefixes and attached to a closed shadow + // root where possible to avoid colliding with the host page's CSS. + function showInPagePicker(sources) { + return new Promise(function (resolveOuter) { + function host() { return (document.body || document.documentElement); } + if (!host()) { + // Host not ready — no retry; resolve null (cancel) once the DOM is parsed. 
+ document.addEventListener('DOMContentLoaded', function () { + resolveOuter(null); + }, { once: true }); + return; + } + + const root = document.createElement('div'); + root.setAttribute('data-openhuman-screen-share-picker', ''); + root.style.cssText = [ + 'all: initial', + 'position: fixed', + 'inset: 0', + 'z-index: 2147483647', + 'display: flex', + 'align-items: center', + 'justify-content: center', + 'background: rgba(0,0,0,0.55)', + 'backdrop-filter: blur(6px)', + '-webkit-backdrop-filter: blur(6px)', + 'font-family: -apple-system, BlinkMacSystemFont, "Inter", "Segoe UI", sans-serif', + ].join(';'); + + const shadow = root.attachShadow ? root.attachShadow({ mode: 'closed' }) : root; + + const styleTag = document.createElement('style'); + styleTag.textContent = [ + '* { box-sizing: border-box; margin: 0; padding: 0; font-family: inherit; }', + '.card { background: #fff; color: #1C1917; border-radius: 16px; width: min(640px, 92vw);', + ' max-height: 86vh; box-shadow: 0 24px 64px rgba(0,0,0,0.35); overflow: hidden;', + ' display: flex; flex-direction: column; }', + '.head { padding: 20px 24px; border-bottom: 1px solid #E7E5E4; display: flex;', + ' align-items: flex-start; justify-content: space-between; gap: 16px; }', + '.title { font-size: 17px; font-weight: 600; color: #1C1917; }', + '.origin { margin-top: 4px; font-size: 13px; color: #78716C; }', + '.closebtn { width: 32px; height: 32px; border: none; background: transparent;', + ' color: #78716C; cursor: pointer; border-radius: 8px; font-size: 18px;', + ' display: flex; align-items: center; justify-content: center; }', + '.closebtn:hover { background: #F5F5F4; color: #1C1917; }', + '.tabs { display: flex; gap: 4px; padding: 0 24px; border-bottom: 1px solid #E7E5E4; }', + '.tab { appearance: none; -webkit-appearance: none; background: transparent; border: 0;', + ' padding: 12px 16px; font-size: 14px; font-weight: 500; color: #78716C;', + ' cursor: pointer; border-bottom: 2px solid transparent; }', + 
'.tab.active { color: #4A83DD; border-bottom-color: #4A83DD; }', + '.body { padding: 20px 24px; overflow-y: auto; }', + '.grid { display: grid; grid-template-columns: repeat(2, minmax(0,1fr)); gap: 12px; }', + '.srcbtn { background: #FAFAF9; border: 2px solid #E7E5E4; border-radius: 10px;', + ' padding: 0; cursor: pointer; text-align: left; overflow: hidden;', + ' transition: border-color .15s, box-shadow .15s; }', + '.srcbtn:hover { border-color: #D4D4D1; }', + '.srcbtn.selected { border-color: #4A83DD;', + ' box-shadow: 0 0 0 3px rgba(74,131,221,0.18); }', + '.srcthumb { aspect-ratio: 16/10; background: #F5F5F4; display: flex;', + ' align-items: center; justify-content: center; color: #A8A29E;', + ' font-size: 32px; }', + '.srcname { padding: 8px 10px; font-size: 13px; color: #1C1917; font-weight: 500;', + ' white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }', + '.srcapp { padding: 0 10px 8px; font-size: 11px; color: #78716C;', + ' white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }', + '.empty { padding: 32px 0; text-align: center; color: #78716C; font-size: 13px; }', + '.foot { padding: 12px 16px; border-top: 1px solid #E7E5E4; display: flex;', + ' justify-content: flex-end; gap: 8px; }', + '.btn { appearance: none; -webkit-appearance: none; border: 0; border-radius: 10px;', + ' padding: 9px 16px; font-size: 14px; font-weight: 500; cursor: pointer; }', + '.btn-secondary { background: transparent; color: #1C1917; }', + '.btn-secondary:hover { background: #F5F5F4; }', + '.btn-primary { background: #4A83DD; color: #fff; }', + '.btn-primary:hover { background: #3D6DC4; }', + '.btn-primary:disabled { background: #D4D4D1; cursor: not-allowed; }', + ].join('\n'); + shadow.appendChild(styleTag); + + function hostnameOf(url) { + try { return new URL(url).hostname || url; } catch (e) { return url; } + } + + const origin = (typeof location !== 'undefined' && location.origin) || 'this site'; + let activeTab = sources.some(function (s) { return 
s.kind === 'screen'; }) + ? 'screen' + : 'window'; + let selectedId = null; + + // DOM is constructed imperatively (no innerHTML) because hosts + // like Google Meet ship strict Trusted Types CSP that rejects + // string-based HTML assignment with a TypeError. `createElement` + // and `appendChild` are policy-free and work everywhere. + const card = document.createElement('div'); + card.className = 'card'; + + function el(tag, attrs, text) { + const node = document.createElement(tag); + if (attrs) { + Object.keys(attrs).forEach(function (k) { + if (k === 'className') node.className = attrs[k]; + else node.setAttribute(k, attrs[k]); + }); + } + if (text != null) node.textContent = text; + return node; + } + + const head = el('div', { className: 'head' }); + const headLeft = el('div'); + headLeft.appendChild(el('div', { className: 'title' }, 'Choose what to share')); + const originEl = el( + 'div', + { className: 'origin' }, + hostnameOf(origin) + ' wants to share your screen.' + ); + headLeft.appendChild(originEl); + head.appendChild(headLeft); + const closeBtn = el( + 'button', + { className: 'closebtn', 'data-action': 'cancel', 'aria-label': 'Cancel' }, + '✕' + ); + head.appendChild(closeBtn); + card.appendChild(head); + + const tabs = el('div', { className: 'tabs' }); + const screenTab = el('button', { className: 'tab', 'data-tab': 'screen' }, 'Entire Screen'); + const windowTab = el('button', { className: 'tab', 'data-tab': 'window' }, 'Window'); + tabs.appendChild(screenTab); + tabs.appendChild(windowTab); + card.appendChild(tabs); + + const bodyEl = el('div', { className: 'body' }); + const gridEl = el('div', { className: 'grid' }); + bodyEl.appendChild(gridEl); + card.appendChild(bodyEl); + + const foot = el('div', { className: 'foot' }); + const cancelBtn = el( + 'button', + { className: 'btn btn-secondary', 'data-action': 'cancel' }, + 'Cancel' + ); + const shareBtn = el('button', { className: 'btn btn-primary' }, 'Share'); + shareBtn.disabled = true; + 
foot.appendChild(cancelBtn); + foot.appendChild(shareBtn); + card.appendChild(foot); + + shadow.appendChild(card); + + const tabButtons = [screenTab, windowTab]; + + function setTab(next) { + activeTab = next; + tabButtons.forEach(function (btn) { + btn.classList.toggle('active', btn.getAttribute('data-tab') === activeTab); + }); + render(); + } + + function render() { + while (gridEl.firstChild) gridEl.removeChild(gridEl.firstChild); + const filtered = sources.filter(function (s) { return s.kind === activeTab; }); + if (filtered.length === 0) { + const empty = document.createElement('div'); + empty.className = 'empty'; + empty.textContent = + 'No ' + (activeTab === 'screen' ? 'screens' : 'windows') + ' available.'; + gridEl.appendChild(empty); + shareBtn.disabled = true; + return; + } + filtered.forEach(function (src) { + const btn = document.createElement('button'); + btn.className = 'srcbtn' + (selectedId === src.id ? ' selected' : ''); + btn.setAttribute('data-source-id', src.id); + const thumb = document.createElement('div'); + thumb.className = 'srcthumb'; + if (src.thumbnailPngBase64) { + const img = document.createElement('img'); + img.src = 'data:image/png;base64,' + src.thumbnailPngBase64; + img.alt = ''; + img.style.cssText = + 'width: 100%; height: 100%; object-fit: contain; display: block;'; + thumb.appendChild(img); + } else { + // Placeholder glyph until the lazy-loaded thumbnail arrives. + thumb.textContent = activeTab === 'screen' ? '□' : '▣'; + rawInvoke('screen_share_thumbnail', { args: { id: src.id } }) + .then(function (b64) { + if (!b64 || typeof b64 !== 'string') return; + // Stash on the source so future re-renders keep the + // thumbnail without re-requesting it. + src.thumbnailPngBase64 = b64; + // The grid may have re-rendered by the time the IPC + // resolves (tab switch, etc). Look up the live node. 
+ const liveBtn = gridEl.querySelector( + '[data-source-id="' + src.id.replace(/"/g, '\\"') + '"]' + ); + if (!liveBtn) return; + const liveThumb = liveBtn.querySelector('.srcthumb'); + if (!liveThumb) return; + while (liveThumb.firstChild) liveThumb.removeChild(liveThumb.firstChild); + const img = document.createElement('img'); + img.src = 'data:image/png;base64,' + b64; + img.alt = ''; + img.style.cssText = + 'width: 100%; height: 100%; object-fit: contain; display: block;'; + liveThumb.appendChild(img); + }) + .catch(function () { /* thumbnail failures degrade gracefully to the glyph */ }); + } + const name = document.createElement('div'); + name.className = 'srcname'; + name.textContent = src.name; + btn.appendChild(thumb); + btn.appendChild(name); + if (src.appName) { + const app = document.createElement('div'); + app.className = 'srcapp'; + app.textContent = src.appName; + btn.appendChild(app); + } + btn.addEventListener('click', function () { + selectedId = src.id; + render(); + }); + btn.addEventListener('dblclick', function () { + selectedId = src.id; + finish(sources.find(function (s) { return s.id === selectedId; }) || null); + }); + gridEl.appendChild(btn); + }); + if (!selectedId || !filtered.some(function (s) { return s.id === selectedId; })) { + selectedId = filtered[0].id; + gridEl.firstChild && gridEl.firstChild.classList.add('selected'); + } + shareBtn.disabled = !selectedId; + } + + tabButtons.forEach(function (btn) { + btn.addEventListener('click', function () { setTab(btn.getAttribute('data-tab')); }); + }); + + let settled = false; + function finish(pick) { + if (settled) return; + settled = true; + window.removeEventListener('keydown', onKey, true); + try { root.remove(); } catch (e) { /* ignore */ } + resolveOuter(pick); + } + + card.querySelectorAll('[data-action="cancel"]').forEach(function (btn) { + btn.addEventListener('click', function () { finish(null); }); + }); + shareBtn.addEventListener('click', function () { + const pick = 
sources.find(function (s) { return s.id === selectedId; }) || null; + finish(pick); + }); + // Clicks on the backdrop (outside the card) cancel. Clicks inside + // the card bubble up to root too, but we stop them there. + root.addEventListener('click', function (e) { + if (e.target === root || e.composedPath()[0] === root) finish(null); + }); + card.addEventListener('click', function (e) { e.stopPropagation(); }); + + function onKey(e) { + if (e.key === 'Escape') { + e.preventDefault(); + e.stopPropagation(); + finish(null); + } + } + window.addEventListener('keydown', onKey, true); + + setTab(activeTab); + host().appendChild(root); + }); + } + + let installed = false; + try { + Object.defineProperty(proto, 'getDisplayMedia', { + configurable: true, + writable: true, + value: shim, + }); + installed = true; + } catch (e) { + send('log', { + level: 'error', + msg: '[gdm-shim] defineProperty(proto) failed: ' + (e && e.message ? e.message : String(e)), + }); + } + if (!installed) { + try { + Object.defineProperty(navigator.mediaDevices, 'getDisplayMedia', { + configurable: true, + writable: true, + value: shim, + }); + installed = true; + } catch (e2) { + send('log', { + level: 'error', + msg: '[gdm-shim] defineProperty(instance) failed: ' + (e2 && e2.message ? e2.message : String(e2)), + }); + } + } + navigator.mediaDevices.__ohGdmShimInstalled = installed; + + // Some pages (Meet) also consult `navigator.permissions.query` and + // branch on the reported state for `display-capture` / + // `camera` / `microphone`. CEF Alloy's Permissions API does not + // reflect what our OnRequestMediaAccessPermission callback will + // grant dynamically, so it defaults to 'prompt' or even 'denied' + // for `display-capture`. A page that sees 'denied' will assume + // sharing is structurally blocked and refuse to call + // getDisplayMedia — or show the "needs permission" modal on cancel. 
+ // We shadow the query for these names so the page sees 'granted' + // and relies on our shim for the actual user decision. + try { + if ( + navigator.permissions && + typeof navigator.permissions.query === 'function' && + !navigator.permissions.__ohPermissionsShimInstalled + ) { + const permProto = Object.getPrototypeOf(navigator.permissions); + const permDescriptor = Object.getOwnPropertyDescriptor(permProto, 'query'); + const origQuery = (permDescriptor && permDescriptor.value + ? permDescriptor.value + : navigator.permissions.query + ).bind(navigator.permissions); + const spoofed = { 'display-capture': 'granted' }; + const spoofedQuery = async function (descriptor) { + const n = descriptor && descriptor.name; + if (n && spoofed[n]) { + return { + state: spoofed[n], + status: spoofed[n], + name: n, + onchange: null, + addEventListener: function () {}, + removeEventListener: function () {}, + dispatchEvent: function () { return true; }, + }; + } + return origQuery(descriptor); + }; + try { + Object.defineProperty(permProto, 'query', { + configurable: true, + writable: true, + value: spoofedQuery, + }); + } catch (e) { + Object.defineProperty(navigator.permissions, 'query', { + configurable: true, + writable: true, + value: spoofedQuery, + }); + } + navigator.permissions.__ohPermissionsShimInstalled = true; + send('log', { level: 'info', msg: '[gdm-shim] permissions.query shim installed' }); + } + } catch (e) { + send('log', { + level: 'warn', + msg: '[gdm-shim] permissions.query shim failed: ' + (e && e.message ? e.message : e), + }); + } + + send('log', { + level: 'info', + msg: + '[gdm-shim] install=' + installed + + ' on ' + ((typeof location !== 'undefined' && location.origin) || '?'), + }); + })(); + + // --- #713 Stage 0 PoC helper (kept for manual smoke-tests) --- + // Usage: `await window.__ohScreenShareTest('screen::0')`. 
+ window.__ohScreenShareTest = async function (sourceId) { + sourceId = sourceId || 'screen:1:0'; + try { + const stream = await navigator.mediaDevices.getUserMedia({ + video: { + mandatory: { + chromeMediaSource: 'desktop', + chromeMediaSourceId: sourceId, + maxFrameRate: 30, + }, + }, + audio: false, + }); + const track = stream.getVideoTracks()[0]; + const label = track && track.label; + stream.getTracks().forEach(function (t) { t.stop(); }); + return { ok: true, trackLabel: label }; + } catch (e) { + return { ok: false, error: (e && (e.name + ': ' + e.message)) || String(e) }; + } + }; })(); From 07b562717a84df30fa9909235a6a13c853d9bec4 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Thu, 23 Apr 2026 02:36:16 +0530 Subject: [PATCH 05/11] style(screen-capture): rustfmt pass on new module (#713) Co-Authored-By: Claude Opus 4.7 --- app/src-tauri/src/screen_capture/mod.rs | 38 ++++++++++++++++--------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/app/src-tauri/src/screen_capture/mod.rs b/app/src-tauri/src/screen_capture/mod.rs index 301fb2346..a51ebb6bb 100644 --- a/app/src-tauri/src/screen_capture/mod.rs +++ b/app/src-tauri/src/screen_capture/mod.rs @@ -118,10 +118,7 @@ mod macos { fn CGMainDisplayID() -> u32; fn CGDisplayPixelsWide(display: u32) -> usize; fn CGDisplayPixelsHigh(display: u32) -> usize; - fn CGWindowListCopyWindowInfo( - option: u32, - relative_to_window: u32, - ) -> *const c_void; // CFArrayRef + fn CGWindowListCopyWindowInfo(option: u32, relative_to_window: u32) -> *const c_void; // CFArrayRef fn CGDisplayCreateImage(display: u32) -> *const c_void; // CGImageRef fn CGWindowListCreateImage( screen_bounds: CGRect, @@ -177,17 +174,32 @@ mod macos { #[repr(C)] #[derive(Copy, Clone)] - struct CGPoint { x: f64, y: f64 } + struct CGPoint { + x: f64, + y: f64, + } #[repr(C)] #[derive(Copy, Clone)] - struct CGSize { width: f64, height: f64 } + struct CGSize { + width: f64, + height: f64, + } #[repr(C)] #[derive(Copy, Clone)] - struct CGRect 
{ origin: CGPoint, size: CGSize } + struct CGRect { + origin: CGPoint, + size: CGSize, + } const CG_RECT_NULL: CGRect = CGRect { - origin: CGPoint { x: f64::INFINITY, y: f64::INFINITY }, - size: CGSize { width: 0.0, height: 0.0 }, + origin: CGPoint { + x: f64::INFINITY, + y: f64::INFINITY, + }, + size: CGSize { + width: 0.0, + height: 0.0, + }, }; // kCGWindowListOptionIncludingWindow. const K_CG_WINDOW_LIST_OPTION_INCLUDING_WINDOW: u32 = 1 << 3; @@ -320,8 +332,8 @@ mod macos { fn window_thumbnail_b64(window_id: u32) -> String { use base64::{engine::general_purpose::STANDARD, Engine as _}; unsafe { - let opts = K_CG_WINDOW_IMAGE_BOUNDS_IGNORE_FRAMING - | K_CG_WINDOW_IMAGE_NOMINAL_RESOLUTION; + let opts = + K_CG_WINDOW_IMAGE_BOUNDS_IGNORE_FRAMING | K_CG_WINDOW_IMAGE_NOMINAL_RESOLUTION; let image = CGWindowListCreateImage( CG_RECT_NULL, K_CG_WINDOW_LIST_OPTION_INCLUDING_WINDOW, @@ -409,9 +421,7 @@ mod macos { continue; } // Skip microscopic windows (tooltips, hidden panels). - if let Some(bounds_dict) = unsafe { - CFDictionaryGetValue(dict, key_bounds).as_ref() - } { + if let Some(bounds_dict) = unsafe { CFDictionaryGetValue(dict, key_bounds).as_ref() } { // kCGWindowBounds is actually a CFDictionary with Width/Height // keys. Cheap filter: if the dict has a "Width" key and it's // < 50, skip. 
Implementing full parse isn't worth it for the From 0dfd8bc1e63f7eaafaa60f94db184b651598cc98 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Thu, 23 Apr 2026 03:02:39 +0530 Subject: [PATCH 06/11] test(screen-capture): cover DesktopMediaID parser (#713) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the `screen:<id>:0` / `window:<id>:0` ID parser out of the macOS-only thumbnail helper into a platform-agnostic `parse_source_id` and add unit tests for: - valid screen and window IDs - trailing segments beyond the canonical `:0` - unknown kind prefixes (tab, browser, empty) - missing or non-numeric numeric segment - u32 overflow and negative numbers - enumerator → parser round-trip shape The parser is where bad shim input would silently degrade to "thumbnail unavailable" in production, so pinning its contract keeps the shim/host boundary honest even though the surrounding macOS capture layer itself can only be exercised in live-app testing. Co-Authored-By: Claude Opus 4.7 --- app/src-tauri/src/screen_capture/mod.rs | 98 +++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 6 deletions(-) diff --git a/app/src-tauri/src/screen_capture/mod.rs b/app/src-tauri/src/screen_capture/mod.rs index a51ebb6bb..419611221 100644 --- a/app/src-tauri/src/screen_capture/mod.rs +++ b/app/src-tauri/src/screen_capture/mod.rs @@ -76,6 +76,30 @@ pub struct ThumbnailArgs { pub id: String, } +/// What kind of source a parsed DesktopMediaID-format string describes. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum SourceKind { + Screen, + Window, +} + +/// Parse a `screen::0` / `window::0` source ID into +/// `(kind, numeric id)`. Returns `None` if the prefix is unknown, the +/// numeric segment doesn't fit in a `u32`, or the shape otherwise doesn't +/// match what the shim constructed from `screen_share_list_sources`.
Pure +/// logic so it can be unit-tested without touching platform APIs; macOS +/// callers use it before dispatching to the capture backend. +pub(crate) fn parse_source_id(id: &str) -> Option<(SourceKind, u32)> { + let mut parts = id.splitn(3, ':'); + let kind = match parts.next()? { + "screen" => SourceKind::Screen, + "window" => SourceKind::Window, + _ => return None, + }; + let num = parts.next()?.parse::().ok()?; + Some((kind, num)) +} + /// Capture a single source's thumbnail as base64 PNG. Called per-source in /// parallel from the picker shim so the picker UI opens immediately and /// thumbnails fade in as they arrive, rather than blocking the whole @@ -258,13 +282,10 @@ mod macos { /// thumbnail as base64 PNG. Returns `None` if the ID is malformed or /// the underlying capture API returns a null/zero-size image. pub(super) fn thumbnail_for_id(id: &str) -> Option { - let mut parts = id.splitn(3, ':'); - let kind = parts.next()?; - let num = parts.next()?.parse::().ok()?; + let (kind, num) = super::parse_source_id(id)?; let b64 = match kind { - "screen" => screen_thumbnail_b64(num), - "window" => window_thumbnail_b64(num), - _ => return None, + super::SourceKind::Screen => screen_thumbnail_b64(num), + super::SourceKind::Window => window_thumbnail_b64(num), }; if b64.is_empty() { None @@ -466,3 +487,68 @@ mod macos { out } } + +#[cfg(test)] +mod tests { + use super::{parse_source_id, SourceKind}; + + #[test] + fn parses_screen_id() { + assert_eq!(parse_source_id("screen:1:0"), Some((SourceKind::Screen, 1))); + assert_eq!( + parse_source_id("screen:69734208:0"), + Some((SourceKind::Screen, 69734208)) + ); + } + + #[test] + fn parses_window_id() { + assert_eq!(parse_source_id("window:42:0"), Some((SourceKind::Window, 42))); + } + + #[test] + fn trailing_segment_ignored() { + // Chromium always emits `:0` as the third segment; shim is tolerant + // of whatever trails as long as prefix + numeric are intact. 
+ assert_eq!( + parse_source_id("screen:1:extra:stuff"), + Some((SourceKind::Screen, 1)) + ); + } + + #[test] + fn rejects_unknown_prefix() { + assert_eq!(parse_source_id("tab:1:0"), None); + assert_eq!(parse_source_id("browser:1:0"), None); + assert_eq!(parse_source_id(""), None); + } + + #[test] + fn rejects_missing_numeric() { + assert_eq!(parse_source_id("screen::0"), None); + assert_eq!(parse_source_id("screen:"), None); + assert_eq!(parse_source_id("screen"), None); + } + + #[test] + fn rejects_non_numeric_id() { + assert_eq!(parse_source_id("screen:abc:0"), None); + assert_eq!(parse_source_id("window:0x1:0"), None); + } + + #[test] + fn rejects_overflowing_id() { + // u32::MAX + 1. + assert_eq!(parse_source_id("screen:4294967296:0"), None); + // Negative numbers are never valid CGDirectDisplayID / CGWindowID. + assert_eq!(parse_source_id("screen:-1:0"), None); + } + + #[test] + fn list_source_roundtrip() { + // The enumerator produces the exact shape `parse_source_id` expects, + // so a round trip must succeed for every kind it can emit. 
+ assert!(parse_source_id("screen:1:0").is_some()); + assert!(parse_source_id("window:12345:0").is_some()); + } +} From 10bb4891ea6ad85162e62eff8e1a0d2217e90d21 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Thu, 23 Apr 2026 03:16:04 +0530 Subject: [PATCH 07/11] style(screen-capture): rustfmt follow-up on test module (#713) Co-Authored-By: Claude Opus 4.7 --- app/src-tauri/src/screen_capture/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/src-tauri/src/screen_capture/mod.rs b/app/src-tauri/src/screen_capture/mod.rs index 419611221..d19d2805b 100644 --- a/app/src-tauri/src/screen_capture/mod.rs +++ b/app/src-tauri/src/screen_capture/mod.rs @@ -503,7 +503,10 @@ mod tests { #[test] fn parses_window_id() { - assert_eq!(parse_source_id("window:42:0"), Some((SourceKind::Window, 42))); + assert_eq!( + parse_source_id("window:42:0"), + Some((SourceKind::Window, 42)) + ); } #[test] From 4c9274fc06d9bc0a65a6b58394eddf4eaa4ffe7a Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Thu, 23 Apr 2026 03:16:05 +0530 Subject: [PATCH 08/11] fix(webview): address coderabbit review on picker shim (#713) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Picks up the 2 critical + 2 major findings on #809 that do not require a host/tauri-cef architecture change: - Delete `window.__ohScreenShareTest` entirely. The PoC helper was a page-global screen-capture primitive (default `screen:1:0`, no picker) that any third-party script in the embedded site could have invoked to capture the main display silently. We have the live in-page picker now; the helper has no production value. - Retry `showInPagePicker` after `DOMContentLoaded` when `document.body` isn't ready yet, instead of resolving `null` and letting the shim surface an `AbortError` for a picker that was never actually shown. - Deduplicate in-flight `screen_share_thumbnail` IPCs per source. 
The picker's `render()` runs again on every selection change and tab switch; without a cache, every un-cached source would re-issue its thumbnail capture on each pass. Cache the in-flight promise on the source object and reuse it, which also lets all pending render passes settle when the first one completes. The third critical finding (commands still reachable from the recipe origin via direct `invoke()`) needs a host-driven nonce/session flow and lands in a follow-up PR — see issue tracker. Refs coderabbit review on #809. Co-Authored-By: Claude Opus 4.7 --- app/src-tauri/src/webview_accounts/runtime.js | 103 +++++++++--------- 1 file changed, 51 insertions(+), 52 deletions(-) diff --git a/app/src-tauri/src/webview_accounts/runtime.js b/app/src-tauri/src/webview_accounts/runtime.js index 6dee34795..80629f9be 100644 --- a/app/src-tauri/src/webview_accounts/runtime.js +++ b/app/src-tauri/src/webview_accounts/runtime.js @@ -274,13 +274,19 @@ // under `__ohsp_*` class/ID prefixes and attached to a closed shadow // root where possible to avoid colliding with the host page's CSS. function showInPagePicker(sources) { - return new Promise(function (resolveOuter) { + return new Promise(function (resolveOuter, rejectOuter) { function host() { return (document.body || document.documentElement); } if (!host()) { - // Host not ready — retry once the DOM is parsed. - document.addEventListener('DOMContentLoaded', function () { - resolveOuter(null); - }, { once: true }); + // DOM hasn't parsed yet — wait for it and retry. Previously we + // resolved null here, which the shim turned into an AbortError + // even though no picker was ever shown (coderabbit #809). + document.addEventListener( + 'DOMContentLoaded', + function () { + showInPagePicker(sources).then(resolveOuter, rejectOuter); + }, + { once: true } + ); return; } @@ -460,29 +466,46 @@ } else { // Placeholder glyph until the lazy-loaded thumbnail arrives. thumb.textContent = activeTab === 'screen' ? 
'□' : '▣'; - rawInvoke('screen_share_thumbnail', { args: { id: src.id } }) - .then(function (b64) { - if (!b64 || typeof b64 !== 'string') return; - // Stash on the source so future re-renders keep the - // thumbnail without re-requesting it. - src.thumbnailPngBase64 = b64; - // The grid may have re-rendered by the time the IPC - // resolves (tab switch, etc). Look up the live node. - const liveBtn = gridEl.querySelector( - '[data-source-id="' + src.id.replace(/"/g, '\\"') + '"]' - ); - if (!liveBtn) return; - const liveThumb = liveBtn.querySelector('.srcthumb'); - if (!liveThumb) return; - while (liveThumb.firstChild) liveThumb.removeChild(liveThumb.firstChild); - const img = document.createElement('img'); - img.src = 'data:image/png;base64,' + b64; - img.alt = ''; - img.style.cssText = - 'width: 100%; height: 100%; object-fit: contain; display: block;'; - liveThumb.appendChild(img); - }) - .catch(function () { /* thumbnail failures degrade gracefully to the glyph */ }); + // Dedup in-flight thumbnail IPCs: render() re-runs on every + // selection change and tab switch, and without this cache + // each pass would re-issue screen_share_thumbnail for every + // source that hadn't yet returned (coderabbit #809). 
+ function paintThumb(b64) { + if (!b64 || typeof b64 !== 'string') return; + const liveBtn = gridEl.querySelector( + '[data-source-id="' + src.id.replace(/"/g, '\\"') + '"]' + ); + if (!liveBtn) return; + const liveThumb = liveBtn.querySelector('.srcthumb'); + if (!liveThumb) return; + while (liveThumb.firstChild) liveThumb.removeChild(liveThumb.firstChild); + const img = document.createElement('img'); + img.src = 'data:image/png;base64,' + b64; + img.alt = ''; + img.style.cssText = + 'width: 100%; height: 100%; object-fit: contain; display: block;'; + liveThumb.appendChild(img); + } + if (src.__thumbnailPromise) { + src.__thumbnailPromise.then(paintThumb, function () {}); + } else { + src.__thumbnailPromise = rawInvoke('screen_share_thumbnail', { + args: { id: src.id }, + }).then( + function (b64) { + if (b64 && typeof b64 === 'string') { + // Stash on the source so future re-renders keep + // the thumbnail without re-requesting it. + src.thumbnailPngBase64 = b64; + } + paintThumb(b64); + return b64; + }, + function () { + /* thumbnail failures degrade gracefully to the glyph */ + } + ); + } } const name = document.createElement('div'); name.className = 'srcname'; @@ -652,28 +675,4 @@ ' on ' + ((typeof location !== 'undefined' && location.origin) || '?'), }); })(); - - // --- #713 Stage 0 PoC helper (kept for manual smoke-tests) --- - // Usage: `await window.__ohScreenShareTest('screen::0')`. 
- window.__ohScreenShareTest = async function (sourceId) { - sourceId = sourceId || 'screen:1:0'; - try { - const stream = await navigator.mediaDevices.getUserMedia({ - video: { - mandatory: { - chromeMediaSource: 'desktop', - chromeMediaSourceId: sourceId, - maxFrameRate: 30, - }, - }, - audio: false, - }); - const track = stream.getVideoTracks()[0]; - const label = track && track.label; - stream.getTracks().forEach(function (t) { t.stop(); }); - return { ok: true, trackLabel: label }; - } catch (e) { - return { ok: false, error: (e && (e.name + ': ' + e.message)) || String(e) }; - } - }; })(); From 07a5cfce15980084b920cdec572b55d917276271 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Thu, 23 Apr 2026 16:34:14 +0530 Subject: [PATCH 09/11] feat(screen-capture): session-gated commands + review blockers (#812) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the flat `screen_share_list_sources` + `screen_share_thumbnail` command pair with a short-lived session token flow, closing the privacy surface graycyrus and CodeRabbit flagged as a blocker on #809: page JS (including third-party scripts loaded by the embedded site) could previously call either command directly and exfiltrate open window titles + live thumbnails with no picker interaction and no user gesture. Session gating (#812 Stage A): - `screen_share_begin_session` — opens a 30s session. Requires (1) the caller's webview label to start with `acct_` (rejects main/overlay windows), (2) a live `navigator.userActivation.isActive` (frontend check; rejects timers and async continuations), and (3) ≤10 begin attempts per account in any 60s window. Returns `{ token, sources }` in one round-trip so the picker opens with zero extra latency. - `screen_share_thumbnail` — now requires the token from a live session AND an `id` that was in that session's enumerated source list. A leaked token can only replay thumbnails for the IDs the shim already showed the user. 
- `screen_share_finalize_session` — explicit cleanup from the shim on Share or Cancel. No-op on unknown tokens. Not label-gated because the only effect is dropping a token the caller already possesses. A new begin_session for an account replaces any in-flight session for the same account, which also fixes graycyrus' refactor note #6 (concurrent getDisplayMedia calls producing stacked overlays) at the host-state level. Review blockers / majors (graycyrus, #809): - `CGWindowID` was flowing through `cfnumber_to_u64` (u64) and being formatted straight into the DesktopMediaID. `parse_source_id` accepts `u32` only, so a window with an ID above u32::MAX would have been silently dropped with a grey placeholder. Enumerator now does a checked `u32::try_from` and logs + skips on overflow. - `cfstr` was calling `CString::new().expect()`; panicking through the Apple FFI frames is UB. Returns `Option<*const c_void>` now, callers bail cleanly. - `screen_thumbnail_b64` / `window_thumbnail_b64` now guard against images smaller than 4×4 pixels. macOS 15 Sequoia returns a valid 1×1 transparent CGImage when Screen Recording TCC is not granted (instead of the pre-Sequoia null); the old zero-dimension check wasn't catching that. - All three commands emit entry/exit `[screen-share]` debug logs with grep-friendly token prefixes so the flow is traceable in dev builds. - Dropped the no-op `bounds_dict` block from window enumeration; it was a TODO placeholder that never gained a filter. - Added `= 8` / `= 16` inline hex comments to the CG window-list bitmask constants. Unit tests cover: parse_source_id (existing), token generation / URL-safe shape / uniqueness, session expiry purge, rate-limit window (11th call blocked, scoped per account). Command-level tests would need a `tauri::Webview` mock the stable API doesn't expose; the live run will exercise the gate wiring. 
Permission allowlists switch from listing `screen_share_list_sources` to the three session commands on both `allow-webview-recipe` and `allow-core-process`. `docs/src-tauri/02-commands.md` gains a Screen-share section describing each command. Co-Authored-By: Claude Opus 4.7 --- .../permissions/allow-core-process.toml | 3 +- .../permissions/allow-webview-recipe.toml | 9 +- app/src-tauri/src/lib.rs | 6 +- app/src-tauri/src/screen_capture/mod.rs | 675 +++++++++++++++--- docs/src-tauri/02-commands.md | 10 + 5 files changed, 616 insertions(+), 87 deletions(-) diff --git a/app/src-tauri/permissions/allow-core-process.toml b/app/src-tauri/permissions/allow-core-process.toml index 0c9b84383..534d53343 100644 --- a/app/src-tauri/permissions/allow-core-process.toml +++ b/app/src-tauri/permissions/allow-core-process.toml @@ -20,7 +20,8 @@ allow = [ "webview_account_show", "webview_recipe_event", "activate_main_window", - "screen_share_list_sources", + "screen_share_begin_session", "screen_share_thumbnail", + "screen_share_finalize_session", ] deny = [] diff --git a/app/src-tauri/permissions/allow-webview-recipe.toml b/app/src-tauri/permissions/allow-webview-recipe.toml index ced3a5e91..fd1f5e754 100644 --- a/app/src-tauri/permissions/allow-webview-recipe.toml +++ b/app/src-tauri/permissions/allow-webview-recipe.toml @@ -1,7 +1,12 @@ [[permission]] identifier = "allow-webview-recipe" -description = "Allow injected per-provider recipe code (running inside the third-party site's origin) to invoke the recipe ingest command back to Rust. Also includes screen_share_list_sources so the in-page getDisplayMedia shim (#713) can enumerate real screens and windows to build its picker UI inside the child webview's DOM." +description = "Allow injected per-provider recipe code (running inside the third-party site's origin) to invoke the recipe ingest command back to Rust. 
Also includes the session-gated screen-share commands (#713 / #812) so the in-page getDisplayMedia shim can open a short-lived enumeration session after a real user gesture. The session gate prevents drive-by window-title / thumbnail exfiltration by third-party scripts running in the same origin." [permission.commands] -allow = ["webview_recipe_event", "screen_share_list_sources", "screen_share_thumbnail"] +allow = [ + "webview_recipe_event", + "screen_share_begin_session", + "screen_share_thumbnail", + "screen_share_finalize_session", +] deny = [] diff --git a/app/src-tauri/src/lib.rs b/app/src-tauri/src/lib.rs index 7004fa56c..a65a4d0fe 100644 --- a/app/src-tauri/src/lib.rs +++ b/app/src-tauri/src/lib.rs @@ -616,6 +616,8 @@ pub fn run() { let builder = builder.manage(discord_scanner::ScannerRegistry::new()); #[cfg(feature = "cef")] let builder = builder.manage(telegram_scanner::ScannerRegistry::new()); + #[cfg(feature = "cef")] + let builder = builder.manage(screen_capture::ScreenShareState::new()); builder .setup(move |app| { #[cfg(any(windows, target_os = "linux"))] @@ -924,9 +926,11 @@ pub fn run() { notification_settings::notification_settings_get, notification_settings::notification_settings_set, #[cfg(feature = "cef")] - screen_capture::screen_share_list_sources, + screen_capture::screen_share_begin_session, #[cfg(feature = "cef")] screen_capture::screen_share_thumbnail, + #[cfg(feature = "cef")] + screen_capture::screen_share_finalize_session, activate_main_window, show_native_notification ]) diff --git a/app/src-tauri/src/screen_capture/mod.rs b/app/src-tauri/src/screen_capture/mod.rs index d19d2805b..a18ac0743 100644 --- a/app/src-tauri/src/screen_capture/mod.rs +++ b/app/src-tauri/src/screen_capture/mod.rs @@ -1,10 +1,11 @@ -//! Screen-capture source enumeration + picker orchestration for #713. +//! Screen-capture source enumeration + picker orchestration for #713 / #812. //! -//! 
Background (see issue #713 plan): embedded webviews (Meet, Discord, Zoom) -//! run under the CEF Alloy runtime, which does not link Chromium's built-in -//! `DesktopMediaPicker`. When the page calls `navigator.mediaDevices -//! .getDisplayMedia`, Chromium falls back to auto-selecting the primary -//! display — the user never sees a picker and their whole screen streams. +//! Background (see issue #713 plan): embedded webviews (Meet, Slack Huddles, +//! Discord, Zoom) run under the CEF Alloy runtime, which does not link +//! Chromium's built-in `DesktopMediaPicker`. When the page calls +//! `navigator.mediaDevices.getDisplayMedia`, Chromium falls back to +//! auto-selecting the primary display — the user never sees a picker and +//! their whole screen streams. //! //! Our `OnRequestMediaAccessPermission` callback in tauri-cef grants the //! `DESKTOP_VIDEO_CAPTURE` bit unconditionally. Stage 0 PoC proved that when @@ -13,15 +14,29 @@ //! constraint, Chromium honours the ID and opens a real capture device — //! even though this constraint shape is normally extension-only. //! -//! This module is the host-side half of that flow: -//! * `screen_share_list_sources` — enumerate real screens and windows, -//! tag each with a Chromium-compatible `DesktopMediaID` string -//! (`screen::0` / `window::0`). -//! * `screen_share_thumbnail` — capture a single source's live thumbnail -//! as a base64 PNG. Called lazily per-source from the picker shim so -//! the picker UI opens immediately and thumbnails fade in as they -//! arrive, rather than blocking enumeration for ~1-2s on a many- -//! window desktop. +//! # Session gating (#812 Stage A) +//! +//! The first landing of this module exposed `screen_share_list_sources` and +//! `screen_share_thumbnail` directly on the recipe-webview allowlist. That +//! let any script running inside the embedded site (page JS, compromised +//! third-party CDN) silently enumerate every open window title + live +//! 
thumbnail with no picker interaction and no user gesture. CodeRabbit / +//! graycyrus flagged this as a blocker on PR #809 (issue #812). +//! +//! The module now forces callers through a short-lived session: +//! * `screen_share_begin_session` — requires a live user gesture +//! (`navigator.userActivation.isActive`), an account-scoped webview +//! label (`acct_*`), and is rate-limited to 10 calls per account per +//! 60s. Returns a random 128-bit token + the enumerated sources in +//! one round-trip. +//! * `screen_share_thumbnail` — requires a token whose session is still +//! alive and whose `allowed_ids` set contains the requested ID. +//! * `screen_share_finalize_session` — removes the session. Called by +//! the shim on Share or Cancel. +//! +//! Sessions auto-expire after 30s. A new `begin_session` for the same +//! account replaces any in-flight session (prevents the stacked-overlay +//! case from graycyrus refactor note #6). //! //! The picker UI itself is injected directly into each child webview's //! DOM by `webview_accounts/runtime.js` (see the `showInPagePicker` flow @@ -31,7 +46,13 @@ //! macOS-first: other platforms stub out until the flow is proven end- //! to-end. +use std::collections::{HashMap, HashSet, VecDeque}; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Mutex; +use std::time::{Duration, Instant}; + use serde::{Deserialize, Serialize}; +use tauri::{Runtime, State, Webview}; #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] @@ -48,34 +69,17 @@ pub struct ScreenSource { /// Optional application name (windows only). #[serde(skip_serializing_if = "Option::is_none")] pub app_name: Option, - /// PNG thumbnail base64-encoded. Empty when enumeration cheap-path is - /// used — UI renders a placeholder. + /// PNG thumbnail base64-encoded. Always empty from enumeration — the + /// shim lazy-fetches via `screen_share_thumbnail` so the picker UI opens + /// instantly. 
#[serde(default)] pub thumbnail_png_base64: String, } // --------------------------------------------------------------------------- -// Enumeration +// Parser (platform-agnostic, unit-testable) // --------------------------------------------------------------------------- -#[tauri::command] -pub fn screen_share_list_sources() -> Result, String> { - #[cfg(target_os = "macos")] - { - macos::enumerate().map_err(|e| format!("enumerate failed: {e}")) - } - #[cfg(not(target_os = "macos"))] - { - Err("screen-share picker not implemented for this platform yet".to_string()) - } -} - -#[derive(Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct ThumbnailArgs { - pub id: String, -} - /// What kind of source a parsed DesktopMediaID-format string describes. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) enum SourceKind { @@ -86,9 +90,9 @@ pub(crate) enum SourceKind { /// Parse a `screen::0` / `window::0` source ID into /// `(kind, numeric id)`. Returns `None` if the prefix is unknown, the /// numeric segment doesn't fit in a `u32`, or the shape otherwise doesn't -/// match what the shim constructed from `screen_share_list_sources`. Pure -/// logic so it can be unit-tested without touching platform APIs; macOS -/// callers use it before dispatching to the capture backend. +/// match what the enumerator emits. Pure logic so it can be unit-tested +/// without touching platform APIs; macOS callers use it before dispatching +/// to the capture backend. pub(crate) fn parse_source_id(id: &str) -> Option<(SourceKind, u32)> { let mut parts = id.splitn(3, ':'); let kind = match parts.next()? { @@ -100,12 +104,307 @@ pub(crate) fn parse_source_id(id: &str) -> Option<(SourceKind, u32)> { Some((kind, num)) } -/// Capture a single source's thumbnail as base64 PNG. 
Called per-source in -/// parallel from the picker shim so the picker UI opens immediately and -/// thumbnails fade in as they arrive, rather than blocking the whole -/// enumeration call for 1-2 seconds on a many-window desktop. +// --------------------------------------------------------------------------- +// Session state (#812 Stage A) +// --------------------------------------------------------------------------- + +/// Short TTL prevents stale tokens from being replayable. 30s is long enough +/// for the slowest picker flow (enumerate → thumbs load → user chooses) +/// observed in manual testing, short enough that a leaked token via console +/// can't be reused later in the day. +const SESSION_TTL: Duration = Duration::from_secs(30); +/// Token bucket parameters. 10 attempts per 60s per account means a human +/// mashing the Present-Now button can't get throttled; an automated +/// enumeration loop hits the wall quickly. +const RATE_LIMIT_MAX: usize = 10; +const RATE_LIMIT_WINDOW: Duration = Duration::from_secs(60); +/// 128-bit token. Seeded from OS time + atomic counter + thread id — +/// deliberately no new dependency. Entropy is overkill for a 30s session: +/// the attacker would need to guess the token AND the account-id AND the +/// allowed-id set inside the TTL window. +const TOKEN_BYTES: usize = 16; + +static TOKEN_COUNTER: AtomicU64 = AtomicU64::new(1); + +fn generate_token() -> String { + use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + let counter = TOKEN_COUNTER.fetch_add(1, Ordering::Relaxed); + let tid = thread_id_hash(); + let mut buf = [0u8; TOKEN_BYTES]; + // Interleave the three sources across the 16 bytes so no single + // predictable input (wall clock, counter) dominates the prefix. 
+ buf[0..8].copy_from_slice(&(now as u64).to_le_bytes()); + buf[8..16].copy_from_slice(&counter.to_le_bytes()); + for (i, b) in buf.iter_mut().enumerate() { + *b ^= tid.rotate_left((i as u32) * 3); + } + URL_SAFE_NO_PAD.encode(buf) +} + +fn thread_id_hash() -> u8 { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + let mut h = DefaultHasher::new(); + std::thread::current().id().hash(&mut h); + h.finish() as u8 +} + +#[derive(Debug)] +struct Session { + account_id: String, + allowed_ids: HashSet, + expires_at: Instant, +} + +#[derive(Default)] +pub struct ScreenShareState { + /// token → Session + sessions: Mutex>, + /// account_id → rolling window of begin-session timestamps for rate limit + rate: Mutex>>, + /// account_id → current active token (so we can evict on replace) + active: Mutex>, +} + +impl ScreenShareState { + pub fn new() -> Self { + Self::default() + } +} + +fn purge_expired(sessions: &mut HashMap, active: &mut HashMap) { + let now = Instant::now(); + let expired_tokens: Vec = sessions + .iter() + .filter_map(|(t, s)| { + if s.expires_at <= now { + Some(t.clone()) + } else { + None + } + }) + .collect(); + for t in expired_tokens { + if let Some(sess) = sessions.remove(&t) { + if active.get(&sess.account_id).map(|x| x.as_str()) == Some(t.as_str()) { + active.remove(&sess.account_id); + } + } + } +} + +fn check_and_record_rate(rate: &mut HashMap>, account_id: &str) -> bool { + let now = Instant::now(); + let window = rate.entry(account_id.to_string()).or_default(); + while let Some(&front) = window.front() { + if now.duration_since(front) > RATE_LIMIT_WINDOW { + window.pop_front(); + } else { + break; + } + } + if window.len() >= RATE_LIMIT_MAX { + return false; + } + window.push_back(now); + true +} + +// --------------------------------------------------------------------------- +// Commands +// --------------------------------------------------------------------------- + +#[derive(Deserialize)] +#[serde(rename_all = 
"camelCase")] +pub struct BeginSessionArgs { + pub account_id: String, + pub origin: String, + /// Frontend-reported `navigator.userActivation.isActive`. True only while + /// the call stack originates from a real user gesture (click, key, touch) + /// within the page's activation grace period. False for timers, async + /// continuations, or drive-by enumeration attempts. + pub has_user_activation: bool, +} + +#[derive(Serialize)] +#[serde(rename_all = "camelCase")] +pub struct BeginSessionResult { + pub token: String, + pub sources: Vec, +} + +/// Open a short-lived session that gates subsequent `screen_share_thumbnail` +/// calls. The shim must call this before showing the picker UI; any page JS +/// attempting the same call outside a user gesture is rejected. +#[tauri::command] +pub fn screen_share_begin_session( + webview: Webview, + state: State<'_, ScreenShareState>, + args: BeginSessionArgs, +) -> Result { + let caller_label = webview.label().to_string(); + log::debug!( + "[screen-share] begin_session caller_label={} account_id={} origin={} activation={}", + caller_label, + args.account_id, + args.origin, + args.has_user_activation + ); + + // Gate 1: caller must be an account webview. `acct_*` is the label shape + // produced by `webview_accounts::label_for()`. Main/overlay windows and + // any other Tauri webview fail here. + if !caller_label.starts_with("acct_") { + log::warn!( + "[screen-share] begin_session rejected: caller_label={} is not an account webview", + caller_label + ); + return Err("unauthorized caller".to_string()); + } + + // Gate 2: must be inside a user gesture. Frontend reads + // `navigator.userActivation.isActive` which is true only during the + // direct call stack of a click / key / touch handler. 
+ if !args.has_user_activation { + log::warn!( + "[screen-share] begin_session rejected: no user activation for account_id={}", + args.account_id + ); + return Err("user activation required".to_string()); + } + + // Housekeeping before checking rate / active state. + { + let mut sessions = state + .sessions + .lock() + .expect("screen_share.sessions poisoned"); + let mut active = state.active.lock().expect("screen_share.active poisoned"); + purge_expired(&mut sessions, &mut active); + } + + // Gate 3: rate limit per account. + { + let mut rate = state.rate.lock().expect("screen_share.rate poisoned"); + if !check_and_record_rate(&mut rate, &args.account_id) { + log::warn!( + "[screen-share] begin_session rate-limited account_id={} (>{} within {:?})", + args.account_id, + RATE_LIMIT_MAX, + RATE_LIMIT_WINDOW + ); + return Err("rate-limited".to_string()); + } + } + + // Enumerate sources and build the session. + let sources = enumerate_sources()?; + let allowed_ids: HashSet = sources.iter().map(|s| s.id.clone()).collect(); + let token = generate_token(); + let token_display = token_prefix(&token); + + { + let mut sessions = state + .sessions + .lock() + .expect("screen_share.sessions poisoned"); + let mut active = state.active.lock().expect("screen_share.active poisoned"); + + // Replace any in-flight session for this account — prevents stacked + // pickers if getDisplayMedia is called twice before the first + // resolves (graycyrus refactor #6). 
+ if let Some(prev) = active.remove(&args.account_id) { + sessions.remove(&prev); + log::debug!( + "[screen-share] begin_session replacing prev session token={}…", + token_prefix(&prev) + ); + } + + sessions.insert( + token.clone(), + Session { + account_id: args.account_id.clone(), + allowed_ids, + expires_at: Instant::now() + SESSION_TTL, + }, + ); + active.insert(args.account_id.clone(), token.clone()); + } + + log::info!( + "[screen-share] begin_session opened token={}… account_id={} sources={}", + token_display, + args.account_id, + sources.len() + ); + + Ok(BeginSessionResult { token, sources }) +} + +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ThumbnailArgs { + pub token: String, + pub id: String, +} + +/// Capture one source's thumbnail as base64 PNG. Gated behind the session +/// token: only IDs the session was issued for (i.e. shown in the picker) +/// can be thumbnailed, so a valid token can't be abused to snapshot +/// arbitrary windows. #[tauri::command] -pub fn screen_share_thumbnail(args: ThumbnailArgs) -> Result { +pub fn screen_share_thumbnail( + webview: Webview, + state: State<'_, ScreenShareState>, + args: ThumbnailArgs, +) -> Result { + let caller_label = webview.label().to_string(); + log::debug!( + "[screen-share] thumbnail caller_label={} id={} token={}…", + caller_label, + args.id, + token_prefix(&args.token) + ); + + if !caller_label.starts_with("acct_") { + log::warn!( + "[screen-share] thumbnail rejected: caller_label={} is not an account webview", + caller_label + ); + return Err("unauthorized caller".to_string()); + } + + // Validate the session is alive and knows about this ID. 
+ { + let mut sessions = state + .sessions + .lock() + .expect("screen_share.sessions poisoned"); + let mut active = state.active.lock().expect("screen_share.active poisoned"); + purge_expired(&mut sessions, &mut active); + + let session = sessions.get(&args.token).ok_or_else(|| { + log::warn!( + "[screen-share] thumbnail rejected: unknown/expired token={}…", + token_prefix(&args.token) + ); + "invalid or expired token".to_string() + })?; + if !session.allowed_ids.contains(&args.id) { + log::warn!( + "[screen-share] thumbnail rejected: id={} not in session's allowed set (token={}…)", + args.id, + token_prefix(&args.token) + ); + return Err("id not in session".to_string()); + } + } + #[cfg(target_os = "macos")] { macos::thumbnail_for_id(&args.id).ok_or_else(|| "thumbnail unavailable".to_string()) @@ -117,6 +416,65 @@ pub fn screen_share_thumbnail(args: ThumbnailArgs) -> Result { } } +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct FinalizeSessionArgs { + pub token: String, + #[serde(default)] + pub picked_id: Option, +} + +/// Called by the shim on Share or Cancel. Removes the session. Safe to call +/// with an unknown/expired token — the call is a no-op then. Not gated on +/// caller label because the only effect is cleanup of a token the caller +/// already possesses. 
+#[tauri::command] +pub fn screen_share_finalize_session( + state: State<'_, ScreenShareState>, + args: FinalizeSessionArgs, +) -> Result<(), String> { + let token_display = token_prefix(&args.token); + let mut sessions = state + .sessions + .lock() + .expect("screen_share.sessions poisoned"); + let mut active = state.active.lock().expect("screen_share.active poisoned"); + purge_expired(&mut sessions, &mut active); + + if let Some(session) = sessions.remove(&args.token) { + if active.get(&session.account_id).map(|x| x.as_str()) == Some(args.token.as_str()) { + active.remove(&session.account_id); + } + log::info!( + "[screen-share] finalize_session token={}… account_id={} picked={}", + token_display, + session.account_id, + args.picked_id.as_deref().unwrap_or("") + ); + } else { + log::debug!( + "[screen-share] finalize_session ignored (unknown token={}…)", + token_display + ); + } + Ok(()) +} + +fn token_prefix(token: &str) -> String { + token.chars().take(8).collect() +} + +fn enumerate_sources() -> Result, String> { + #[cfg(target_os = "macos")] + { + macos::enumerate().map_err(|e| format!("enumerate failed: {e}")) + } + #[cfg(not(target_os = "macos"))] + { + Err("screen-share picker not implemented for this platform yet".to_string()) + } +} + // --------------------------------------------------------------------------- // macOS backend // --------------------------------------------------------------------------- @@ -225,21 +583,39 @@ mod macos { height: 0.0, }, }; - // kCGWindowListOptionIncludingWindow. + // kCGWindowListOptionIncludingWindow (= 8). const K_CG_WINDOW_LIST_OPTION_INCLUDING_WINDOW: u32 = 1 << 3; - // kCGWindowImageBoundsIgnoreFraming | kCGWindowImageNominalResolution. + // kCGWindowImageBoundsIgnoreFraming (= 1) | kCGWindowImageNominalResolution (= 16). 
const K_CG_WINDOW_IMAGE_BOUNDS_IGNORE_FRAMING: u32 = 1 << 0; const K_CG_WINDOW_IMAGE_NOMINAL_RESOLUTION: u32 = 1 << 4; const K_CFSTRING_ENCODING_UTF8: u32 = 0x08000100; const K_CFNUMBER_SINT64_TYPE: i32 = 4; - // kCGWindowListOptionOnScreenOnly | kCGWindowListExcludeDesktopElements. + // kCGWindowListOptionOnScreenOnly (= 1) | kCGWindowListExcludeDesktopElements (= 16). const K_CG_WINDOW_LIST_ON_SCREEN_ONLY: u32 = 1 << 0; const K_CG_WINDOW_LIST_EXCLUDE_DESKTOP_ELEMENTS: u32 = 1 << 4; - fn cfstr(s: &str) -> *const c_void { - let c = std::ffi::CString::new(s).expect("cfstr contains NUL"); - unsafe { CFStringCreateWithCString(std::ptr::null(), c.as_ptr(), K_CFSTRING_ENCODING_UTF8) } + /// Below this pixel count on either axis we treat a captured window + /// image as TCC-denied rather than real content. macOS 15 Sequoia + /// returns a valid 1×1 transparent CGImage when Screen Recording is + /// not granted (instead of the pre-Sequoia null return), and the old + /// empty-check alone let that through (see PR #809 review). + const MIN_USABLE_DIMENSION: usize = 4; + + /// Allocate a CoreFoundation string. Returns `None` if the input + /// contains an interior NUL byte (CString rejects those). Callers + /// check the return rather than `expect()`ing, because unwinding + /// through a C frame is undefined behavior. + fn cfstr(s: &str) -> Option<*const c_void> { + let c = std::ffi::CString::new(s).ok()?; + let ptr = unsafe { + CFStringCreateWithCString(std::ptr::null(), c.as_ptr(), K_CFSTRING_ENCODING_UTF8) + }; + if ptr.is_null() { + None + } else { + Some(ptr) + } } fn cfstring_to_string(cf: *const c_void) -> Option { @@ -278,9 +654,6 @@ mod macos { } } - /// Parse a `screen::0` / `window::0` source ID and capture its - /// thumbnail as base64 PNG. Returns `None` if the ID is malformed or - /// the underlying capture API returns a null/zero-size image. 
pub(super) fn thumbnail_for_id(id: &str) -> Option { let (kind, num) = super::parse_source_id(id)?; let b64 = match kind { @@ -301,15 +674,12 @@ mod macos { Ok(out) } - /// Encode a CGImageRef as PNG bytes via ImageIO. Caller releases the - /// image. Returns `None` on any ImageIO error so enumeration never - /// fails because a single thumbnail couldn't be captured. fn cgimage_to_png_bytes(image: *const c_void) -> Option> { if image.is_null() { return None; } + let uti_key = cfstr("public.png")?; unsafe { - let uti_key = cfstr("public.png"); let data = CFDataCreateMutable(std::ptr::null(), 0); if data.is_null() { CFRelease(uti_key); @@ -344,6 +714,18 @@ mod macos { if image.is_null() { return String::new(); } + let w = CGImageGetWidth(image); + let h = CGImageGetHeight(image); + if w < MIN_USABLE_DIMENSION || h < MIN_USABLE_DIMENSION { + log::warn!( + "[screen-share] screen_thumbnail display_id={} returned {}×{} (likely TCC not granted)", + display_id, + w, + h + ); + CGImageRelease(image); + return String::new(); + } let png = cgimage_to_png_bytes(image); CGImageRelease(image); png.map(|b| STANDARD.encode(b)).unwrap_or_default() @@ -364,7 +746,15 @@ mod macos { if image.is_null() { return String::new(); } - if CGImageGetWidth(image) == 0 || CGImageGetHeight(image) == 0 { + let w = CGImageGetWidth(image); + let h = CGImageGetHeight(image); + if w < MIN_USABLE_DIMENSION || h < MIN_USABLE_DIMENSION { + log::warn!( + "[screen-share] window_thumbnail window_id={} returned {}×{} (likely TCC not granted or Sequoia privacy policy)", + window_id, + w, + h + ); CGImageRelease(image); return String::new(); } @@ -400,9 +790,6 @@ mod macos { kind: "screen".to_string(), name, app_name: None, - // Thumbnails are now lazy-fetched by the shim via - // `screen_share_thumbnail` in parallel with the - // picker render, so enumeration stays fast. 
thumbnail_png_base64: String::new(), } }) @@ -416,11 +803,37 @@ mod macos { log::warn!("[screen-share] CGWindowListCopyWindowInfo returned null"); return Vec::new(); } - let key_window_number = cfstr("kCGWindowNumber"); - let key_window_name = cfstr("kCGWindowName"); - let key_owner_name = cfstr("kCGWindowOwnerName"); - let key_bounds = cfstr("kCGWindowBounds"); - let key_layer = cfstr("kCGWindowLayer"); + + // cfstr can fail (interior NUL — never happens for these literals + // but stay defensive); bail cleanly if so. + let Some(key_window_number) = cfstr("kCGWindowNumber") else { + unsafe { CFRelease(array) }; + return Vec::new(); + }; + let Some(key_window_name) = cfstr("kCGWindowName") else { + unsafe { + CFRelease(key_window_number); + CFRelease(array) + }; + return Vec::new(); + }; + let Some(key_owner_name) = cfstr("kCGWindowOwnerName") else { + unsafe { + CFRelease(key_window_number); + CFRelease(key_window_name); + CFRelease(array); + } + return Vec::new(); + }; + let Some(key_layer) = cfstr("kCGWindowLayer") else { + unsafe { + CFRelease(key_window_number); + CFRelease(key_window_name); + CFRelease(key_owner_name); + CFRelease(array); + } + return Vec::new(); + }; let count = unsafe { CFArrayGetCount(array) }; let mut out: Vec = Vec::new(); @@ -431,24 +844,32 @@ mod macos { } let number_cf = unsafe { CFDictionaryGetValue(dict, key_window_number) }; let layer_cf = unsafe { CFDictionaryGetValue(dict, key_layer) }; - let window_id = match cfnumber_to_u64(number_cf) { + let window_id_u64 = match cfnumber_to_u64(number_cf) { Some(v) => v, None => continue, }; + // `CGWindowID` is `uint32_t` upstream, but `cfnumber_to_u64` + // returns 64-bit (we read the CFNumber as SInt64 for sign + // safety). Values should never exceed `u32::MAX` in practice, + // but a silent cast would round-trip through `format!` and + // then fail parse_source_id — the user would see a source in + // the picker with a permanent grey placeholder. Skip loudly. 
+ let window_id = match u32::try_from(window_id_u64) { + Ok(v) => v, + Err(_) => { + log::warn!( + "[screen-share] window_id {} overflows u32, skipping", + window_id_u64 + ); + continue; + } + }; // Skip menu bar / dock / system chrome (layer != 0 → non-normal // window). Normal app windows live at layer 0. let layer = cfnumber_to_u64(layer_cf).unwrap_or(0); if layer != 0 { continue; } - // Skip microscopic windows (tooltips, hidden panels). - if let Some(bounds_dict) = unsafe { CFDictionaryGetValue(dict, key_bounds).as_ref() } { - // kCGWindowBounds is actually a CFDictionary with Width/Height - // keys. Cheap filter: if the dict has a "Width" key and it's - // < 50, skip. Implementing full parse isn't worth it for the - // MVP; Chromium renders a scrollable picker anyway. - let _ = bounds_dict; - } let title = unsafe { CFDictionaryGetValue(dict, key_window_name) }; let owner = unsafe { CFDictionaryGetValue(dict, key_owner_name) }; let title_str = cfstring_to_string(title).unwrap_or_default(); @@ -480,7 +901,6 @@ mod macos { CFRelease(key_window_number); CFRelease(key_window_name); CFRelease(key_owner_name); - CFRelease(key_bounds); CFRelease(key_layer); CFRelease(array); } @@ -490,7 +910,9 @@ mod macos { #[cfg(test)] mod tests { - use super::{parse_source_id, SourceKind}; + use super::*; + + // ---- parse_source_id tests (platform-agnostic) ---- #[test] fn parses_screen_id() { @@ -511,8 +933,6 @@ mod tests { #[test] fn trailing_segment_ignored() { - // Chromium always emits `:0` as the third segment; shim is tolerant - // of whatever trails as long as prefix + numeric are intact. assert_eq!( parse_source_id("screen:1:extra:stuff"), Some((SourceKind::Screen, 1)) @@ -541,17 +961,106 @@ mod tests { #[test] fn rejects_overflowing_id() { - // u32::MAX + 1. assert_eq!(parse_source_id("screen:4294967296:0"), None); - // Negative numbers are never valid CGDirectDisplayID / CGWindowID. 
assert_eq!(parse_source_id("screen:-1:0"), None); } #[test] fn list_source_roundtrip() { - // The enumerator produces the exact shape `parse_source_id` expects, - // so a round trip must succeed for every kind it can emit. assert!(parse_source_id("screen:1:0").is_some()); assert!(parse_source_id("window:12345:0").is_some()); } + + // ---- Session / rate-limit tests (pure logic, no platform APIs) ---- + + fn insert_test_session( + state: &ScreenShareState, + token: &str, + account_id: &str, + ttl: Duration, + ids: &[&str], + ) { + let mut sessions = state.sessions.lock().unwrap(); + let mut active = state.active.lock().unwrap(); + sessions.insert( + token.to_string(), + Session { + account_id: account_id.to_string(), + allowed_ids: ids.iter().map(|s| s.to_string()).collect(), + expires_at: Instant::now() + ttl, + }, + ); + active.insert(account_id.to_string(), token.to_string()); + } + + #[test] + fn purge_expired_removes_stale_sessions() { + let state = ScreenShareState::new(); + insert_test_session( + &state, + "tok-expired", + "acct1", + Duration::from_millis(0), + &[], + ); + // Sleep a blink so `expires_at <= now` is definitely true. + std::thread::sleep(Duration::from_millis(5)); + insert_test_session(&state, "tok-live", "acct2", Duration::from_secs(10), &[]); + + { + let mut s = state.sessions.lock().unwrap(); + let mut a = state.active.lock().unwrap(); + purge_expired(&mut s, &mut a); + } + + let sessions = state.sessions.lock().unwrap(); + assert!(!sessions.contains_key("tok-expired")); + assert!(sessions.contains_key("tok-live")); + let active = state.active.lock().unwrap(); + assert!(!active.contains_key("acct1")); + assert_eq!(active.get("acct2").map(|s| s.as_str()), Some("tok-live")); + } + + #[test] + fn rate_limit_blocks_11th_call_in_window() { + let mut rate: HashMap> = HashMap::new(); + for _ in 0..RATE_LIMIT_MAX { + assert!(check_and_record_rate(&mut rate, "acct-x")); + } + // 11th call must fail. 
+ assert!(!check_and_record_rate(&mut rate, "acct-x")); + } + + #[test] + fn rate_limit_scoped_per_account() { + let mut rate: HashMap> = HashMap::new(); + for _ in 0..RATE_LIMIT_MAX { + check_and_record_rate(&mut rate, "acct-a"); + } + // Different account still has full budget. + assert!(check_and_record_rate(&mut rate, "acct-b")); + } + + #[test] + fn generate_token_is_url_safe_and_unique() { + let a = generate_token(); + let b = generate_token(); + assert_ne!(a, b); + // URL-safe base64, no-pad, 16 bytes → 22 chars. + assert_eq!(a.len(), 22); + assert!(a + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')); + } + + #[test] + fn token_prefix_truncates() { + assert_eq!(token_prefix("0123456789abcdef"), "01234567"); + assert_eq!(token_prefix("ab"), "ab"); + } + + // NOTE: full command-level tests (screen_share_begin_session etc.) + // would need a `tauri::Webview` mock, which the stable Tauri API + // doesn't expose. Gate + rate-limit logic is covered above; the + // command glue around it is thin enough to verify via live run. } diff --git a/docs/src-tauri/02-commands.md b/docs/src-tauri/02-commands.md index 1799f93b4..f4b286427 100644 --- a/docs/src-tauri/02-commands.md +++ b/docs/src-tauri/02-commands.md @@ -53,6 +53,16 @@ From **`commands/openhuman.rs`** (see source for exact payloads): | `openhuman_service_status` | Query status | | `openhuman_service_uninstall` | Uninstall service | +## Screen share picker (CEF / macOS) + +From **`screen_capture/mod.rs`** (registered under `#[cfg(feature = "cef")]`). Backs the in-page `getDisplayMedia` shim in `webview_accounts/runtime.js`. Session-gated: the shim must open a session with a live user gesture before enumeration / thumbnail captures succeed. See issue #713 (picker UX) + #812 (session gating). 
+ +| Command | Purpose | +| --------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | +| `screen_share_begin_session` | Open a 30s session from an account webview, after a `navigator.userActivation.isActive` gesture. Returns `{ token, sources }`. Rate-limited to 10/minute per account. | +| `screen_share_thumbnail` | Capture a single source's thumbnail as base64 PNG. Requires a live token and an `id` that the session was issued for. macOS only; other platforms return an error. | +| `screen_share_finalize_session` | Close the session. Called by the shim on Share or Cancel; safe to call with an unknown/expired token (no-op). | + ## Removed / not present The following **do not** exist in the current `generate_handler!` list: `exchange_token`, `get_auth_state`, `socket_connect`, `start_telegram_login`. Authentication and sockets are handled in the **React** app and **core** process, not via these IPC names. From 5cc677ce5736ecaa151ff85d34c599d4c4c573d8 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Thu, 23 Apr 2026 16:34:33 +0530 Subject: [PATCH 10/11] fix(webview): route getDisplayMedia shim through session-gated flow (#812) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Matches the new host-side screen_share_* command shapes: - At shim entry, read `navigator.userActivation.isActive`. If false, fall through to the original `getDisplayMedia` rather than open the picker. Keeps drive-by enumeration from a non-gesture context from ever reaching begin_session — matches graycyrus' "verify calling webview + gesture" gate from the #809 review. - Replace `rawInvoke('screen_share_list_sources', {})` with `begin_session({ accountId, origin, hasUserActivation })`, and propagate the returned `token` into `showInPagePicker` and every `screen_share_thumbnail({ token, id })` call so the session gate can authorise both halves of the picker flow. 
- On Share, Cancel, or the no-sources fallback path, finalize the session via a fire-and-forget `finalizeSessionQuiet` helper so the account's `active` slot is released immediately instead of waiting for the 30s TTL. - Expand the `navigator.permissions.query` spoof to also report `camera` and `microphone` as `granted`, not just `display-capture`. CEF Alloy's Permissions API otherwise reports them as `prompt`/ `denied`, and some Meet / Slack code paths short-circuit on that before ever calling `getUserMedia`. AbortError on cancel, the DOMContentLoaded retry, and the in-flight thumbnail dedup are all preserved. Co-Authored-By: Claude Opus 4.7 --- app/src-tauri/src/webview_accounts/runtime.js | 88 ++++++++++++++++--- 1 file changed, 78 insertions(+), 10 deletions(-) diff --git a/app/src-tauri/src/webview_accounts/runtime.js b/app/src-tauri/src/webview_accounts/runtime.js index 80629f9be..300de1c7d 100644 --- a/app/src-tauri/src/webview_accounts/runtime.js +++ b/app/src-tauri/src/webview_accounts/runtime.js @@ -149,31 +149,82 @@ : navigator.mediaDevices.getDisplayMedia ).bind(navigator.mediaDevices); + // Fire-and-forget session cleanup. Swallows errors because finalize + // is a no-op on the host side for unknown/expired tokens and we don't + // want a late IPC failure to leak into the getDisplayMedia rejection. + function finalizeSessionQuiet(token, pickedId) { + if (!token) return Promise.resolve(); + return rawInvoke('screen_share_finalize_session', { + args: { token: token, pickedId: pickedId || null }, + }).catch(function () {}); + } + const shim = async function (constraints) { constraints = constraints || {}; + // User-activation gate (#812). `navigator.userActivation.isActive` + // is transient — true only during the direct call stack of a real + // gesture handler (click, key, touch). Third-party JS calling + // getDisplayMedia from a timer or async continuation gets filtered + // here, so our downstream commands (begin_session etc.) 
never open + // a session without a gesture. Fall through to the original + // implementation rather than throw so pages with legitimate + // non-gesture flows (rare but possible) aren't hard-blocked. + const hasActivation = !!( + typeof navigator !== 'undefined' && + navigator.userActivation && + navigator.userActivation.isActive + ); send('log', { level: 'info', - msg: '[gdm-shim] getDisplayMedia intercepted audio=' + !!constraints.audio, + msg: + '[gdm-shim] getDisplayMedia intercepted audio=' + + !!constraints.audio + + ' activation=' + + hasActivation, }); + if (!hasActivation) { + send('log', { + level: 'warn', + msg: '[gdm-shim] no user activation, falling through to native getDisplayMedia', + }); + return origGetDisplayMedia(constraints); + } - let sources; + let session; try { - sources = await rawInvoke('screen_share_list_sources', {}); + session = await rawInvoke('screen_share_begin_session', { + args: { + accountId: ctx.accountId, + origin: (typeof location !== 'undefined' && location.origin) || 'unknown', + hasUserActivation: hasActivation, + }, + }); } catch (e) { send('log', { level: 'error', - msg: '[gdm-shim] list_sources IPC failed: ' + (e && e.message ? e.message : String(e)), + msg: '[gdm-shim] begin_session IPC failed: ' + (e && e.message ? 
e.message : String(e)), + }); + return origGetDisplayMedia(constraints); + } + if (!session || typeof session.token !== 'string' || !Array.isArray(session.sources)) { + send('log', { + level: 'warn', + msg: '[gdm-shim] begin_session returned malformed payload, falling back', }); return origGetDisplayMedia(constraints); } - if (!Array.isArray(sources) || sources.length === 0) { + const sessionToken = session.token; + const sources = session.sources; + if (sources.length === 0) { send('log', { level: 'warn', msg: '[gdm-shim] no sources enumerated, falling back' }); + await finalizeSessionQuiet(sessionToken, null); return origGetDisplayMedia(constraints); } - const pick = await showInPagePicker(sources); + const pick = await showInPagePicker(sources, sessionToken); if (!pick) { send('log', { level: 'info', msg: '[gdm-shim] user cancelled picker' }); + await finalizeSessionQuiet(sessionToken, null); // Meet (and other video-conf sites) treat `NotAllowedError` on // getDisplayMedia as "the browser blocked us" and pop a // "needs permission" modal. Real Chrome ALSO throws @@ -185,6 +236,11 @@ // included) dismiss it silently. throw new DOMException('User cancelled screen share picker', 'AbortError'); } + // Finalize the session BEFORE getUserMedia: the Chromium capture + // path doesn't need the token, and leaving the session open past + // this point would just hold the `active` slot for the account + // until the 30s TTL fires. + await finalizeSessionQuiet(sessionToken, pick.id); send('log', { level: 'info', msg: '[gdm-shim] picked id=' + pick.id + ' kind=' + pick.kind, @@ -273,7 +329,7 @@ // UI) without any native-view gymnastics. All nodes are namespaced // under `__ohsp_*` class/ID prefixes and attached to a closed shadow // root where possible to avoid colliding with the host page's CSS. 
- function showInPagePicker(sources) { + function showInPagePicker(sources, sessionToken) { return new Promise(function (resolveOuter, rejectOuter) { function host() { return (document.body || document.documentElement); } if (!host()) { @@ -283,7 +339,7 @@ document.addEventListener( 'DOMContentLoaded', function () { - showInPagePicker(sources).then(resolveOuter, rejectOuter); + showInPagePicker(sources, sessionToken).then(resolveOuter, rejectOuter); }, { once: true } ); @@ -490,7 +546,7 @@ src.__thumbnailPromise.then(paintThumb, function () {}); } else { src.__thumbnailPromise = rawInvoke('screen_share_thumbnail', { - args: { id: src.id }, + args: { token: sessionToken, id: src.id }, }).then( function (b64) { if (b64 && typeof b64 === 'string') { @@ -629,7 +685,19 @@ ? permDescriptor.value : navigator.permissions.query ).bind(navigator.permissions); - const spoofed = { 'display-capture': 'granted' }; + // CEF Alloy's Permissions API doesn't reflect what our + // OnRequestMediaAccessPermission callback will grant dynamically, + // so it defaults to 'prompt' or 'denied' for the media permissions + // we do handle. Pages that consult the Permissions API up front + // (Meet for display-capture; some flows for camera/microphone) + // refuse to try the actual getUserMedia call if they see 'denied' + // here. Spoof all three to 'granted'; the real grant still goes + // through our CEF permission handler where it's scoped per-call. 
+ const spoofed = { + 'display-capture': 'granted', + camera: 'granted', + microphone: 'granted', + }; const spoofedQuery = async function (descriptor) { const n = descriptor && descriptor.name; if (n && spoofed[n]) { From f7a575ed8ae22377011150bef947f8fb88a5bba8 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Thu, 23 Apr 2026 17:13:04 +0530 Subject: [PATCH 11/11] fix(webview): reject concurrent getDisplayMedia calls in shim (#713) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the shim-side half of graycyrus refactor note #6 on #809. The host session state already evicts a stale session when `begin_session` is called a second time for the same account, but without a shim-side guard a concurrent `getDisplayMedia` would still append a second picker DOM while the first overlay was live — the user would see two stacked cards and no clean way out of either. Tracks a `pickerInFlight` module-scoped boolean around the shim body (now factored into `runShim`) and rejects concurrent calls with `InvalidStateError`, which matches the MediaStreams spec's error shape for already-in-progress capture requests. The flag is reset in a `finally` block so exceptions from `runShim` don't leave it stuck. Co-Authored-By: Claude Opus 4.7 --- app/src-tauri/src/webview_accounts/runtime.js | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/app/src-tauri/src/webview_accounts/runtime.js b/app/src-tauri/src/webview_accounts/runtime.js index 300de1c7d..875c6e5b4 100644 --- a/app/src-tauri/src/webview_accounts/runtime.js +++ b/app/src-tauri/src/webview_accounts/runtime.js @@ -159,7 +159,32 @@ }).catch(function () {}); } + // In-flight guard (graycyrus refactor #6). The host-side state already + // evicts a stale session when begin_session fires twice, but without a + // shim-side guard a second call would still append a second picker DOM + // while the first is open — the user would see two stacked overlays.
+ // Reject a concurrent call the same way the MediaStreams spec does + // when an existing capture request is in progress. + let pickerInFlight = false; + const shim = async function (constraints) { + constraints = constraints || {}; + if (pickerInFlight) { + send('log', { level: 'warn', msg: '[gdm-shim] picker already open, rejecting concurrent call' }); + throw new DOMException( + 'A screen-share picker is already open', + 'InvalidStateError' + ); + } + pickerInFlight = true; + try { + return await runShim(constraints); + } finally { + pickerInFlight = false; + } + }; + + const runShim = async function (constraints) { constraints = constraints || {}; // User-activation gate (#812). `navigator.userActivation.isActive` // is transient — true only during the direct call stack of a real