diff --git a/app/src-tauri/Cargo.toml b/app/src-tauri/Cargo.toml index dff5effcb..8e149f9ec 100644 --- a/app/src-tauri/Cargo.toml +++ b/app/src-tauri/Cargo.toml @@ -69,6 +69,7 @@ semver = "1" log = "0.4" env_logger = "0.11" +base64 = "0.22" # Used by the imessage_scanner module. anyhow = "1.0" diff --git a/app/src-tauri/permissions/allow-core-process.toml b/app/src-tauri/permissions/allow-core-process.toml index c1348ec9a..534d53343 100644 --- a/app/src-tauri/permissions/allow-core-process.toml +++ b/app/src-tauri/permissions/allow-core-process.toml @@ -20,5 +20,8 @@ allow = [ "webview_account_show", "webview_recipe_event", "activate_main_window", + "screen_share_begin_session", + "screen_share_thumbnail", + "screen_share_finalize_session", ] deny = [] diff --git a/app/src-tauri/permissions/allow-webview-recipe.toml b/app/src-tauri/permissions/allow-webview-recipe.toml index 97cb1767e..fd1f5e754 100644 --- a/app/src-tauri/permissions/allow-webview-recipe.toml +++ b/app/src-tauri/permissions/allow-webview-recipe.toml @@ -1,7 +1,12 @@ [[permission]] identifier = "allow-webview-recipe" -description = "Allow injected per-provider recipe code (running inside the third-party site's origin) to invoke the recipe ingest command back to Rust." +description = "Allow injected per-provider recipe code (running inside the third-party site's origin) to invoke the recipe ingest command back to Rust. Also includes the session-gated screen-share commands (#713 / #812) so the in-page getDisplayMedia shim can open a short-lived enumeration session after a real user gesture. The session gate prevents drive-by window-title / thumbnail exfiltration by third-party scripts running in the same origin." 
[permission.commands] -allow = ["webview_recipe_event"] +allow = [ + "webview_recipe_event", + "screen_share_begin_session", + "screen_share_thumbnail", + "screen_share_finalize_session", +] deny = [] diff --git a/app/src-tauri/src/lib.rs b/app/src-tauri/src/lib.rs index c40a3a6f3..49197e726 100644 --- a/app/src-tauri/src/lib.rs +++ b/app/src-tauri/src/lib.rs @@ -12,6 +12,8 @@ mod gmail; mod imessage_scanner; mod notification_settings; #[cfg(feature = "cef")] +mod screen_capture; +#[cfg(feature = "cef")] mod slack_scanner; #[cfg(feature = "cef")] mod telegram_scanner; @@ -631,6 +633,8 @@ pub fn run() { let builder = builder.manage(discord_scanner::ScannerRegistry::new()); #[cfg(feature = "cef")] let builder = builder.manage(telegram_scanner::ScannerRegistry::new()); + #[cfg(feature = "cef")] + let builder = builder.manage(screen_capture::ScreenShareState::new()); builder .setup(move |app| { #[cfg(any(windows, target_os = "linux"))] @@ -1020,6 +1024,12 @@ pub fn run() { webview_accounts::webview_set_focused_account, notification_settings::notification_settings_get, notification_settings::notification_settings_set, + #[cfg(feature = "cef")] + screen_capture::screen_share_begin_session, + #[cfg(feature = "cef")] + screen_capture::screen_share_thumbnail, + #[cfg(feature = "cef")] + screen_capture::screen_share_finalize_session, gmail::gmail_list_labels, gmail::gmail_list_messages, gmail::gmail_search, diff --git a/app/src-tauri/src/screen_capture/mod.rs b/app/src-tauri/src/screen_capture/mod.rs new file mode 100644 index 000000000..a18ac0743 --- /dev/null +++ b/app/src-tauri/src/screen_capture/mod.rs @@ -0,0 +1,1066 @@ +//! Screen-capture source enumeration + picker orchestration for #713 / #812. +//! +//! Background (see issue #713 plan): embedded webviews (Meet, Slack Huddles, +//! Discord, Zoom) run under the CEF Alloy runtime, which does not link +//! Chromium's built-in `DesktopMediaPicker`. When the page calls +//! 
`navigator.mediaDevices.getDisplayMedia`, Chromium falls back to +//! auto-selecting the primary display — the user never sees a picker and +//! their whole screen streams. +//! +//! Our `OnRequestMediaAccessPermission` callback in tauri-cef grants the +//! `DESKTOP_VIDEO_CAPTURE` bit unconditionally. Stage 0 PoC proved that when +//! the page calls `getUserMedia` with a hand-crafted +//! `{ mandatory: { chromeMediaSource: 'desktop', chromeMediaSourceId: '<id>' } }` +//! constraint, Chromium honours the ID and opens a real capture device — +//! even though this constraint shape is normally extension-only. +//! +//! # Session gating (#812 Stage A) +//! +//! The first landing of this module exposed `screen_share_list_sources` and +//! `screen_share_thumbnail` directly on the recipe-webview allowlist. That +//! let any script running inside the embedded site (page JS, compromised +//! third-party CDN) silently enumerate every open window title + live +//! thumbnail with no picker interaction and no user gesture. CodeRabbit / +//! graycyrus flagged this as a blocker on PR #809 (issue #812). +//! +//! The module now forces callers through a short-lived session: +//! * `screen_share_begin_session` — requires a live user gesture +//! (`navigator.userActivation.isActive`), an account-scoped webview +//! label (`acct_*`), and is rate-limited to 10 calls per account per +//! 60s. Returns a 128-bit session token + the enumerated sources in +//! one round-trip. +//! * `screen_share_thumbnail` — requires a token whose session is still +//! alive and whose `allowed_ids` set contains the requested ID. +//! * `screen_share_finalize_session` — removes the session. Called by +//! the shim on Share or Cancel. +//! +//! Sessions auto-expire after 30s. A new `begin_session` for the same +//! account replaces any in-flight session (prevents the stacked-overlay +//! case from graycyrus refactor note #6). +//! +//! The picker UI itself is injected directly into each child webview's +//! 
/// What kind of source a parsed DesktopMediaID-format string describes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum SourceKind {
    Screen,
    Window,
}

/// Parse a `screen:<id>:0` / `window:<id>:0` source ID into
/// `(kind, numeric id)`.
///
/// Returns `None` when:
/// * the first `:`-separated segment is neither `screen` nor `window`,
/// * the second segment is missing or empty,
/// * the second segment contains anything other than ASCII digits, or
/// * the digits do not fit in a `u32`.
///
/// Segments after the numeric ID are ignored.
///
/// Pure logic so it can be unit-tested without touching platform APIs;
/// macOS callers use it before dispatching to the capture backend.
pub(crate) fn parse_source_id(id: &str) -> Option<(SourceKind, u32)> {
    let mut segments = id.split(':');
    let kind = match segments.next()? {
        "screen" => SourceKind::Screen,
        "window" => SourceKind::Window,
        _ => return None,
    };
    let digits = segments.next()?;
    // `u32::from_str` tolerates a leading `+`, which the enumerator never
    // emits. Insist on plain digits so the parser accepts exactly the shape
    // `format!("screen:{}:0", id)` produces and nothing looser.
    if !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    let num = digits.parse::<u32>().ok()?;
    Some((kind, num))
}
+const TOKEN_BYTES: usize = 16; + +static TOKEN_COUNTER: AtomicU64 = AtomicU64::new(1); + +fn generate_token() -> String { + use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + let counter = TOKEN_COUNTER.fetch_add(1, Ordering::Relaxed); + let tid = thread_id_hash(); + let mut buf = [0u8; TOKEN_BYTES]; + // Interleave the three sources across the 16 bytes so no single + // predictable input (wall clock, counter) dominates the prefix. + buf[0..8].copy_from_slice(&(now as u64).to_le_bytes()); + buf[8..16].copy_from_slice(&counter.to_le_bytes()); + for (i, b) in buf.iter_mut().enumerate() { + *b ^= tid.rotate_left((i as u32) * 3); + } + URL_SAFE_NO_PAD.encode(buf) +} + +fn thread_id_hash() -> u8 { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + let mut h = DefaultHasher::new(); + std::thread::current().id().hash(&mut h); + h.finish() as u8 +} + +#[derive(Debug)] +struct Session { + account_id: String, + allowed_ids: HashSet, + expires_at: Instant, +} + +#[derive(Default)] +pub struct ScreenShareState { + /// token → Session + sessions: Mutex>, + /// account_id → rolling window of begin-session timestamps for rate limit + rate: Mutex>>, + /// account_id → current active token (so we can evict on replace) + active: Mutex>, +} + +impl ScreenShareState { + pub fn new() -> Self { + Self::default() + } +} + +fn purge_expired(sessions: &mut HashMap, active: &mut HashMap) { + let now = Instant::now(); + let expired_tokens: Vec = sessions + .iter() + .filter_map(|(t, s)| { + if s.expires_at <= now { + Some(t.clone()) + } else { + None + } + }) + .collect(); + for t in expired_tokens { + if let Some(sess) = sessions.remove(&t) { + if active.get(&sess.account_id).map(|x| x.as_str()) == Some(t.as_str()) { + active.remove(&sess.account_id); + } + } + } +} + +fn check_and_record_rate(rate: &mut HashMap>, 
account_id: &str) -> bool { + let now = Instant::now(); + let window = rate.entry(account_id.to_string()).or_default(); + while let Some(&front) = window.front() { + if now.duration_since(front) > RATE_LIMIT_WINDOW { + window.pop_front(); + } else { + break; + } + } + if window.len() >= RATE_LIMIT_MAX { + return false; + } + window.push_back(now); + true +} + +// --------------------------------------------------------------------------- +// Commands +// --------------------------------------------------------------------------- + +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct BeginSessionArgs { + pub account_id: String, + pub origin: String, + /// Frontend-reported `navigator.userActivation.isActive`. True only while + /// the call stack originates from a real user gesture (click, key, touch) + /// within the page's activation grace period. False for timers, async + /// continuations, or drive-by enumeration attempts. + pub has_user_activation: bool, +} + +#[derive(Serialize)] +#[serde(rename_all = "camelCase")] +pub struct BeginSessionResult { + pub token: String, + pub sources: Vec, +} + +/// Open a short-lived session that gates subsequent `screen_share_thumbnail` +/// calls. The shim must call this before showing the picker UI; any page JS +/// attempting the same call outside a user gesture is rejected. +#[tauri::command] +pub fn screen_share_begin_session( + webview: Webview, + state: State<'_, ScreenShareState>, + args: BeginSessionArgs, +) -> Result { + let caller_label = webview.label().to_string(); + log::debug!( + "[screen-share] begin_session caller_label={} account_id={} origin={} activation={}", + caller_label, + args.account_id, + args.origin, + args.has_user_activation + ); + + // Gate 1: caller must be an account webview. `acct_*` is the label shape + // produced by `webview_accounts::label_for()`. Main/overlay windows and + // any other Tauri webview fail here. 
+ if !caller_label.starts_with("acct_") { + log::warn!( + "[screen-share] begin_session rejected: caller_label={} is not an account webview", + caller_label + ); + return Err("unauthorized caller".to_string()); + } + + // Gate 2: must be inside a user gesture. Frontend reads + // `navigator.userActivation.isActive` which is true only during the + // direct call stack of a click / key / touch handler. + if !args.has_user_activation { + log::warn!( + "[screen-share] begin_session rejected: no user activation for account_id={}", + args.account_id + ); + return Err("user activation required".to_string()); + } + + // Housekeeping before checking rate / active state. + { + let mut sessions = state + .sessions + .lock() + .expect("screen_share.sessions poisoned"); + let mut active = state.active.lock().expect("screen_share.active poisoned"); + purge_expired(&mut sessions, &mut active); + } + + // Gate 3: rate limit per account. + { + let mut rate = state.rate.lock().expect("screen_share.rate poisoned"); + if !check_and_record_rate(&mut rate, &args.account_id) { + log::warn!( + "[screen-share] begin_session rate-limited account_id={} (>{} within {:?})", + args.account_id, + RATE_LIMIT_MAX, + RATE_LIMIT_WINDOW + ); + return Err("rate-limited".to_string()); + } + } + + // Enumerate sources and build the session. + let sources = enumerate_sources()?; + let allowed_ids: HashSet = sources.iter().map(|s| s.id.clone()).collect(); + let token = generate_token(); + let token_display = token_prefix(&token); + + { + let mut sessions = state + .sessions + .lock() + .expect("screen_share.sessions poisoned"); + let mut active = state.active.lock().expect("screen_share.active poisoned"); + + // Replace any in-flight session for this account — prevents stacked + // pickers if getDisplayMedia is called twice before the first + // resolves (graycyrus refactor #6). 
+ if let Some(prev) = active.remove(&args.account_id) { + sessions.remove(&prev); + log::debug!( + "[screen-share] begin_session replacing prev session token={}…", + token_prefix(&prev) + ); + } + + sessions.insert( + token.clone(), + Session { + account_id: args.account_id.clone(), + allowed_ids, + expires_at: Instant::now() + SESSION_TTL, + }, + ); + active.insert(args.account_id.clone(), token.clone()); + } + + log::info!( + "[screen-share] begin_session opened token={}… account_id={} sources={}", + token_display, + args.account_id, + sources.len() + ); + + Ok(BeginSessionResult { token, sources }) +} + +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ThumbnailArgs { + pub token: String, + pub id: String, +} + +/// Capture one source's thumbnail as base64 PNG. Gated behind the session +/// token: only IDs the session was issued for (i.e. shown in the picker) +/// can be thumbnailed, so a valid token can't be abused to snapshot +/// arbitrary windows. +#[tauri::command] +pub fn screen_share_thumbnail( + webview: Webview, + state: State<'_, ScreenShareState>, + args: ThumbnailArgs, +) -> Result { + let caller_label = webview.label().to_string(); + log::debug!( + "[screen-share] thumbnail caller_label={} id={} token={}…", + caller_label, + args.id, + token_prefix(&args.token) + ); + + if !caller_label.starts_with("acct_") { + log::warn!( + "[screen-share] thumbnail rejected: caller_label={} is not an account webview", + caller_label + ); + return Err("unauthorized caller".to_string()); + } + + // Validate the session is alive and knows about this ID. 
+ { + let mut sessions = state + .sessions + .lock() + .expect("screen_share.sessions poisoned"); + let mut active = state.active.lock().expect("screen_share.active poisoned"); + purge_expired(&mut sessions, &mut active); + + let session = sessions.get(&args.token).ok_or_else(|| { + log::warn!( + "[screen-share] thumbnail rejected: unknown/expired token={}…", + token_prefix(&args.token) + ); + "invalid or expired token".to_string() + })?; + if !session.allowed_ids.contains(&args.id) { + log::warn!( + "[screen-share] thumbnail rejected: id={} not in session's allowed set (token={}…)", + args.id, + token_prefix(&args.token) + ); + return Err("id not in session".to_string()); + } + } + + #[cfg(target_os = "macos")] + { + macos::thumbnail_for_id(&args.id).ok_or_else(|| "thumbnail unavailable".to_string()) + } + #[cfg(not(target_os = "macos"))] + { + let _ = args; + Err("thumbnails not implemented for this platform yet".to_string()) + } +} + +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct FinalizeSessionArgs { + pub token: String, + #[serde(default)] + pub picked_id: Option, +} + +/// Called by the shim on Share or Cancel. Removes the session. Safe to call +/// with an unknown/expired token — the call is a no-op then. Not gated on +/// caller label because the only effect is cleanup of a token the caller +/// already possesses. 
+#[tauri::command] +pub fn screen_share_finalize_session( + state: State<'_, ScreenShareState>, + args: FinalizeSessionArgs, +) -> Result<(), String> { + let token_display = token_prefix(&args.token); + let mut sessions = state + .sessions + .lock() + .expect("screen_share.sessions poisoned"); + let mut active = state.active.lock().expect("screen_share.active poisoned"); + purge_expired(&mut sessions, &mut active); + + if let Some(session) = sessions.remove(&args.token) { + if active.get(&session.account_id).map(|x| x.as_str()) == Some(args.token.as_str()) { + active.remove(&session.account_id); + } + log::info!( + "[screen-share] finalize_session token={}… account_id={} picked={}", + token_display, + session.account_id, + args.picked_id.as_deref().unwrap_or("") + ); + } else { + log::debug!( + "[screen-share] finalize_session ignored (unknown token={}…)", + token_display + ); + } + Ok(()) +} + +fn token_prefix(token: &str) -> String { + token.chars().take(8).collect() +} + +fn enumerate_sources() -> Result, String> { + #[cfg(target_os = "macos")] + { + macos::enumerate().map_err(|e| format!("enumerate failed: {e}")) + } + #[cfg(not(target_os = "macos"))] + { + Err("screen-share picker not implemented for this platform yet".to_string()) + } +} + +// --------------------------------------------------------------------------- +// macOS backend +// --------------------------------------------------------------------------- + +#[cfg(target_os = "macos")] +mod macos { + use super::ScreenSource; + + use core::ffi::c_void; + use std::ffi::CStr; + + // Minimal CoreGraphics FFI so we don't need an extra `core-graphics` + // crate — these few symbols cover display + window enumeration and + // avoid pulling in ~50 extra transitive deps. 
+ + #[link(name = "CoreGraphics", kind = "framework")] + extern "C" { + fn CGGetActiveDisplayList( + max_displays: u32, + active_displays: *mut u32, + display_count: *mut u32, + ) -> i32; + fn CGMainDisplayID() -> u32; + fn CGDisplayPixelsWide(display: u32) -> usize; + fn CGDisplayPixelsHigh(display: u32) -> usize; + fn CGWindowListCopyWindowInfo(option: u32, relative_to_window: u32) -> *const c_void; // CFArrayRef + fn CGDisplayCreateImage(display: u32) -> *const c_void; // CGImageRef + fn CGWindowListCreateImage( + screen_bounds: CGRect, + list_option: u32, + window_id: u32, + image_option: u32, + ) -> *const c_void; + fn CGImageRelease(image: *const c_void); + fn CGImageGetWidth(image: *const c_void) -> usize; + fn CGImageGetHeight(image: *const c_void) -> usize; + } + + #[link(name = "ImageIO", kind = "framework")] + extern "C" { + fn CGImageDestinationCreateWithData( + data: *const c_void, // CFMutableDataRef + uti: *const c_void, // CFStringRef + count: usize, + options: *const c_void, + ) -> *const c_void; + fn CGImageDestinationAddImage( + dest: *const c_void, + image: *const c_void, + properties: *const c_void, + ); + fn CGImageDestinationFinalize(dest: *const c_void) -> bool; + } + + #[link(name = "CoreFoundation", kind = "framework")] + extern "C" { + fn CFRelease(cf: *const c_void); + fn CFArrayGetCount(array: *const c_void) -> isize; + fn CFArrayGetValueAtIndex(array: *const c_void, idx: isize) -> *const c_void; + fn CFDictionaryGetValue(dict: *const c_void, key: *const c_void) -> *const c_void; + fn CFStringGetCStringPtr(s: *const c_void, encoding: u32) -> *const i8; + fn CFStringGetCString( + s: *const c_void, + buffer: *mut i8, + buffer_size: isize, + encoding: u32, + ) -> bool; + fn CFStringGetLength(s: *const c_void) -> isize; + fn CFNumberGetValue(number: *const c_void, the_type: i32, value_ptr: *mut c_void) -> bool; + fn CFStringCreateWithCString( + alloc: *const c_void, + c_str: *const i8, + encoding: u32, + ) -> *const c_void; + fn 
CFDataCreateMutable(alloc: *const c_void, capacity: isize) -> *const c_void; + fn CFDataGetLength(data: *const c_void) -> isize; + fn CFDataGetBytePtr(data: *const c_void) -> *const u8; + } + + #[repr(C)] + #[derive(Copy, Clone)] + struct CGPoint { + x: f64, + y: f64, + } + #[repr(C)] + #[derive(Copy, Clone)] + struct CGSize { + width: f64, + height: f64, + } + #[repr(C)] + #[derive(Copy, Clone)] + struct CGRect { + origin: CGPoint, + size: CGSize, + } + + const CG_RECT_NULL: CGRect = CGRect { + origin: CGPoint { + x: f64::INFINITY, + y: f64::INFINITY, + }, + size: CGSize { + width: 0.0, + height: 0.0, + }, + }; + // kCGWindowListOptionIncludingWindow (= 8). + const K_CG_WINDOW_LIST_OPTION_INCLUDING_WINDOW: u32 = 1 << 3; + // kCGWindowImageBoundsIgnoreFraming (= 1) | kCGWindowImageNominalResolution (= 16). + const K_CG_WINDOW_IMAGE_BOUNDS_IGNORE_FRAMING: u32 = 1 << 0; + const K_CG_WINDOW_IMAGE_NOMINAL_RESOLUTION: u32 = 1 << 4; + + const K_CFSTRING_ENCODING_UTF8: u32 = 0x08000100; + const K_CFNUMBER_SINT64_TYPE: i32 = 4; + // kCGWindowListOptionOnScreenOnly (= 1) | kCGWindowListExcludeDesktopElements (= 16). + const K_CG_WINDOW_LIST_ON_SCREEN_ONLY: u32 = 1 << 0; + const K_CG_WINDOW_LIST_EXCLUDE_DESKTOP_ELEMENTS: u32 = 1 << 4; + + /// Below this pixel count on either axis we treat a captured window + /// image as TCC-denied rather than real content. macOS 15 Sequoia + /// returns a valid 1×1 transparent CGImage when Screen Recording is + /// not granted (instead of the pre-Sequoia null return), and the old + /// empty-check alone let that through (see PR #809 review). + const MIN_USABLE_DIMENSION: usize = 4; + + /// Allocate a CoreFoundation string. Returns `None` if the input + /// contains an interior NUL byte (CString rejects those). Callers + /// check the return rather than `expect()`ing, because unwinding + /// through a C frame is undefined behavior. 
+ fn cfstr(s: &str) -> Option<*const c_void> { + let c = std::ffi::CString::new(s).ok()?; + let ptr = unsafe { + CFStringCreateWithCString(std::ptr::null(), c.as_ptr(), K_CFSTRING_ENCODING_UTF8) + }; + if ptr.is_null() { + None + } else { + Some(ptr) + } + } + + fn cfstring_to_string(cf: *const c_void) -> Option { + if cf.is_null() { + return None; + } + unsafe { + let ptr = CFStringGetCStringPtr(cf, K_CFSTRING_ENCODING_UTF8); + if !ptr.is_null() { + return CStr::from_ptr(ptr).to_str().ok().map(|s| s.to_string()); + } + let len = CFStringGetLength(cf); + // UTF-8 safety margin: 4 bytes per codepoint + NUL. + let cap = (len as usize) * 4 + 1; + let mut buf = vec![0i8; cap]; + if CFStringGetCString(cf, buf.as_mut_ptr(), cap as isize, K_CFSTRING_ENCODING_UTF8) { + let c = CStr::from_ptr(buf.as_ptr()); + c.to_str().ok().map(|s| s.to_string()) + } else { + None + } + } + } + + fn cfnumber_to_u64(num: *const c_void) -> Option { + if num.is_null() { + return None; + } + let mut v: i64 = 0; + unsafe { + if CFNumberGetValue(num, K_CFNUMBER_SINT64_TYPE, &mut v as *mut _ as *mut c_void) { + Some(v as u64) + } else { + None + } + } + } + + pub(super) fn thumbnail_for_id(id: &str) -> Option { + let (kind, num) = super::parse_source_id(id)?; + let b64 = match kind { + super::SourceKind::Screen => screen_thumbnail_b64(num), + super::SourceKind::Window => window_thumbnail_b64(num), + }; + if b64.is_empty() { + None + } else { + Some(b64) + } + } + + pub(super) fn enumerate() -> Result, String> { + let mut out = Vec::new(); + out.extend(enumerate_screens()); + out.extend(enumerate_windows()); + Ok(out) + } + + fn cgimage_to_png_bytes(image: *const c_void) -> Option> { + if image.is_null() { + return None; + } + let uti_key = cfstr("public.png")?; + unsafe { + let data = CFDataCreateMutable(std::ptr::null(), 0); + if data.is_null() { + CFRelease(uti_key); + return None; + } + let dest = CGImageDestinationCreateWithData(data, uti_key, 1, std::ptr::null()); + if dest.is_null() { + 
CFRelease(uti_key); + CFRelease(data); + return None; + } + CGImageDestinationAddImage(dest, image, std::ptr::null()); + let ok = CGImageDestinationFinalize(dest); + CFRelease(dest); + CFRelease(uti_key); + if !ok { + CFRelease(data); + return None; + } + let len = CFDataGetLength(data) as usize; + let ptr = CFDataGetBytePtr(data); + let bytes = std::slice::from_raw_parts(ptr, len).to_vec(); + CFRelease(data); + Some(bytes) + } + } + + fn screen_thumbnail_b64(display_id: u32) -> String { + use base64::{engine::general_purpose::STANDARD, Engine as _}; + unsafe { + let image = CGDisplayCreateImage(display_id); + if image.is_null() { + return String::new(); + } + let w = CGImageGetWidth(image); + let h = CGImageGetHeight(image); + if w < MIN_USABLE_DIMENSION || h < MIN_USABLE_DIMENSION { + log::warn!( + "[screen-share] screen_thumbnail display_id={} returned {}×{} (likely TCC not granted)", + display_id, + w, + h + ); + CGImageRelease(image); + return String::new(); + } + let png = cgimage_to_png_bytes(image); + CGImageRelease(image); + png.map(|b| STANDARD.encode(b)).unwrap_or_default() + } + } + + fn window_thumbnail_b64(window_id: u32) -> String { + use base64::{engine::general_purpose::STANDARD, Engine as _}; + unsafe { + let opts = + K_CG_WINDOW_IMAGE_BOUNDS_IGNORE_FRAMING | K_CG_WINDOW_IMAGE_NOMINAL_RESOLUTION; + let image = CGWindowListCreateImage( + CG_RECT_NULL, + K_CG_WINDOW_LIST_OPTION_INCLUDING_WINDOW, + window_id, + opts, + ); + if image.is_null() { + return String::new(); + } + let w = CGImageGetWidth(image); + let h = CGImageGetHeight(image); + if w < MIN_USABLE_DIMENSION || h < MIN_USABLE_DIMENSION { + log::warn!( + "[screen-share] window_thumbnail window_id={} returned {}×{} (likely TCC not granted or Sequoia privacy policy)", + window_id, + w, + h + ); + CGImageRelease(image); + return String::new(); + } + let png = cgimage_to_png_bytes(image); + CGImageRelease(image); + png.map(|b| STANDARD.encode(b)).unwrap_or_default() + } + } + + fn 
enumerate_screens() -> Vec { + let mut ids = [0u32; 32]; + let mut count: u32 = 0; + let err = unsafe { CGGetActiveDisplayList(ids.len() as u32, ids.as_mut_ptr(), &mut count) }; + if err != 0 { + log::warn!("[screen-share] CGGetActiveDisplayList error={err}"); + return Vec::new(); + } + let main = unsafe { CGMainDisplayID() }; + ids.iter() + .take(count as usize) + .enumerate() + .map(|(idx, &display_id)| { + let w = unsafe { CGDisplayPixelsWide(display_id) }; + let h = unsafe { CGDisplayPixelsHigh(display_id) }; + let is_main = display_id == main; + let name = if is_main { + format!("Main Screen ({}×{})", w, h) + } else { + format!("Display {} ({}×{})", idx + 1, w, h) + }; + ScreenSource { + id: format!("screen:{}:0", display_id), + kind: "screen".to_string(), + name, + app_name: None, + thumbnail_png_base64: String::new(), + } + }) + .collect() + } + + fn enumerate_windows() -> Vec { + let opts = K_CG_WINDOW_LIST_ON_SCREEN_ONLY | K_CG_WINDOW_LIST_EXCLUDE_DESKTOP_ELEMENTS; + let array = unsafe { CGWindowListCopyWindowInfo(opts, 0) }; + if array.is_null() { + log::warn!("[screen-share] CGWindowListCopyWindowInfo returned null"); + return Vec::new(); + } + + // cfstr can fail (interior NUL — never happens for these literals + // but stay defensive); bail cleanly if so. 
+ let Some(key_window_number) = cfstr("kCGWindowNumber") else { + unsafe { CFRelease(array) }; + return Vec::new(); + }; + let Some(key_window_name) = cfstr("kCGWindowName") else { + unsafe { + CFRelease(key_window_number); + CFRelease(array) + }; + return Vec::new(); + }; + let Some(key_owner_name) = cfstr("kCGWindowOwnerName") else { + unsafe { + CFRelease(key_window_number); + CFRelease(key_window_name); + CFRelease(array); + } + return Vec::new(); + }; + let Some(key_layer) = cfstr("kCGWindowLayer") else { + unsafe { + CFRelease(key_window_number); + CFRelease(key_window_name); + CFRelease(key_owner_name); + CFRelease(array); + } + return Vec::new(); + }; + + let count = unsafe { CFArrayGetCount(array) }; + let mut out: Vec = Vec::new(); + for i in 0..count { + let dict = unsafe { CFArrayGetValueAtIndex(array, i) }; + if dict.is_null() { + continue; + } + let number_cf = unsafe { CFDictionaryGetValue(dict, key_window_number) }; + let layer_cf = unsafe { CFDictionaryGetValue(dict, key_layer) }; + let window_id_u64 = match cfnumber_to_u64(number_cf) { + Some(v) => v, + None => continue, + }; + // `CGWindowID` is `uint32_t` upstream, but `cfnumber_to_u64` + // returns 64-bit (we read the CFNumber as SInt64 for sign + // safety). Values should never exceed `u32::MAX` in practice, + // but a silent cast would round-trip through `format!` and + // then fail parse_source_id — the user would see a source in + // the picker with a permanent grey placeholder. Skip loudly. + let window_id = match u32::try_from(window_id_u64) { + Ok(v) => v, + Err(_) => { + log::warn!( + "[screen-share] window_id {} overflows u32, skipping", + window_id_u64 + ); + continue; + } + }; + // Skip menu bar / dock / system chrome (layer != 0 → non-normal + // window). Normal app windows live at layer 0. 
+ let layer = cfnumber_to_u64(layer_cf).unwrap_or(0); + if layer != 0 { + continue; + } + let title = unsafe { CFDictionaryGetValue(dict, key_window_name) }; + let owner = unsafe { CFDictionaryGetValue(dict, key_owner_name) }; + let title_str = cfstring_to_string(title).unwrap_or_default(); + let owner_str = cfstring_to_string(owner).unwrap_or_default(); + // Windows with no title are usually uninteresting (background + // helpers). Skip unless owner is informative and the window is + // the owner's only one — for MVP, simpler to just drop them. + if title_str.is_empty() { + continue; + } + let name = if owner_str.is_empty() { + title_str.clone() + } else { + format!("{} — {}", owner_str, title_str) + }; + out.push(ScreenSource { + id: format!("window:{}:0", window_id), + kind: "window".to_string(), + name, + app_name: if owner_str.is_empty() { + None + } else { + Some(owner_str) + }, + thumbnail_png_base64: String::new(), + }); + } + unsafe { + CFRelease(key_window_number); + CFRelease(key_window_name); + CFRelease(key_owner_name); + CFRelease(key_layer); + CFRelease(array); + } + out + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // ---- parse_source_id tests (platform-agnostic) ---- + + #[test] + fn parses_screen_id() { + assert_eq!(parse_source_id("screen:1:0"), Some((SourceKind::Screen, 1))); + assert_eq!( + parse_source_id("screen:69734208:0"), + Some((SourceKind::Screen, 69734208)) + ); + } + + #[test] + fn parses_window_id() { + assert_eq!( + parse_source_id("window:42:0"), + Some((SourceKind::Window, 42)) + ); + } + + #[test] + fn trailing_segment_ignored() { + assert_eq!( + parse_source_id("screen:1:extra:stuff"), + Some((SourceKind::Screen, 1)) + ); + } + + #[test] + fn rejects_unknown_prefix() { + assert_eq!(parse_source_id("tab:1:0"), None); + assert_eq!(parse_source_id("browser:1:0"), None); + assert_eq!(parse_source_id(""), None); + } + + #[test] + fn rejects_missing_numeric() { + assert_eq!(parse_source_id("screen::0"), None); + 
assert_eq!(parse_source_id("screen:"), None); + assert_eq!(parse_source_id("screen"), None); + } + + #[test] + fn rejects_non_numeric_id() { + assert_eq!(parse_source_id("screen:abc:0"), None); + assert_eq!(parse_source_id("window:0x1:0"), None); + } + + #[test] + fn rejects_overflowing_id() { + assert_eq!(parse_source_id("screen:4294967296:0"), None); + assert_eq!(parse_source_id("screen:-1:0"), None); + } + + #[test] + fn list_source_roundtrip() { + assert!(parse_source_id("screen:1:0").is_some()); + assert!(parse_source_id("window:12345:0").is_some()); + } + + // ---- Session / rate-limit tests (pure logic, no platform APIs) ---- + + fn insert_test_session( + state: &ScreenShareState, + token: &str, + account_id: &str, + ttl: Duration, + ids: &[&str], + ) { + let mut sessions = state.sessions.lock().unwrap(); + let mut active = state.active.lock().unwrap(); + sessions.insert( + token.to_string(), + Session { + account_id: account_id.to_string(), + allowed_ids: ids.iter().map(|s| s.to_string()).collect(), + expires_at: Instant::now() + ttl, + }, + ); + active.insert(account_id.to_string(), token.to_string()); + } + + #[test] + fn purge_expired_removes_stale_sessions() { + let state = ScreenShareState::new(); + insert_test_session( + &state, + "tok-expired", + "acct1", + Duration::from_millis(0), + &[], + ); + // Sleep a blink so `expires_at <= now` is definitely true. 
+ std::thread::sleep(Duration::from_millis(5)); + insert_test_session(&state, "tok-live", "acct2", Duration::from_secs(10), &[]); + + { + let mut s = state.sessions.lock().unwrap(); + let mut a = state.active.lock().unwrap(); + purge_expired(&mut s, &mut a); + } + + let sessions = state.sessions.lock().unwrap(); + assert!(!sessions.contains_key("tok-expired")); + assert!(sessions.contains_key("tok-live")); + let active = state.active.lock().unwrap(); + assert!(!active.contains_key("acct1")); + assert_eq!(active.get("acct2").map(|s| s.as_str()), Some("tok-live")); + } + + #[test] + fn rate_limit_blocks_11th_call_in_window() { + let mut rate = HashMap::new(); + for _ in 0..RATE_LIMIT_MAX { + assert!(check_and_record_rate(&mut rate, "acct-x")); + } + // 11th call must fail. + assert!(!check_and_record_rate(&mut rate, "acct-x")); + } + + #[test] + fn rate_limit_scoped_per_account() { + let mut rate = HashMap::new(); + for _ in 0..RATE_LIMIT_MAX { + check_and_record_rate(&mut rate, "acct-a"); + } + // Different account still has full budget. + assert!(check_and_record_rate(&mut rate, "acct-b")); + } + + #[test] + fn generate_token_is_url_safe_and_unique() { + let a = generate_token(); + let b = generate_token(); + assert_ne!(a, b); + // URL-safe base64, no-pad, 16 bytes → 22 chars. + assert_eq!(a.len(), 22); + assert!(a + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')); + } + + #[test] + fn token_prefix_truncates() { + assert_eq!(token_prefix("0123456789abcdef"), "01234567"); + assert_eq!(token_prefix("ab"), "ab"); + } + + // NOTE: full command-level tests (screen_share_begin_session etc.) + // would need a `tauri::Webview` mock, which the stable Tauri API + // doesn't expose. Gate + rate-limit logic is covered above; the + // command glue around it is thin enough to verify via live run. 
+} diff --git a/app/src-tauri/src/webview_accounts/runtime.js b/app/src-tauri/src/webview_accounts/runtime.js index 754339f8a..875c6e5b4 100644 --- a/app/src-tauri/src/webview_accounts/runtime.js +++ b/app/src-tauri/src/webview_accounts/runtime.js @@ -95,4 +95,677 @@ window.__openhumanRecipe = api; send('log', { level: 'info', msg: '[recipe-runtime] ready provider=' + ctx.provider + ' accountId=' + ctx.accountId }); + + // --- #713 getDisplayMedia shim --- + // + // Background: embedded webviews run under CEF Alloy, which does not link + // Chromium's DesktopMediaPicker. Without an interceptor, `getDisplayMedia` + // gets auto-granted by our permission handler and Chromium silently picks + // the primary display (issue #713 AC2: "OS screen/window picker appears"). + // + // The picker UI is injected DIRECTLY into the child webview's own DOM + // rather than rendered as a React modal in the main OpenHuman window. + // Two reasons: + // (a) Works uniformly for every embedded provider — Meet, Slack + // Huddles, Discord, Zoom — without per-provider host-side glue. + // (b) Dodges the CEF native-view stacking problem: a React modal in + // the main window is always occluded by the child webview's + // NSView, forcing a hide/bounds dance that flickers the embedded + // site. An overlay inside the page is stacked in the page's own + // compositing context, so it sits above Meet/Slack UI naturally. + // + // Flow: + // 1. Shim calls Tauri `screen_share_begin_session` to open a session and enumerate real + // screens (`screen:{id}:0`) and windows + // (`window:{id}:0`) natively. + // 2. Shim builds a fixed-position picker overlay inside the page's + // document and awaits the user's choice. + // 3. On Share, shim calls `getUserMedia` with a hand-crafted + // `chromeMediaSource: 'desktop' + chromeMediaSourceId` constraint. + // Stage 0 PoC proved Chromium honours the ID directly because our + // CEF permission callback grants `DESKTOP_VIDEO_CAPTURE` bits. + // 4. 
On Cancel, shim throws `AbortError` — not the `NotAllowedError` the real + // Chromium picker emits — so sites like Meet dismiss the cancel silently. + (function installGetDisplayMediaShim() { + if (!navigator.mediaDevices || typeof navigator.mediaDevices.getDisplayMedia !== 'function') { + // Never had getDisplayMedia to begin with (non-WebRTC webview); skip. + return; + } + if (navigator.mediaDevices.__ohGdmShimInstalled) return; + + // `navigator.mediaDevices.getDisplayMedia` is a WebIDL-defined prototype + // method on `MediaDevices.prototype`. Chromium marks it + // `writable: true, configurable: true` but *only* on the prototype — + // plain `navigator.mediaDevices.getDisplayMedia = ...` on the instance + // creates an own-property shadow that Chromium's IDL bindings bypass + // when the page actually invokes the method. We override on the + // prototype with `defineProperty` so the shim is what runs for every + // MediaDevices instance in this page (including any iframes that + // inherit from the same prototype). + const proto = Object.getPrototypeOf(navigator.mediaDevices); + const descriptor = Object.getOwnPropertyDescriptor(proto, 'getDisplayMedia'); + const origGetDisplayMedia = (descriptor && descriptor.value + ? descriptor.value + : navigator.mediaDevices.getDisplayMedia + ).bind(navigator.mediaDevices); + + // Fire-and-forget session cleanup. Swallows errors because finalize + // is a no-op on the host side for unknown/expired tokens and we don't + // want a late IPC failure to leak into the getDisplayMedia rejection. + function finalizeSessionQuiet(token, pickedId) { + if (!token) return Promise.resolve(); + return rawInvoke('screen_share_finalize_session', { + args: { token: token, pickedId: pickedId || null }, + }).catch(function () {}); + } + + // In-flight guard (graycyrus refactor #6). 
The host-side state already + // evicts a stale session when begin_session fires twice, but without a + // shim-side guard a second call would still append a second picker DOM + // while the first is open — the user would see two stacked overlays. + // Reject a concurrent call the same way the MediaStreams spec does + // when an existing capture request is in progress. + let pickerInFlight = false; + + const shim = async function (constraints) { + constraints = constraints || {}; + if (pickerInFlight) { + send('log', { level: 'warn', msg: '[gdm-shim] picker already open, rejecting concurrent call' }); + throw new DOMException( + 'A screen-share picker is already open', + 'InvalidStateError' + ); + } + pickerInFlight = true; + try { + return await runShim(constraints); + } finally { + pickerInFlight = false; + } + }; + + const runShim = async function (constraints) { + constraints = constraints || {}; + // User-activation gate (#812). `navigator.userActivation.isActive` + // is transient — true only during the direct call stack of a real + // gesture handler (click, key, touch). Third-party JS calling + // getDisplayMedia from a timer or async continuation gets filtered + // here, so our downstream commands (begin_session etc.) never open + // a session without a gesture. Fall through to the original + // implementation rather than throw so pages with legitimate + // non-gesture flows (rare but possible) aren't hard-blocked. 
+ const hasActivation = !!( + typeof navigator !== 'undefined' && + navigator.userActivation && + navigator.userActivation.isActive + ); + send('log', { + level: 'info', + msg: + '[gdm-shim] getDisplayMedia intercepted audio=' + + !!constraints.audio + + ' activation=' + + hasActivation, + }); + if (!hasActivation) { + send('log', { + level: 'warn', + msg: '[gdm-shim] no user activation, falling through to native getDisplayMedia', + }); + return origGetDisplayMedia(constraints); + } + + let session; + try { + session = await rawInvoke('screen_share_begin_session', { + args: { + accountId: ctx.accountId, + origin: (typeof location !== 'undefined' && location.origin) || 'unknown', + hasUserActivation: hasActivation, + }, + }); + } catch (e) { + send('log', { + level: 'error', + msg: '[gdm-shim] begin_session IPC failed: ' + (e && e.message ? e.message : String(e)), + }); + return origGetDisplayMedia(constraints); + } + if (!session || typeof session.token !== 'string' || !Array.isArray(session.sources)) { + send('log', { + level: 'warn', + msg: '[gdm-shim] begin_session returned malformed payload, falling back', + }); + return origGetDisplayMedia(constraints); + } + const sessionToken = session.token; + const sources = session.sources; + if (sources.length === 0) { + send('log', { level: 'warn', msg: '[gdm-shim] no sources enumerated, falling back' }); + await finalizeSessionQuiet(sessionToken, null); + return origGetDisplayMedia(constraints); + } + + const pick = await showInPagePicker(sources, sessionToken); + if (!pick) { + send('log', { level: 'info', msg: '[gdm-shim] user cancelled picker' }); + await finalizeSessionQuiet(sessionToken, null); + // Meet (and other video-conf sites) treat `NotAllowedError` on + // getDisplayMedia as "the browser blocked us" and pop a + // "needs permission" modal. 
Real Chrome ALSO throws + // NotAllowedError on picker cancel, but Meet silently swallows + // it there — presumably via a separate Permissions API check + // that reports 'granted'. Since we can't easily signal that + // state in CEF, throw `AbortError` instead: it's the MDN-blessed + // "user interrupted a UI operation" error and most sites (Meet + // included) dismiss it silently. + throw new DOMException('User cancelled screen share picker', 'AbortError'); + } + // Finalize the session BEFORE getUserMedia: the Chromium capture + // path doesn't need the token, and leaving the session open past + // this point would just hold the `active` slot for the account + // until the 30s TTL fires. + await finalizeSessionQuiet(sessionToken, pick.id); + send('log', { + level: 'info', + msg: '[gdm-shim] picked id=' + pick.id + ' kind=' + pick.kind, + }); + const videoMandatory = { + chromeMediaSource: 'desktop', + chromeMediaSourceId: pick.id, + maxFrameRate: 30, + }; + // System-audio capture via `chromeMediaSource: 'desktop'` needs a + // loopback driver on macOS (no stock API). If the page requested + // audio we try with audio first and fall back to video-only on + // rejection so Meet/Slack/etc don't see a generic "Can't share" + // error on every attempt. Chromium cleanly handles a missing audio + // track in the SDP. + const videoOnly = { video: { mandatory: videoMandatory }, audio: false }; + + let stream; + if (constraints.audio) { + const audioMandatory = { + chromeMediaSource: 'desktop', + chromeMediaSourceId: pick.id, + }; + try { + stream = await navigator.mediaDevices.getUserMedia({ + video: { mandatory: videoMandatory }, + audio: { mandatory: audioMandatory }, + }); + } catch (e) { + send('log', { + level: 'warn', + msg: + '[gdm-shim] audio+video getUserMedia rejected (' + + (e && e.name ? 
e.name : '?') + + '), retrying video-only', + }); + stream = await navigator.mediaDevices.getUserMedia(videoOnly); + } + } else { + stream = await navigator.mediaDevices.getUserMedia(videoOnly); + } + + // Stream returned by the legacy `chromeMediaSource: 'desktop'` + // getUserMedia path is a real capture stream but its tracks lack + // the display-media metadata the page expects from real + // getDisplayMedia. Google Meet (and others) inspect + // `track.getSettings().displaySurface` before they will route the + // track over WebRTC — if the field is missing they throw "Can't + // share your screen — Something went wrong". + // + // Patch each video track to expose the right displaySurface and + // a `contentHint` of `detail` (standard WebRTC screen-capture + // content hint). The underlying capture pipeline is unchanged; + // we're only fixing the introspectable metadata the page relies + // on to identify a display-media track. + const displaySurface = pick.kind === 'screen' ? 'monitor' : 'window'; + stream.getVideoTracks().forEach(function (track) { + try { track.contentHint = 'detail'; } catch (_) { /* ignore */ } + try { + const origGetSettings = track.getSettings.bind(track); + Object.defineProperty(track, 'getSettings', { + configurable: true, + writable: true, + value: function () { + const base = origGetSettings() || {}; + return Object.assign({}, base, { + displaySurface: displaySurface, + logicalSurface: true, + cursor: 'motion', + }); + }, + }); + } catch (e) { + send('log', { + level: 'warn', + msg: '[gdm-shim] patch getSettings failed: ' + (e && e.message ? e.message : e), + }); + } + }); + + return stream; + }; + + // In-page picker. Renders straight into the host page's body so the + // overlay stacks above the site's own compositor (Meet/Slack/Discord + // UI) without any native-view gymnastics. 
All nodes are namespaced + // under `__ohsp_*` class/ID prefixes and attached to a closed shadow + // root where possible to avoid colliding with the host page's CSS. + function showInPagePicker(sources, sessionToken) { + return new Promise(function (resolveOuter, rejectOuter) { + function host() { return (document.body || document.documentElement); } + if (!host()) { + // DOM hasn't parsed yet — wait for it and retry. Previously we + // resolved null here, which the shim turned into an AbortError + // even though no picker was ever shown (coderabbit #809). + document.addEventListener( + 'DOMContentLoaded', + function () { + showInPagePicker(sources, sessionToken).then(resolveOuter, rejectOuter); + }, + { once: true } + ); + return; + } + + const root = document.createElement('div'); + root.setAttribute('data-openhuman-screen-share-picker', ''); + root.style.cssText = [ + 'all: initial', + 'position: fixed', + 'inset: 0', + 'z-index: 2147483647', + 'display: flex', + 'align-items: center', + 'justify-content: center', + 'background: rgba(0,0,0,0.55)', + 'backdrop-filter: blur(6px)', + '-webkit-backdrop-filter: blur(6px)', + 'font-family: -apple-system, BlinkMacSystemFont, "Inter", "Segoe UI", sans-serif', + ].join(';'); + + const shadow = root.attachShadow ? 
root.attachShadow({ mode: 'closed' }) : root; + + const styleTag = document.createElement('style'); + styleTag.textContent = [ + '* { box-sizing: border-box; margin: 0; padding: 0; font-family: inherit; }', + '.card { background: #fff; color: #1C1917; border-radius: 16px; width: min(640px, 92vw);', + ' max-height: 86vh; box-shadow: 0 24px 64px rgba(0,0,0,0.35); overflow: hidden;', + ' display: flex; flex-direction: column; }', + '.head { padding: 20px 24px; border-bottom: 1px solid #E7E5E4; display: flex;', + ' align-items: flex-start; justify-content: space-between; gap: 16px; }', + '.title { font-size: 17px; font-weight: 600; color: #1C1917; }', + '.origin { margin-top: 4px; font-size: 13px; color: #78716C; }', + '.closebtn { width: 32px; height: 32px; border: none; background: transparent;', + ' color: #78716C; cursor: pointer; border-radius: 8px; font-size: 18px;', + ' display: flex; align-items: center; justify-content: center; }', + '.closebtn:hover { background: #F5F5F4; color: #1C1917; }', + '.tabs { display: flex; gap: 4px; padding: 0 24px; border-bottom: 1px solid #E7E5E4; }', + '.tab { appearance: none; -webkit-appearance: none; background: transparent; border: 0;', + ' padding: 12px 16px; font-size: 14px; font-weight: 500; color: #78716C;', + ' cursor: pointer; border-bottom: 2px solid transparent; }', + '.tab.active { color: #4A83DD; border-bottom-color: #4A83DD; }', + '.body { padding: 20px 24px; overflow-y: auto; }', + '.grid { display: grid; grid-template-columns: repeat(2, minmax(0,1fr)); gap: 12px; }', + '.srcbtn { background: #FAFAF9; border: 2px solid #E7E5E4; border-radius: 10px;', + ' padding: 0; cursor: pointer; text-align: left; overflow: hidden;', + ' transition: border-color .15s, box-shadow .15s; }', + '.srcbtn:hover { border-color: #D4D4D1; }', + '.srcbtn.selected { border-color: #4A83DD;', + ' box-shadow: 0 0 0 3px rgba(74,131,221,0.18); }', + '.srcthumb { aspect-ratio: 16/10; background: #F5F5F4; display: flex;', + ' align-items: 
center; justify-content: center; color: #A8A29E;', + ' font-size: 32px; }', + '.srcname { padding: 8px 10px; font-size: 13px; color: #1C1917; font-weight: 500;', + ' white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }', + '.srcapp { padding: 0 10px 8px; font-size: 11px; color: #78716C;', + ' white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }', + '.empty { padding: 32px 0; text-align: center; color: #78716C; font-size: 13px; }', + '.foot { padding: 12px 16px; border-top: 1px solid #E7E5E4; display: flex;', + ' justify-content: flex-end; gap: 8px; }', + '.btn { appearance: none; -webkit-appearance: none; border: 0; border-radius: 10px;', + ' padding: 9px 16px; font-size: 14px; font-weight: 500; cursor: pointer; }', + '.btn-secondary { background: transparent; color: #1C1917; }', + '.btn-secondary:hover { background: #F5F5F4; }', + '.btn-primary { background: #4A83DD; color: #fff; }', + '.btn-primary:hover { background: #3D6DC4; }', + '.btn-primary:disabled { background: #D4D4D1; cursor: not-allowed; }', + ].join('\n'); + shadow.appendChild(styleTag); + + function hostnameOf(url) { + try { return new URL(url).hostname || url; } catch (e) { return url; } + } + + const origin = (typeof location !== 'undefined' && location.origin) || 'this site'; + let activeTab = sources.some(function (s) { return s.kind === 'screen'; }) + ? 'screen' + : 'window'; + let selectedId = null; + + // DOM is constructed imperatively (no innerHTML) because hosts + // like Google Meet ship strict Trusted Types CSP that rejects + // string-based HTML assignment with a TypeError. `createElement` + // and `appendChild` are policy-free and work everywhere. 
+ const card = document.createElement('div'); + card.className = 'card'; + + function el(tag, attrs, text) { + const node = document.createElement(tag); + if (attrs) { + Object.keys(attrs).forEach(function (k) { + if (k === 'className') node.className = attrs[k]; + else node.setAttribute(k, attrs[k]); + }); + } + if (text != null) node.textContent = text; + return node; + } + + const head = el('div', { className: 'head' }); + const headLeft = el('div'); + headLeft.appendChild(el('div', { className: 'title' }, 'Choose what to share')); + const originEl = el( + 'div', + { className: 'origin' }, + hostnameOf(origin) + ' wants to share your screen.' + ); + headLeft.appendChild(originEl); + head.appendChild(headLeft); + const closeBtn = el( + 'button', + { className: 'closebtn', 'data-action': 'cancel', 'aria-label': 'Cancel' }, + '✕' + ); + head.appendChild(closeBtn); + card.appendChild(head); + + const tabs = el('div', { className: 'tabs' }); + const screenTab = el('button', { className: 'tab', 'data-tab': 'screen' }, 'Entire Screen'); + const windowTab = el('button', { className: 'tab', 'data-tab': 'window' }, 'Window'); + tabs.appendChild(screenTab); + tabs.appendChild(windowTab); + card.appendChild(tabs); + + const bodyEl = el('div', { className: 'body' }); + const gridEl = el('div', { className: 'grid' }); + bodyEl.appendChild(gridEl); + card.appendChild(bodyEl); + + const foot = el('div', { className: 'foot' }); + const cancelBtn = el( + 'button', + { className: 'btn btn-secondary', 'data-action': 'cancel' }, + 'Cancel' + ); + const shareBtn = el('button', { className: 'btn btn-primary' }, 'Share'); + shareBtn.disabled = true; + foot.appendChild(cancelBtn); + foot.appendChild(shareBtn); + card.appendChild(foot); + + shadow.appendChild(card); + + const tabButtons = [screenTab, windowTab]; + + function setTab(next) { + activeTab = next; + tabButtons.forEach(function (btn) { + btn.classList.toggle('active', btn.getAttribute('data-tab') === activeTab); + }); + 
render(); + } + + function render() { + while (gridEl.firstChild) gridEl.removeChild(gridEl.firstChild); + const filtered = sources.filter(function (s) { return s.kind === activeTab; }); + if (filtered.length === 0) { + const empty = document.createElement('div'); + empty.className = 'empty'; + empty.textContent = + 'No ' + (activeTab === 'screen' ? 'screens' : 'windows') + ' available.'; + gridEl.appendChild(empty); + shareBtn.disabled = true; + return; + } + filtered.forEach(function (src) { + const btn = document.createElement('button'); + btn.className = 'srcbtn' + (selectedId === src.id ? ' selected' : ''); + btn.setAttribute('data-source-id', src.id); + const thumb = document.createElement('div'); + thumb.className = 'srcthumb'; + if (src.thumbnailPngBase64) { + const img = document.createElement('img'); + img.src = 'data:image/png;base64,' + src.thumbnailPngBase64; + img.alt = ''; + img.style.cssText = + 'width: 100%; height: 100%; object-fit: contain; display: block;'; + thumb.appendChild(img); + } else { + // Placeholder glyph until the lazy-loaded thumbnail arrives. + thumb.textContent = activeTab === 'screen' ? '□' : '▣'; + // Dedup in-flight thumbnail IPCs: render() re-runs on every + // selection change and tab switch, and without this cache + // each pass would re-issue screen_share_thumbnail for every + // source that hadn't yet returned (coderabbit #809). 
+ function paintThumb(b64) { + if (!b64 || typeof b64 !== 'string') return; + const liveBtn = gridEl.querySelector( + '[data-source-id="' + src.id.replace(/"/g, '\\"') + '"]' + ); + if (!liveBtn) return; + const liveThumb = liveBtn.querySelector('.srcthumb'); + if (!liveThumb) return; + while (liveThumb.firstChild) liveThumb.removeChild(liveThumb.firstChild); + const img = document.createElement('img'); + img.src = 'data:image/png;base64,' + b64; + img.alt = ''; + img.style.cssText = + 'width: 100%; height: 100%; object-fit: contain; display: block;'; + liveThumb.appendChild(img); + } + if (src.__thumbnailPromise) { + src.__thumbnailPromise.then(paintThumb, function () {}); + } else { + src.__thumbnailPromise = rawInvoke('screen_share_thumbnail', { + args: { token: sessionToken, id: src.id }, + }).then( + function (b64) { + if (b64 && typeof b64 === 'string') { + // Stash on the source so future re-renders keep + // the thumbnail without re-requesting it. + src.thumbnailPngBase64 = b64; + } + paintThumb(b64); + return b64; + }, + function () { + /* thumbnail failures degrade gracefully to the glyph */ + } + ); + } + } + const name = document.createElement('div'); + name.className = 'srcname'; + name.textContent = src.name; + btn.appendChild(thumb); + btn.appendChild(name); + if (src.appName) { + const app = document.createElement('div'); + app.className = 'srcapp'; + app.textContent = src.appName; + btn.appendChild(app); + } + btn.addEventListener('click', function () { + selectedId = src.id; + render(); + }); + btn.addEventListener('dblclick', function () { + selectedId = src.id; + finish(sources.find(function (s) { return s.id === selectedId; }) || null); + }); + gridEl.appendChild(btn); + }); + if (!selectedId || !filtered.some(function (s) { return s.id === selectedId; })) { + selectedId = filtered[0].id; + gridEl.firstChild && gridEl.firstChild.classList.add('selected'); + } + shareBtn.disabled = !selectedId; + } + + tabButtons.forEach(function (btn) { + 
btn.addEventListener('click', function () { setTab(btn.getAttribute('data-tab')); }); + }); + + let settled = false; + function finish(pick) { + if (settled) return; + settled = true; + window.removeEventListener('keydown', onKey, true); + try { root.remove(); } catch (e) { /* ignore */ } + resolveOuter(pick); + } + + card.querySelectorAll('[data-action="cancel"]').forEach(function (btn) { + btn.addEventListener('click', function () { finish(null); }); + }); + shareBtn.addEventListener('click', function () { + const pick = sources.find(function (s) { return s.id === selectedId; }) || null; + finish(pick); + }); + // Clicks on the backdrop (outside the card) cancel. Clicks inside + // the card bubble up to root too, but we stop them there. + root.addEventListener('click', function (e) { + if (e.target === root || e.composedPath()[0] === root) finish(null); + }); + card.addEventListener('click', function (e) { e.stopPropagation(); }); + + function onKey(e) { + if (e.key === 'Escape') { + e.preventDefault(); + e.stopPropagation(); + finish(null); + } + } + window.addEventListener('keydown', onKey, true); + + setTab(activeTab); + host().appendChild(root); + }); + } + + let installed = false; + try { + Object.defineProperty(proto, 'getDisplayMedia', { + configurable: true, + writable: true, + value: shim, + }); + installed = true; + } catch (e) { + send('log', { + level: 'error', + msg: '[gdm-shim] defineProperty(proto) failed: ' + (e && e.message ? e.message : String(e)), + }); + } + if (!installed) { + try { + Object.defineProperty(navigator.mediaDevices, 'getDisplayMedia', { + configurable: true, + writable: true, + value: shim, + }); + installed = true; + } catch (e2) { + send('log', { + level: 'error', + msg: '[gdm-shim] defineProperty(instance) failed: ' + (e2 && e2.message ? 
e2.message : String(e2)), + }); + } + } + navigator.mediaDevices.__ohGdmShimInstalled = installed; + + // Some pages (Meet) also consult `navigator.permissions.query` and + // branch on the reported state for `display-capture` / + // `camera` / `microphone`. CEF Alloy's Permissions API does not + // reflect what our OnRequestMediaAccessPermission callback will + // grant dynamically, so it defaults to 'prompt' or even 'denied' + // for `display-capture`. A page that sees 'denied' will assume + // sharing is structurally blocked and refuse to call + // getDisplayMedia — or show the "needs permission" modal on cancel. + // We shadow the query for these names so the page sees 'granted' + // and relies on our shim for the actual user decision. + try { + if ( + navigator.permissions && + typeof navigator.permissions.query === 'function' && + !navigator.permissions.__ohPermissionsShimInstalled + ) { + const permProto = Object.getPrototypeOf(navigator.permissions); + const permDescriptor = Object.getOwnPropertyDescriptor(permProto, 'query'); + const origQuery = (permDescriptor && permDescriptor.value + ? permDescriptor.value + : navigator.permissions.query + ).bind(navigator.permissions); + // CEF Alloy's Permissions API doesn't reflect what our + // OnRequestMediaAccessPermission callback will grant dynamically, + // so it defaults to 'prompt' or 'denied' for the media permissions + // we do handle. Pages that consult the Permissions API up front + // (Meet for display-capture; some flows for camera/microphone) + // refuse to try the actual getUserMedia call if they see 'denied' + // here. Spoof all three to 'granted'; the real grant still goes + // through our CEF permission handler where it's scoped per-call. 
+ const spoofed = { + 'display-capture': 'granted', + camera: 'granted', + microphone: 'granted', + }; + const spoofedQuery = async function (descriptor) { + const n = descriptor && descriptor.name; + if (n && spoofed[n]) { + return { + state: spoofed[n], + status: spoofed[n], + name: n, + onchange: null, + addEventListener: function () {}, + removeEventListener: function () {}, + dispatchEvent: function () { return true; }, + }; + } + return origQuery(descriptor); + }; + try { + Object.defineProperty(permProto, 'query', { + configurable: true, + writable: true, + value: spoofedQuery, + }); + } catch (e) { + Object.defineProperty(navigator.permissions, 'query', { + configurable: true, + writable: true, + value: spoofedQuery, + }); + } + navigator.permissions.__ohPermissionsShimInstalled = true; + send('log', { level: 'info', msg: '[gdm-shim] permissions.query shim installed' }); + } + } catch (e) { + send('log', { + level: 'warn', + msg: '[gdm-shim] permissions.query shim failed: ' + (e && e.message ? e.message : e), + }); + } + + send('log', { + level: 'info', + msg: + '[gdm-shim] install=' + installed + + ' on ' + ((typeof location !== 'undefined' && location.origin) || '?'), + }); + })(); })(); diff --git a/docs/src-tauri/02-commands.md b/docs/src-tauri/02-commands.md index 1799f93b4..f4b286427 100644 --- a/docs/src-tauri/02-commands.md +++ b/docs/src-tauri/02-commands.md @@ -53,6 +53,16 @@ From **`commands/openhuman.rs`** (see source for exact payloads): | `openhuman_service_status` | Query status | | `openhuman_service_uninstall` | Uninstall service | +## Screen share picker (CEF / macOS) + +From **`screen_capture/mod.rs`** (registered under `#[cfg(feature = "cef")]`). Backs the in-page `getDisplayMedia` shim in `webview_accounts/runtime.js`. Session-gated: the shim must open a session with a live user gesture before enumeration / thumbnail captures succeed. See issue #713 (picker UX) + #812 (session gating). 
+ +| Command | Purpose | +| --------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | +| `screen_share_begin_session` | Open a 30s session from an account webview, after a `navigator.userActivation.isActive` gesture. Returns `{ token, sources }`. Rate-limited to 10/minute per account. | +| `screen_share_thumbnail` | Capture a single source's thumbnail as base64 PNG. Requires a live token and an `id` that the session was issued for. macOS only; other platforms return an error. | +| `screen_share_finalize_session` | Close the session. Called by the shim on Share or Cancel; safe to call with an unknown/expired token (no-op). | + ## Removed / not present The following **do not** exist in the current `generate_handler!` list: `exchange_token`, `get_auth_state`, `socket_connect`, `start_telegram_login`. Authentication and sockets are handled in the **React** app and **core** process, not via these IPC names.