diff --git a/README.md b/README.md index 9a1ba8ee..12e7c889 100644 --- a/README.md +++ b/README.md @@ -206,7 +206,6 @@ The record → transcribe → polish → insert state machine is owned exclusive Planned in the requirements docs but not in the 1.0 release: -- Hold-to-talk mode (today only toggle). - Local ASR (today only Volcengine cloud). - Snippets (no UI / trigger logic yet). - History enhancements: copy button, search, re-polish, re-insert. diff --git a/README.zh.md b/README.zh.md index d6cfceb5..4d5c5350 100644 --- a/README.zh.md +++ b/README.zh.md @@ -209,7 +209,6 @@ OpenLessApp // AppDelegate, 菜单栏, 设置窗口, 胶囊窗口, Dictat 下面这些功能在需求文档里有规划,但 1.0 没有发布: -- 按住说话模式(hold-to-talk):当前仅支持切换式。 - 本地 ASR:当前仅接入火山引擎云端 ASR。 - 常用片段 Snippets:尚无 UI 和触发逻辑。 - 历史增强:复制按钮、搜索、重新润色、重新插入。 diff --git a/Sources/OpenLessApp/DictationCoordinator.swift b/Sources/OpenLessApp/DictationCoordinator.swift index 78e8c79d..1d88564c 100644 --- a/Sources/OpenLessApp/DictationCoordinator.swift +++ b/Sources/OpenLessApp/DictationCoordinator.swift @@ -33,6 +33,8 @@ final class DictationCoordinator { private var asr: VolcengineStreamingASR? private var audioConsumer: BufferingAudioConsumer? private var sessionStartedAt: Date = Date() + /// hold 模式下,Esc 取消后下一次 .released 应被忽略(否则会再次触发结束流程)。 + private var suppressNextRelease = false /// 启动时一次性读 Keychain 缓存的凭据快照;会话热路径只读这里, /// 不再每次都打 SecItemCopyMatching 触发钥匙串弹窗。 @@ -138,8 +140,10 @@ final class DictationCoordinator { guard let self else { return } for await event in self.hotkey.events { switch event { - case .toggled: - self.handleToggle() + case .pressed: + self.handlePressed() + case .released: + self.handleReleased() case .cancelled: self.handleCancel() } @@ -147,7 +151,46 @@ final class DictationCoordinator { } } - // MARK: - Toggle 状态机 + // MARK: - Toggle / Hold 状态机 + + private func handlePressed() { + switch UserPreferences.shared.hotkeyMode { + case .toggle: + handleToggle() + case .hold: + handleHoldStart() + } + } + + private func handleReleased() { + guard UserPreferences.shared.hotkeyMode == .hold else { return } + if suppressNextRelease { + suppressNextRelease = false + return + } + switch sessionPhase { + case .listening: + sessionPhase = .processing + Task { await endSession() } + case .starting: + // 用户没等到 ASR 连上就松手 — 当作取消,不发送任何已采集音频。 + Log.write("[session] hold: starting 阶段松手,取消") + handleCancel() + case .idle, .processing: + return + } + } + + private func handleHoldStart() { + switch sessionPhase { + case .idle: + sessionPhase = .starting + Task { await beginSession() } + case .starting, .listening, .processing: + // hold 模式下重复 .pressed 通常来自系统自动重发;忽略即可。 + return + } + } private func handleToggle() { switch sessionPhase { @@ -173,6 +216,10 @@ final class DictationCoordinator { recorder.stop() audioConsumer?.clear() audioConsumer = nil + // hold 模式:如果用户还按着键,松手时会再来一次 .released —— 屏蔽掉,避免再次触发结束。 + if UserPreferences.shared.hotkeyMode == .hold { + suppressNextRelease = true + } capsule.update(state: .cancelled) DispatchQueue.main.asyncAfter(deadline: .now() + 0.6) { [weak self] in self?.capsule.update(state: .hidden) @@ -291,6 +338,21 @@ final class DictationCoordinator { } } + /// 润色环节实际发生了什么。决定胶囊提示色调和历史记录的真实 mode。 + private enum PolishOutcome { + case ok // 真润色完成 + case skippedNoCredentials // 没填 Ark,直接跳过 + case failed(String) // 调到了,但报错;error 文本仅作日志 + + var logTag: String { + switch self { + case .ok: return "ok" + case .skippedNoCredentials: return "skip-no-creds" + case .failed(let msg): return "failed(\(msg.prefix(120)))" + } + } + } + private func polishAndInsert( raw: RawTranscript, originalRawText: String? = nil, @@ -300,13 +362,14 @@ final class DictationCoordinator { let savedRaw = originalRawText ?? raw.text guard let arkCreds = loadArkCredentials() else { - Log.write("缺少 Ark 凭据;直接用 raw 插入") + Log.write("[polish] 缺少 Ark 凭据;跳过润色,插入 raw") await insertText( text: raw.text, raw: savedRaw, mode: mode, durationMs: raw.durationMs, - dictionaryEntryCount: dictionaryEntries.count + dictionaryEntryCount: dictionaryEntries.count, + polishOutcome: .skippedNoCredentials ) return } @@ -324,20 +387,18 @@ final class DictationCoordinator { raw: savedRaw, mode: mode, durationMs: raw.durationMs, - dictionaryEntryCount: dictionaryEntries.count + dictionaryEntryCount: dictionaryEntries.count, + polishOutcome: .ok ) } catch { - Log.write("[polish] 失败: \(error);fallback 用 raw") - // 让用户知道润色失败(最常见原因:Ark 模型 ID 写错)。 - // 1.5s 提示后用 raw 兜底插入,避免用户以为是"整理完成"。 - capsule.update(state: .error("整理失败 用原文")) - try? await Task.sleep(nanoseconds: 1_500_000_000) + Log.write("[polish] 失败: \(error);插入 raw") await insertText( text: raw.text, raw: savedRaw, mode: mode, durationMs: raw.durationMs, - dictionaryEntryCount: dictionaryEntries.count + dictionaryEntryCount: dictionaryEntries.count, + polishOutcome: .failed(String(describing: error)) ) } } @@ -347,7 +408,8 @@ final class DictationCoordinator { raw: String, mode: PolishMode, durationMs: Int?, - dictionaryEntryCount: Int + dictionaryEntryCount: Int, + polishOutcome: PolishOutcome = .ok ) async { let result = await inserter.insert(text) let frontApp = NSWorkspace.shared.frontmostApplication @@ -355,14 +417,20 @@ final class DictationCoordinator { if !learned.isEmpty { Log.write("[dictionary] 自动学习:\(learned.map { $0.phrase }.joined(separator: ", "))") } + // 润色没真跑时,历史里的 mode 应反映「实际只是 raw」,避免误导。 + let savedMode: PolishMode + switch polishOutcome { + case .ok: savedMode = mode + case .skippedNoCredentials, .failed: savedMode = .raw + } switch result { case .inserted: - capsule.update(state: .inserted) - Log.write("[insert] OK") + capsule.update(state: capsuleStateForInsert(polishOutcome)) + Log.write("[insert] OK (polish=\(polishOutcome.logTag))") saveSession( raw: raw, final: text, - mode: mode, + mode: savedMode, app: frontApp, status: .inserted, errorCode: nil, @@ -370,12 +438,12 @@ final class DictationCoordinator { dictionaryEntryCount: dictionaryEntryCount ) case .copiedFallback(let reason): - capsule.update(state: .copied) - Log.write("[insert] fallback: \(reason)") + capsule.update(state: capsuleStateForCopy(polishOutcome)) + Log.write("[insert] fallback: \(reason) (polish=\(polishOutcome.logTag))") saveSession( raw: raw, final: text, - mode: mode, + mode: savedMode, app: frontApp, status: .copiedFallback, errorCode: reason, @@ -391,6 +459,22 @@ final class DictationCoordinator { } } + private func capsuleStateForInsert(_ outcome: PolishOutcome) -> CapsuleState { + switch outcome { + case .ok: return .inserted + case .skippedNoCredentials: return .warning("已插入原文 · 未润色") + case .failed: return .warning("润色失败 · 已用原文") + } + } + + private func capsuleStateForCopy(_ outcome: PolishOutcome) -> CapsuleState { + switch outcome { + case .ok: return .copied + case .skippedNoCredentials: return .warning("已复制原文 · 未润色 ⌘V") + case .failed: return .warning("润色失败 · 已复制 ⌘V") + } + } + private func runMockPipeline() async { let mockText = "(演示)请到设置 → 凭据填入火山引擎 ASR + Ark API Key 后才能真实识别" Log.write("[mock] \(mockText)") diff --git a/Sources/OpenLessApp/Settings/SettingsView.swift b/Sources/OpenLessApp/Settings/SettingsView.swift index 44dcbb8d..7d15468f 100644 --- a/Sources/OpenLessApp/Settings/SettingsView.swift +++ b/Sources/OpenLessApp/Settings/SettingsView.swift @@ -423,11 +423,12 @@ private struct PasteableCredentialField: View { let placeholder: String let secure: Bool @Binding var text: String + @State private var revealed = false var body: some View { HStack(spacing: 8) { Group { - if secure { + if secure && !revealed { SecureField(placeholder, text: $text) } else { TextField(placeholder, text: $text) @@ -436,6 +437,17 @@ private struct PasteableCredentialField: View { .textFieldStyle(.roundedBorder) .frame(maxWidth: 390) + if secure { + Button { + revealed.toggle() + } label: { + Image(systemName: revealed ? "eye.slash" : "eye") + } + .buttonStyle(.bordered) + .controlSize(.small) + .help(revealed ? "隐藏密钥" : "显示密钥") + } + Button { if let value = NSPasteboard.general.string(forType: .string) { text = value.trimmingCharacters(in: .whitespacesAndNewlines) @@ -959,6 +971,7 @@ private struct SettingsHubTab: View { @State private var arkModelId = ArkCredentials.defaultModelId @State private var arkEndpoint = ArkCredentials.defaultEndpoint.absoluteString @State private var trigger: HotkeyBinding.Trigger = UserPreferences.shared.hotkeyTrigger + @State private var hotkeyMode: HotkeyMode = UserPreferences.shared.hotkeyMode @State private var mode: PolishMode = UserPreferences.shared.polishMode @State private var hasAccessibility = false @State private var hasMicrophone = false @@ -1021,6 +1034,26 @@ private struct SettingsHubTab: View { } } DividerLine() + SettingsRow(title: "录音方式") { + Picker("录音方式", selection: $hotkeyMode) { + ForEach(HotkeyMode.allCases, id: \.self) { item in + Text(item.displayName).tag(item) + } + } + .labelsHidden() + .pickerStyle(.segmented) + .frame(width: 220, alignment: .leading) + .onChange(of: hotkeyMode) { _, newValue in + UserPreferences.shared.hotkeyMode = newValue + NotificationCenter.default.post(name: .openLessHotkeyChanged, object: nil) + } + } + DividerLine() + Text(hotkeyMode.hint) + .font(.footnote) + .foregroundStyle(.secondary) + .padding(.top, 4) + DividerLine() SettingsRow(title: "默认模式") { Picker("模式", selection: $mode) { ForEach(PolishMode.allCases, id: \.self) { item in diff --git a/Sources/OpenLessApp/Settings/SettingsWindowController.swift b/Sources/OpenLessApp/Settings/SettingsWindowController.swift index 813d36c0..92399f6f 100644 --- a/Sources/OpenLessApp/Settings/SettingsWindowController.swift +++ b/Sources/OpenLessApp/Settings/SettingsWindowController.swift @@ -22,7 +22,9 @@ final class SettingsWindowController: NSObject, NSWindowDelegate { win.titleVisibility = .hidden win.titlebarAppearsTransparent = true win.toolbar = nil - win.isMovableByWindowBackground = true + // 只允许拖动原生顶栏区域(含 traffic lights 那条带); + // 否则 TextField 上的拖选手势会被整窗拖动吞掉。 + win.isMovableByWindowBackground = false win.setContentSize(NSSize(width: 1040, height: 700)) win.contentMinSize = NSSize(width: 960, height: 640) win.tabbingMode = .disallowed diff --git a/Sources/OpenLessCore/HotkeyMode.swift b/Sources/OpenLessCore/HotkeyMode.swift new file mode 100644 index 00000000..a32b7991 --- /dev/null +++ b/Sources/OpenLessCore/HotkeyMode.swift @@ -0,0 +1,22 @@ +import Foundation + +public enum HotkeyMode: String, Codable, Sendable, Equatable, CaseIterable { + /// 按一次开始,按一次结束。短按门槛低、适合长口述。 + case toggle + /// 按住录音、松手即停。适合短促、连续的口播(同 Wispr Flow / Typeless 默认行为)。 + case hold + + public var displayName: String { + switch self { + case .toggle: return "切换式" + case .hold: return "按住说话" + } + } + + public var hint: String { + switch self { + case .toggle: return "按一次开始录音,再按一次结束。" + case .hold: return "按住快捷键说话,松开立即停止。适合短句。" + } + } +} diff --git a/Sources/OpenLessHotkey/HotkeyEvent.swift b/Sources/OpenLessHotkey/HotkeyEvent.swift index fad551a8..ec51c1ae 100644 --- a/Sources/OpenLessHotkey/HotkeyEvent.swift +++ b/Sources/OpenLessHotkey/HotkeyEvent.swift @@ -1,8 +1,10 @@ import Foundation public enum HotkeyEvent: Sendable, Equatable { - /// Toggle 模式下:每次触发键按下时触发一次。 - case toggled - /// 录音中按 Esc + /// 触发键按下边沿。toggle 模式下解释为「开始/结束」翻转,hold 模式下解释为「开始」。 + case pressed + /// 触发键松开边沿。toggle 模式忽略;hold 模式解释为「结束」。 + case released + /// 录音中按 Esc。 case cancelled } diff --git a/Sources/OpenLessHotkey/HotkeyMonitor.swift b/Sources/OpenLessHotkey/HotkeyMonitor.swift index ca0b365b..8989d10e 100644 --- a/Sources/OpenLessHotkey/HotkeyMonitor.swift +++ b/Sources/OpenLessHotkey/HotkeyMonitor.swift @@ -12,8 +12,8 @@ public final class HotkeyMonitor: HotkeyServiceProtocol { private var eventTap: CFMachPort? private var runLoopSource: CFRunLoopSource? private var triggerHeld = false - /// Toggle 状态:每次触发键按下时发出 .toggled。 - /// 主链路侧负责把第一次 toggled 解释为"开始",第二次为"结束"。 + /// 边沿事件:触发键按下时发 .pressed,松开时发 .released。 + /// toggle / hold 的解释由协调器侧(DictationCoordinator)按用户偏好做。 public init() { var captured: AsyncStream.Continuation! @@ -105,9 +105,10 @@ public final class HotkeyMonitor: HotkeyServiceProtocol { if triggerActive && !triggerHeld { triggerHeld = true - continuation.yield(.toggled) + continuation.yield(.pressed) } else if !triggerActive && triggerHeld { triggerHeld = false + continuation.yield(.released) } // fn 默认拦截,规避系统 Globe 行为 diff --git a/Sources/OpenLessPersistence/UserPreferences.swift b/Sources/OpenLessPersistence/UserPreferences.swift index 4a05bbbb..3c965167 100644 --- a/Sources/OpenLessPersistence/UserPreferences.swift +++ b/Sources/OpenLessPersistence/UserPreferences.swift @@ -8,6 +8,7 @@ public final class UserPreferences: @unchecked Sendable { private enum Key { static let polishMode = "openless.polish_mode" static let hotkeyTrigger = "openless.hotkey_trigger" + static let hotkeyMode = "openless.hotkey_mode" static let hasCompletedOnboarding = "openless.onboarding_completed" } @@ -29,6 +30,14 @@ public final class UserPreferences: @unchecked Sendable { set { defaults.set(newValue.rawValue, forKey: Key.hotkeyTrigger) } } + public var hotkeyMode: HotkeyMode { + get { + let raw = defaults.string(forKey: Key.hotkeyMode) ?? HotkeyMode.toggle.rawValue + return HotkeyMode(rawValue: raw) ?? .toggle + } + set { defaults.set(newValue.rawValue, forKey: Key.hotkeyMode) } + } + public var hasCompletedOnboarding: Bool { get { defaults.bool(forKey: Key.hasCompletedOnboarding) } set { defaults.set(newValue, forKey: Key.hasCompletedOnboarding) } diff --git a/Sources/OpenLessPolish/PolishPrompts.swift b/Sources/OpenLessPolish/PolishPrompts.swift index aaf1359b..5b58e174 100644 --- a/Sources/OpenLessPolish/PolishPrompts.swift +++ b/Sources/OpenLessPolish/PolishPrompts.swift @@ -35,14 +35,34 @@ public enum PolishPrompts { """ case .structured: return """ - \(roleRule)\ - 你是语音输入文本整理器,擅长把口述内容整理为结构化段落。\ - 规则:\ - (1) 去口癖与重复,保留用户最终意图(中途改口以最终版本为准);\ - (2) 当用户口述列表/步骤/计划/总结时,自动转为段落、编号列表或项目符号;\ - (3) 标点自然,不机械切碎;\ - (4) 不新增用户没说过的事实;\ - (5) 中英混输和专有名词保留原样。\ + \(roleRule) + 你是语音输入文本整理器,专门把口述内容整理为脉络清晰、可直接用作 AI prompt 或工作文档的结构化文本。 + + 规则: + (1) 去口癖与重复,保留用户最终意图(中途改口以最终版本为准)。 + (2) 内容涉及 ≥2 个主题、步骤或要求时,强制使用以下三层层级输出: + - 第一层(大板块):行首用 "1." "2." "3." …,每个大板块一行短标题; + - 第二层(具体要点):在大板块下缩进 3 个空格,行首用 "1)" "2)" "3)" …,每条一句; + - 第三层(细分项):必要时再缩进 3 个空格,行首用 "a." "b." "c." …。 + (3) 即使原文没有显式说"第一/第二",只要可以归并到 ≥2 个主题,也要自动归类到大板块。 + (4) 当口述只有一个简单主题或长度很短时,直接输出连贯段落,不要硬塞层级。 + (5) 标点自然,不机械切碎;不新增用户没说过的事实;中英混输和专有名词保留原样。 + + 格式示例(只看层级与编号方式,不要复制内容): + 原始:发布前要做几件事,第一是回归测试,要测登录页和支付页,登录页里测正常登录、密码错和图形验证码,支付页测信用卡和微信,第二是文档要更新,要改 README 和 changelog + 输出: + 1. 回归测试 + 1) 登录页 + a. 正常登录。 + b. 密码错误提示。 + c. 图形验证码刷新。 + 2) 支付页 + a. 信用卡支付。 + b. 微信支付。 + 2. 文档更新 + 1) 更新 README。 + 2) 更新 changelog。 + \(outputRule) """ case .formal: diff --git a/Sources/OpenLessUI/CapsuleState.swift b/Sources/OpenLessUI/CapsuleState.swift index 0670b251..6530d4ee 100644 --- a/Sources/OpenLessUI/CapsuleState.swift +++ b/Sources/OpenLessUI/CapsuleState.swift @@ -8,4 +8,6 @@ public enum CapsuleState: Sendable, Equatable { case cancelled case copied case error(String) + /// 插入/复制成功但有需要告诉用户的非阻塞偏离(如润色被跳过、润色失败回退原文)。 + case warning(String) } diff --git a/Sources/OpenLessUI/CapsuleView.swift b/Sources/OpenLessUI/CapsuleView.swift index bc5232b6..dc198da6 100644 --- a/Sources/OpenLessUI/CapsuleView.swift +++ b/Sources/OpenLessUI/CapsuleView.swift @@ -83,6 +83,8 @@ public struct CapsuleView: View { statusText("已复制 ⌘V", color: .secondary) case .error(let msg): statusText(msg, color: .red) + case .warning(let msg): + statusText(msg, color: .orange) case .hidden: EmptyView() } diff --git a/Tests/OpenLessHotkeyTests/HotkeyEventTests.swift b/Tests/OpenLessHotkeyTests/HotkeyEventTests.swift index 40ac2438..7d5cf7ef 100644 --- a/Tests/OpenLessHotkeyTests/HotkeyEventTests.swift +++ b/Tests/OpenLessHotkeyTests/HotkeyEventTests.swift @@ -3,6 +3,8 @@ import XCTest final class HotkeyEventTests: XCTestCase { func test_eventsAreDistinct() { - XCTAssertNotEqual(HotkeyEvent.toggled, .cancelled) + XCTAssertNotEqual(HotkeyEvent.pressed, .released) + XCTAssertNotEqual(HotkeyEvent.pressed, .cancelled) + XCTAssertNotEqual(HotkeyEvent.released, .cancelled) } } diff --git a/USAGE.md b/USAGE.md index 53802041..c81996ea 100644 --- a/USAGE.md +++ b/USAGE.md @@ -51,6 +51,10 @@ OpenLess 不内置任何云端 Key,需要你自己提供: 5. 润色完的文字会自动插入到光标所在位置。如果当前应用拒绝写入,会复制到剪贴板,请手动 `Cmd+V`。 6. 按 `Esc` 在录音过程中取消,原始音频不会发送到 ASR。 +> **录音方式可在「设置」里切换:** +> - **切换式**(默认):按一次开始,再按一次结束。适合长口述。 +> - **按住说话**(hold-to-talk):按住快捷键说话,松开立即结束。适合短句、IM 消息、连续多次输入。 + 每一次会话都会保存到「历史记录」标签页,包含:录音时长、原始转写、润色后文本、采用的模式。 ## 5. 输出模式 diff --git a/scripts/build-app.sh b/scripts/build-app.sh index a132a2eb..d5db2bed 100755 --- a/scripts/build-app.sh +++ b/scripts/build-app.sh @@ -6,8 +6,8 @@ cd "$(dirname "$0")/.." APP_NAME="OpenLess" BUNDLE_ID="com.openless.app" -APP_VERSION="1.0.0" -BUILD_NUMBER="1" +APP_VERSION="1.0.01" +BUILD_NUMBER="A1003" BUILD_DIR="build" APP_DIR="${BUILD_DIR}/${APP_NAME}.app" BIN_DIR="${APP_DIR}/Contents/MacOS"