Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,6 @@ The record → transcribe → polish → insert state machine is owned exclusive

Planned in the requirements docs but not in the 1.0 release:

- Hold-to-talk mode (today only toggle).
- Local ASR (today only Volcengine cloud).
- Snippets (no UI / trigger logic yet).
- History enhancements: copy button, search, re-polish, re-insert.
Expand Down
1 change: 0 additions & 1 deletion README.zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,6 @@ OpenLessApp // AppDelegate, 菜单栏, 设置窗口, 胶囊窗口, Dictat

下面这些功能在需求文档里有规划,但 1.0 没有发布:

- 按住说话模式(hold-to-talk):当前仅支持切换式。
- 本地 ASR:当前仅接入火山引擎云端 ASR。
- 常用片段 Snippets:尚无 UI 和触发逻辑。
- 历史增强:复制按钮、搜索、重新润色、重新插入。
Expand Down
122 changes: 103 additions & 19 deletions Sources/OpenLessApp/DictationCoordinator.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ final class DictationCoordinator {
private var asr: VolcengineStreamingASR?
private var audioConsumer: BufferingAudioConsumer?
private var sessionStartedAt: Date = Date()
/// hold 模式下,Esc 取消后下一次 .released 应被忽略(否则会再次触发结束流程)。
private var suppressNextRelease = false

/// 启动时一次性读 Keychain 缓存的凭据快照;会话热路径只读这里,
/// 不再每次都打 SecItemCopyMatching 触发钥匙串弹窗。
Expand Down Expand Up @@ -138,16 +140,57 @@ final class DictationCoordinator {
guard let self else { return }
for await event in self.hotkey.events {
switch event {
case .toggled:
self.handleToggle()
case .pressed:
self.handlePressed()
case .released:
self.handleReleased()
case .cancelled:
self.handleCancel()
}
}
}
}

// MARK: - Toggle 状态机
// MARK: - Toggle / Hold 状态机

/// Routes a trigger-key press edge according to the user's current hotkey mode:
/// hold mode goes to `handleHoldStart()`, toggle mode goes to `handleToggle()`.
private func handlePressed() {
    let mode = UserPreferences.shared.hotkeyMode
    switch mode {
    case .hold:
        handleHoldStart()
    case .toggle:
        handleToggle()
    }
}

/// Handles the trigger key's release edge.
///
/// Does nothing outside hold mode. In hold mode, a release while listening moves the
/// session to processing and ends it; a release while still starting cancels the
/// session; a release while idle or already processing is ignored. When
/// `suppressNextRelease` is armed (a cancel happened while the key was still held),
/// this release is swallowed once and the flag is cleared.
private func handleReleased() {
    guard UserPreferences.shared.hotkeyMode == .hold else { return }
    if suppressNextRelease {
        suppressNextRelease = false
        return
    }
    switch sessionPhase {
    case .listening:
        sessionPhase = .processing
        Task { await endSession() }
    case .starting:
        // The user let go before ASR connected — treat it as a cancel and send
        // none of the audio captured so far.
        Log.write("[session] hold: starting 阶段松手,取消")
        handleCancel()
        // handleCancel() arms suppressNextRelease in hold mode on the assumption
        // that the key is still down and its release is yet to come. On this path
        // the key is already up — this call *is* that release — so disarm the flag.
        // Left armed, it would swallow the release of the next hold session and
        // that session would never end.
        suppressNextRelease = false
    case .idle, .processing:
        return
    }
}

/// Starts a hold-mode session in response to a trigger-key press.
///
/// Only acts when the session is idle; a press in any other phase is ignored.
private func handleHoldStart() {
    switch sessionPhase {
    case .idle:
        // A new press opens a new press/release pair, so a suppression still armed
        // from an earlier cancel is stale. Clear it; otherwise this session's own
        // release would be swallowed and the session would never end.
        suppressNextRelease = false
        sessionPhase = .starting
        Task { await beginSession() }
    case .starting, .listening, .processing:
        // In hold mode a repeated .pressed usually comes from system auto-repeat; ignore it.
        return
    }
}

private func handleToggle() {
switch sessionPhase {
Expand All @@ -173,6 +216,10 @@ final class DictationCoordinator {
recorder.stop()
audioConsumer?.clear()
audioConsumer = nil
// hold 模式:如果用户还按着键,松手时会再来一次 .released —— 屏蔽掉,避免再次触发结束。
if UserPreferences.shared.hotkeyMode == .hold {
suppressNextRelease = true
}
capsule.update(state: .cancelled)
DispatchQueue.main.asyncAfter(deadline: .now() + 0.6) { [weak self] in
self?.capsule.update(state: .hidden)
Expand Down Expand Up @@ -291,6 +338,21 @@ final class DictationCoordinator {
}
}

/// What actually happened during the polish step. Drives the tone of the capsule
/// hint and the mode that is really recorded in history.
private enum PolishOutcome {
    /// Polishing genuinely ran and completed.
    case ok
    /// No Ark credentials were provided, so polishing was skipped.
    case skippedNoCredentials
    /// The polish call was made but reported an error; the text is for logging only.
    case failed(String)

    /// Compact tag for log lines. A failure message is cut to its first 120 characters.
    var logTag: String {
        let tag: String
        switch self {
        case .failed(let detail):
            tag = "failed(\(detail.prefix(120)))"
        case .skippedNoCredentials:
            tag = "skip-no-creds"
        case .ok:
            tag = "ok"
        }
        return tag
    }
}

private func polishAndInsert(
raw: RawTranscript,
originalRawText: String? = nil,
Expand All @@ -300,13 +362,14 @@ final class DictationCoordinator {
let savedRaw = originalRawText ?? raw.text

guard let arkCreds = loadArkCredentials() else {
Log.write("缺少 Ark 凭据;直接用 raw 插入")
Log.write("[polish] 缺少 Ark 凭据;跳过润色,插入 raw")
await insertText(
text: raw.text,
raw: savedRaw,
mode: mode,
durationMs: raw.durationMs,
dictionaryEntryCount: dictionaryEntries.count
dictionaryEntryCount: dictionaryEntries.count,
polishOutcome: .skippedNoCredentials
)
return
}
Expand All @@ -324,20 +387,18 @@ final class DictationCoordinator {
raw: savedRaw,
mode: mode,
durationMs: raw.durationMs,
dictionaryEntryCount: dictionaryEntries.count
dictionaryEntryCount: dictionaryEntries.count,
polishOutcome: .ok
)
} catch {
Log.write("[polish] 失败: \(error);fallback 用 raw")
// 让用户知道润色失败(最常见原因:Ark 模型 ID 写错)。
// 1.5s 提示后用 raw 兜底插入,避免用户以为是"整理完成"。
capsule.update(state: .error("整理失败 用原文"))
try? await Task.sleep(nanoseconds: 1_500_000_000)
Log.write("[polish] 失败: \(error);插入 raw")
await insertText(
text: raw.text,
raw: savedRaw,
mode: mode,
durationMs: raw.durationMs,
dictionaryEntryCount: dictionaryEntries.count
dictionaryEntryCount: dictionaryEntries.count,
polishOutcome: .failed(String(describing: error))
)
}
}
Expand All @@ -347,35 +408,42 @@ final class DictationCoordinator {
raw: String,
mode: PolishMode,
durationMs: Int?,
dictionaryEntryCount: Int
dictionaryEntryCount: Int,
polishOutcome: PolishOutcome = .ok
) async {
let result = await inserter.insert(text)
let frontApp = NSWorkspace.shared.frontmostApplication
let learned = dictionary.learnTerms(from: text)
if !learned.isEmpty {
Log.write("[dictionary] 自动学习:\(learned.map { $0.phrase }.joined(separator: ", "))")
}
// 润色没真跑时,历史里的 mode 应反映「实际只是 raw」,避免误导。
let savedMode: PolishMode
switch polishOutcome {
case .ok: savedMode = mode
case .skippedNoCredentials, .failed: savedMode = .raw
}
switch result {
case .inserted:
capsule.update(state: .inserted)
Log.write("[insert] OK")
capsule.update(state: capsuleStateForInsert(polishOutcome))
Log.write("[insert] OK (polish=\(polishOutcome.logTag))")
saveSession(
raw: raw,
final: text,
mode: mode,
mode: savedMode,
app: frontApp,
status: .inserted,
errorCode: nil,
durationMs: durationMs,
dictionaryEntryCount: dictionaryEntryCount
)
case .copiedFallback(let reason):
capsule.update(state: .copied)
Log.write("[insert] fallback: \(reason)")
capsule.update(state: capsuleStateForCopy(polishOutcome))
Log.write("[insert] fallback: \(reason) (polish=\(polishOutcome.logTag))")
saveSession(
raw: raw,
final: text,
mode: mode,
mode: savedMode,
app: frontApp,
status: .copiedFallback,
errorCode: reason,
Expand All @@ -391,6 +459,22 @@ final class DictationCoordinator {
}
}

/// Picks the capsule state to show after the text was inserted, based on how the
/// polish step went: the plain inserted state when polishing succeeded, otherwise
/// a warning that the raw text was used.
private func capsuleStateForInsert(_ outcome: PolishOutcome) -> CapsuleState {
    let state: CapsuleState
    switch outcome {
    case .failed:
        state = .warning("润色失败 · 已用原文")
    case .skippedNoCredentials:
        state = .warning("已插入原文 · 未润色")
    case .ok:
        state = .inserted
    }
    return state
}

/// Picks the capsule state to show after insertion fell back to the clipboard,
/// based on how the polish step went: the plain copied state when polishing
/// succeeded, otherwise a warning that the raw text was copied.
private func capsuleStateForCopy(_ outcome: PolishOutcome) -> CapsuleState {
    let state: CapsuleState
    switch outcome {
    case .failed:
        state = .warning("润色失败 · 已复制 ⌘V")
    case .skippedNoCredentials:
        state = .warning("已复制原文 · 未润色 ⌘V")
    case .ok:
        state = .copied
    }
    return state
}

private func runMockPipeline() async {
let mockText = "(演示)请到设置 → 凭据填入火山引擎 ASR + Ark API Key 后才能真实识别"
Log.write("[mock] \(mockText)")
Expand Down
35 changes: 34 additions & 1 deletion Sources/OpenLessApp/Settings/SettingsView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -423,11 +423,12 @@ private struct PasteableCredentialField: View {
let placeholder: String
let secure: Bool
@Binding var text: String
@State private var revealed = false

var body: some View {
HStack(spacing: 8) {
Group {
if secure {
if secure && !revealed {
SecureField(placeholder, text: $text)
} else {
TextField(placeholder, text: $text)
Expand All @@ -436,6 +437,17 @@ private struct PasteableCredentialField: View {
.textFieldStyle(.roundedBorder)
.frame(maxWidth: 390)

if secure {
Button {
revealed.toggle()
} label: {
Image(systemName: revealed ? "eye.slash" : "eye")
}
.buttonStyle(.bordered)
.controlSize(.small)
.help(revealed ? "隐藏密钥" : "显示密钥")
}

Button {
if let value = NSPasteboard.general.string(forType: .string) {
text = value.trimmingCharacters(in: .whitespacesAndNewlines)
Expand Down Expand Up @@ -959,6 +971,7 @@ private struct SettingsHubTab: View {
@State private var arkModelId = ArkCredentials.defaultModelId
@State private var arkEndpoint = ArkCredentials.defaultEndpoint.absoluteString
@State private var trigger: HotkeyBinding.Trigger = UserPreferences.shared.hotkeyTrigger
@State private var hotkeyMode: HotkeyMode = UserPreferences.shared.hotkeyMode
@State private var mode: PolishMode = UserPreferences.shared.polishMode
@State private var hasAccessibility = false
@State private var hasMicrophone = false
Expand Down Expand Up @@ -1021,6 +1034,26 @@ private struct SettingsHubTab: View {
}
}
DividerLine()
SettingsRow(title: "录音方式") {
Picker("录音方式", selection: $hotkeyMode) {
ForEach(HotkeyMode.allCases, id: \.self) { item in
Text(item.displayName).tag(item)
}
}
.labelsHidden()
.pickerStyle(.segmented)
.frame(width: 220, alignment: .leading)
.onChange(of: hotkeyMode) { _, newValue in
UserPreferences.shared.hotkeyMode = newValue
NotificationCenter.default.post(name: .openLessHotkeyChanged, object: nil)
}
}
DividerLine()
Text(hotkeyMode.hint)
.font(.footnote)
.foregroundStyle(.secondary)
.padding(.top, 4)
DividerLine()
SettingsRow(title: "默认模式") {
Picker("模式", selection: $mode) {
ForEach(PolishMode.allCases, id: \.self) { item in
Expand Down
4 changes: 3 additions & 1 deletion Sources/OpenLessApp/Settings/SettingsWindowController.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ final class SettingsWindowController: NSObject, NSWindowDelegate {
win.titleVisibility = .hidden
win.titlebarAppearsTransparent = true
win.toolbar = nil
win.isMovableByWindowBackground = true
// 只允许拖动原生顶栏区域(含 traffic lights 那条带);
// 否则 TextField 上的拖选手势会被整窗拖动吞掉。
win.isMovableByWindowBackground = false
win.setContentSize(NSSize(width: 1040, height: 700))
win.contentMinSize = NSSize(width: 960, height: 640)
win.tabbingMode = .disallowed
Expand Down
22 changes: 22 additions & 0 deletions Sources/OpenLessCore/HotkeyMode.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import Foundation

public enum HotkeyMode: String, Codable, Sendable, Equatable, CaseIterable {
    /// Press once to start, press again to stop. Low effort per press; suited to long dictation.
    case toggle
    /// Hold to record, release to stop. Suited to short, rapid utterances
    /// (the same default behaviour as Wispr Flow / Typeless).
    case hold

    /// Short user-facing label for this mode.
    public var displayName: String {
        let label: String
        switch self {
        case .hold:
            label = "按住说话"
        case .toggle:
            label = "切换式"
        }
        return label
    }

    /// One-line user-facing explanation of how this mode works.
    public var hint: String {
        let text: String
        switch self {
        case .hold:
            text = "按住快捷键说话,松开立即停止。适合短句。"
        case .toggle:
            text = "按一次开始录音,再按一次结束。"
        }
        return text
    }
}
8 changes: 5 additions & 3 deletions Sources/OpenLessHotkey/HotkeyEvent.swift
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import Foundation

public enum HotkeyEvent: Sendable, Equatable {
/// Toggle 模式下:每次触发键按下时触发一次。
case toggled
/// 录音中按 Esc
/// 触发键按下边沿。toggle 模式下解释为「开始/结束」翻转,hold 模式下解释为「开始」。
case pressed
/// 触发键松开边沿。toggle 模式忽略;hold 模式解释为「结束」。
case released
/// 录音中按 Esc。
case cancelled
}
7 changes: 4 additions & 3 deletions Sources/OpenLessHotkey/HotkeyMonitor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ public final class HotkeyMonitor: HotkeyServiceProtocol {
private var eventTap: CFMachPort?
private var runLoopSource: CFRunLoopSource?
private var triggerHeld = false
/// Toggle 状态:每次触发键按下时发出 .toggled
/// 主链路侧负责把第一次 toggled 解释为"开始",第二次为"结束"
/// 边沿事件:触发键按下时发 .pressed,松开时发 .released
/// toggle / hold 的解释由协调器侧(DictationCoordinator)按用户偏好做

public init() {
var captured: AsyncStream<HotkeyEvent>.Continuation!
Expand Down Expand Up @@ -105,9 +105,10 @@ public final class HotkeyMonitor: HotkeyServiceProtocol {

if triggerActive && !triggerHeld {
triggerHeld = true
continuation.yield(.toggled)
continuation.yield(.pressed)
} else if !triggerActive && triggerHeld {
triggerHeld = false
continuation.yield(.released)
}

// fn 默认拦截,规避系统 Globe 行为
Expand Down
9 changes: 9 additions & 0 deletions Sources/OpenLessPersistence/UserPreferences.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ public final class UserPreferences: @unchecked Sendable {
private enum Key {
static let polishMode = "openless.polish_mode"
static let hotkeyTrigger = "openless.hotkey_trigger"
static let hotkeyMode = "openless.hotkey_mode"
static let hasCompletedOnboarding = "openless.onboarding_completed"
}

Expand All @@ -29,6 +30,14 @@ public final class UserPreferences: @unchecked Sendable {
set { defaults.set(newValue.rawValue, forKey: Key.hotkeyTrigger) }
}

/// The user's hotkey mode, persisted as its raw string under `Key.hotkeyMode`.
/// Reads fall back to `.toggle` when nothing is stored or the stored value is
/// not a known mode.
public var hotkeyMode: HotkeyMode {
    get {
        defaults.string(forKey: Key.hotkeyMode).flatMap(HotkeyMode.init(rawValue:)) ?? .toggle
    }
    set {
        defaults.set(newValue.rawValue, forKey: Key.hotkeyMode)
    }
}

public var hasCompletedOnboarding: Bool {
get { defaults.bool(forKey: Key.hasCompletedOnboarding) }
set { defaults.set(newValue, forKey: Key.hasCompletedOnboarding) }
Expand Down
Loading