Skip to content

Commit ead96e3

Browse files
committed
feat: add Parakeet MLX sidecar integration
- Implement Parakeet MLX sidecar for offline transcription - Add Python sidecar with Pydantic v2 compatibility - Fix SegmentPayload structure for AlignedSentence handling - Add build script for PyInstaller bundling - Integrate with Tauri command system - Add .gitignore for build artifacts
1 parent 0748230 commit ead96e3

33 files changed

+2205
-390
lines changed

scripts/build-parakeet-sidecar.sh

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/usr/bin/env bash
set -euo pipefail

# Build the Parakeet MLX sidecar as a standalone binary using uv + PyInstaller,
# then copy it to a host-triple-suffixed name for Tauri bundling.

ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
SIDECAR_DIR="$ROOT_DIR/sidecar/parakeet"
DIST_DIR="$SIDECAR_DIR/dist"
BIN_PATH="$DIST_DIR/parakeet-sidecar"

# Fail fast: rustc is only needed after the (slow) PyInstaller build, so make
# sure every required tool exists before doing any work.
for tool in uv rustc; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "ERROR: required tool '$tool' not found on PATH" >&2
    exit 1
  fi
done

cd "$SIDECAR_DIR"

# Resolve the host target triple before building. An empty value would
# otherwise yield a binary named "parakeet-sidecar-" with no usable suffix.
HOST_TRIPLE=$(rustc -vV | sed -n 's/^host: //p')
if [[ -z "$HOST_TRIPLE" ]]; then
  echo "ERROR: could not determine host target triple from 'rustc -vV'" >&2
  exit 1
fi
SUFFIXED_PATH="$DIST_DIR/parakeet-sidecar-$HOST_TRIPLE"

# Ensure dependencies for the build group are installed
uv sync --group build

# Clean previous artifacts
rm -rf "$DIST_DIR"

# Build the single-file sidecar binary
uv run --group build pyinstaller \
  --clean \
  --onefile \
  --name parakeet-sidecar \
  --hidden-import mlx._reprlib_fix \
  --collect-submodules mlx \
  --collect-submodules parakeet_mlx \
  --collect-data parakeet_mlx \
  --collect-data mlx \
  src/parakeet_sidecar/main.py

# Create a target-suffixed copy for Tauri bundling
if [[ ! -f "$BIN_PATH" ]]; then
  echo "ERROR: sidecar binary not found at $BIN_PATH" >&2
  exit 1
fi

cp "$BIN_PATH" "$SUFFIXED_PATH"
echo "Created suffixed binary: $SUFFIXED_PATH"

echo "Parakeet sidecar built at $DIST_DIR"

sidecar/parakeet/.gitignore

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Python
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
*.egg-info/
6+
*.egg
7+
.venv/
8+
venv/
9+
10+
# PyInstaller build artifacts
11+
build/
12+
dist/
13+
*.spec
14+
15+
# UV
16+
uv.lock
17+
.uv/
18+
19+
# IDE
20+
.idea/
21+
.vscode/
22+
*.swp
23+
*.swo
24+
25+
# OS
26+
.DS_Store
27+
Thumbs.db

sidecar/parakeet/pyproject.toml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
[project]
2+
name = "parakeet-sidecar"
3+
version = "0.1.0"
4+
description = "VoiceTypr Parakeet MLX transcription sidecar"
5+
authors = [{name = "VoiceTypr"}]
6+
requires-python = ">=3.11"
7+
dependencies = [
8+
"parakeet-mlx>=0.3.0",
9+
"mlx>=0.25.3",
10+
"huggingface-hub>=0.23",
11+
"soundfile>=0.12",
12+
"numpy>=1.26",
13+
"pydantic>=2.8",
14+
"uvloop>=0.19; platform_system == 'Linux'",
15+
"orjson>=3.10"
16+
]
17+
18+
[dependency-groups]
19+
dev = ["pytest>=8", "ruff>=0.5", "mypy>=1.11"]
20+
build = ["pyinstaller>=6.8"]
21+
22+
[project.scripts]
23+
parakeet-sidecar = "parakeet_sidecar.main:run"
24+
25+
[tool.uv]
26+
package = true
27+
28+
[build-system]
29+
requires = ["setuptools>=69"]
30+
build-backend = "setuptools.build_meta"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"""Parakeet MLX transcription sidecar for VoiceTypr."""
2+
3+
from .main import run # re-export for entry point discovery
4+
5+
__all__ = ["run"]
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
"""Entry point for the Parakeet MLX transcription sidecar."""
2+
from __future__ import annotations
3+
4+
import logging
5+
import signal
6+
import sys
7+
from typing import Any, Dict
8+
9+
import orjson
10+
from pydantic import ValidationError
11+
12+
from parakeet_sidecar.messages import (
13+
CommandRequest,
14+
ErrorResponse,
15+
LoadModelRequest,
16+
OkResponse,
17+
ShutdownRequest,
18+
StatusRequest,
19+
StatusResponse,
20+
TranscribeRequest,
21+
TranscriptionResponse,
22+
UnloadModelRequest,
23+
parse_command,
24+
)
25+
from parakeet_sidecar.model_manager import ModelManager
26+
27+
# Logger used throughout this module, registered under the name "parakeet_sidecar".
LOGGER = logging.getLogger("parakeet_sidecar")
28+
29+
30+
def configure_logging() -> None:
    """Apply the sidecar's root logging configuration.

    Sets the root level to INFO and formats each record as
    ``timestamp | level | logger name | message``.
    """
    record_layout = "%(asctime)s | %(levelname)s | %(name)s | %(message)s"
    logging.basicConfig(level=logging.INFO, format=record_layout)
35+
36+
37+
def write_response(response: Any) -> None:
    """Serialise *response* with orjson and write it to stdout as one line.

    Objects orjson cannot encode natively are converted through
    ``_pydantic_default``. If serialisation raises, the failure is logged and
    an ``ErrorResponse`` with code ``serialisation_error`` is written instead.
    The encoded bytes are followed by a newline and stdout is flushed.
    """
    try:
        encoded = orjson.dumps(response, default=_pydantic_default)
    except Exception as exc:  # pragma: no cover - catastrophic failure
        LOGGER.exception("Failed to serialise response: %s", exc)
        fallback = ErrorResponse(
            message="Internal serialisation error",
            code="serialisation_error",
        )
        encoded = orjson.dumps(fallback, default=_pydantic_default)

    # Write raw bytes to the underlying binary buffer, then flush via the
    # text wrapper so the line is pushed out immediately.
    sys.stdout.buffer.write(encoded + b"\n")
    sys.stdout.flush()
48+
49+
50+
def _pydantic_default(value: Any) -> Any:
51+
if hasattr(value, "model_dump"):
52+
return value.model_dump()
53+
if hasattr(value, "dict"):
54+
return value.dict()
55+
raise TypeError(f"Object of type {type(value)} is not JSON serialisable")
56+
57+
58+
def handle_command(manager: ModelManager, command: CommandRequest) -> None:
    """Run one parsed command against *manager* and write the outcome to stdout.

    Dispatch is by the concrete request type:

    * ``LoadModelRequest``   -> ``manager.load(command)``, then an ``OkResponse``
      echoing ``model_id``, ``precision`` and ``attention``.
    * ``UnloadModelRequest`` -> ``manager.unload()``, then an ``OkResponse``.
    * ``TranscribeRequest``  -> the value returned by ``manager.transcribe(command)``
      is written as the response.
    * ``StatusRequest``      -> ``manager.status()`` is expanded into a
      ``StatusResponse``.
    * ``ShutdownRequest``    -> ``manager.unload()``, an ``OkResponse``, then
      ``SystemExit(0)`` is raised.

    A command of any other type is reported as a ``runtime_error`` instead of
    being dropped without a reply.

    Failures are converted into ``ErrorResponse`` messages with codes
    ``file_not_found``, ``validation_error``, ``runtime_error`` or
    ``internal_error``; they are not re-raised.

    Raises:
        SystemExit: after a shutdown command has been acknowledged.
    """
    try:
        if isinstance(command, LoadModelRequest):
            manager.load(command)
            write_response(
                OkResponse(
                    command="load_model",
                    payload={
                        "model_id": command.model_id,
                        "precision": command.precision,
                        "attention": command.attention,
                    },
                )
            )
            return

        if isinstance(command, UnloadModelRequest):
            manager.unload()
            write_response(OkResponse(command="unload_model"))
            return

        if isinstance(command, TranscribeRequest):
            result = manager.transcribe(command)
            write_response(result)
            return

        if isinstance(command, StatusRequest):
            status = manager.status()
            write_response(StatusResponse(**status))
            return

        if isinstance(command, ShutdownRequest):
            manager.unload()
            write_response(OkResponse(command="shutdown"))
            # SystemExit derives from BaseException, so none of the
            # ``except`` clauses below intercept it.
            raise SystemExit(0)

        # No branch matched: reply through the existing runtime_error path
        # rather than returning silently and leaving the caller without a
        # response to its request.
        raise RuntimeError(f"Unsupported command type: {type(command).__name__}")

    except FileNotFoundError as exc:
        LOGGER.error("File not found: %s", exc)
        write_response(
            ErrorResponse(
                code="file_not_found",
                message=str(exc),
            )
        )
    except ValidationError as exc:
        LOGGER.error("Validation error: %s", exc)
        write_response(
            ErrorResponse(
                code="validation_error",
                message="Invalid command payload",
                details={"errors": exc.errors()},
            )
        )
    except RuntimeError as exc:
        LOGGER.error("Runtime error: %s", exc)
        write_response(
            ErrorResponse(
                code="runtime_error",
                message=str(exc),
            )
        )
    except Exception as exc:  # pragma: no cover - defensive
        LOGGER.exception("Unhandled exception while processing command")
        write_response(
            ErrorResponse(
                code="internal_error",
                message=str(exc),
            )
        )
127+
128+
129+
def event_loop(manager: ModelManager) -> None:
    """Read newline-delimited JSON commands from stdin and dispatch each one.

    Lines that are empty after stripping whitespace are skipped. Every other
    line is decoded with orjson and converted with ``parse_command``; on
    success the command is passed to ``handle_command``. Decode or validation
    failures are logged and answered with an ``ErrorResponse`` (code
    ``validation_error`` or ``parse_error``), after which reading continues
    with the next line. The function returns when stdin is exhausted.
    """
    for incoming in sys.stdin:
        text = incoming.strip()
        if not text:
            continue

        try:
            decoded: Dict[str, Any] = orjson.loads(text)
            command = parse_command(decoded)
        except ValidationError as exc:
            # Listed before ValueError so validation failures are reported
            # with their structured error details.
            LOGGER.error("Received invalid payload: %s", exc)
            invalid = ErrorResponse(
                code="validation_error",
                message="Invalid payload",
                details={"errors": exc.errors()},
            )
            write_response(invalid)
        except ValueError as exc:
            LOGGER.error("Failed to parse payload: %s", exc)
            write_response(ErrorResponse(code="parse_error", message=str(exc)))
        except Exception as exc:  # pragma: no cover - defensive path
            LOGGER.exception("Unexpected error decoding JSON")
            write_response(ErrorResponse(code="parse_error", message=str(exc)))
        else:
            # Only reached when decoding and parsing both succeeded.
            handle_command(manager, command)
158+
159+
160+
def _graceful_shutdown(signum: int, _frame: Any) -> None:  # pragma: no cover - signal path
    """Signal handler: log the received signal number, then exit with status 0.

    Args:
        signum: Number of the signal that triggered the handler.
        _frame: Current stack frame supplied by the signal machinery; unused.

    Raises:
        SystemExit: always, with exit code 0.
    """
    LOGGER.info("Received signal %s, shutting down", signum)
    sys.exit(0)
163+
164+
165+
def run() -> None:
    """Entry point: set up logging, a model manager and signal handlers, then serve.

    ``SystemExit`` raised while the event loop is running is absorbed so the
    function returns normally. Any other exception is logged with its
    traceback and re-raised.
    """
    configure_logging()
    manager = ModelManager()

    # Route SIGINT and SIGTERM through the same shutdown handler.
    for handled_signal in (signal.SIGINT, signal.SIGTERM):
        signal.signal(handled_signal, _graceful_shutdown)

    try:
        event_loop(manager)
    except SystemExit:
        # Raised by the shutdown command or the signal handler; nothing
        # further to do.
        return
    except Exception:  # pragma: no cover - ensure exit
        LOGGER.exception("Fatal error running sidecar loop")
        raise
180+
181+
182+
if __name__ == "__main__": # pragma: no cover
183+
run()

0 commit comments

Comments
 (0)