Skip to content

Commit 3fe6a84

Browse files
fix(vis): use Worker thread for ELK layout to fix stack overflow on large graphs
V8's --stack-size flag is silently capped at values well below what is requested, causing "Maximum call stack size exceeded" on 1M+ node graphs. Switch to Node.js Worker threads with resourceLimits.stackSizeMb, which reliably delivers the requested stack size at the V8 isolate level.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 2c15b6b commit 3fe6a84

File tree

1 file changed

+31
-7
lines changed

1 file changed

+31
-7
lines changed

torchlens/visualization/elk_layout.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,17 +40,37 @@ def _unlimit_stack():
4040

4141
# Inline Node.js script that reads ELK JSON from stdin, runs layout, writes to stdout.
4242
_ELK_LAYOUT_SCRIPT = r"""
43+
const { Worker } = require('worker_threads');
44+
45+
// Run ELK layout in a worker thread with a large stack via resourceLimits.
46+
// resourceLimits.stackSizeMb is far more reliable than the --stack-size V8
47+
// flag for preventing "Maximum call stack size exceeded" in deeply recursive
48+
// ELK layout on large graphs (100k+ nodes).
49+
const stackMb = parseInt(process.env._TL_STACK_MB || '64', 10);
50+
51+
const workerCode = `
52+
const { parentPort, workerData } = require('worker_threads');
4353
const ELK = require('elkjs');
4454
const elk = new ELK();
55+
const graph = JSON.parse(workerData);
56+
elk.layout(graph).then((result) => {
57+
parentPort.postMessage(JSON.stringify(result));
58+
}).catch((err) => { throw err; });
59+
`;
4560
4661
let input = '';
4762
process.stdin.setEncoding('utf8');
4863
process.stdin.on('data', (chunk) => { input += chunk; });
4964
process.stdin.on('end', () => {
50-
const graph = JSON.parse(input);
51-
elk.layout(graph).then((result) => {
52-
process.stdout.write(JSON.stringify(result));
53-
}).catch((err) => {
65+
const worker = new Worker(workerCode, {
66+
eval: true,
67+
workerData: input,
68+
resourceLimits: { stackSizeMb: stackMb },
69+
});
70+
worker.on('message', (result) => {
71+
process.stdout.write(result);
72+
});
73+
worker.on('error', (err) => {
5474
process.stderr.write(err.toString());
5575
process.exit(1);
5676
});
@@ -387,22 +407,26 @@ def run_elk_layout(elk_graph: dict, timeout: Optional[int] = None) -> dict:
387407
graph_json = json.dumps(elk_graph)
388408
graph_kb = len(graph_json) // 1024
389409
heap_mb = max(16384, graph_kb * 48) # 48 MB of heap per KB of JSON, 16GB floor
390-
stack_kb = max(4194304, graph_kb * 64) # ~64x JSON size, 4GB floor
410+
# Worker thread stack via resourceLimits.stackSizeMb (MB).
411+
# Much more reliable than --stack-size for deeply recursive ELK layout.
412+
stack_mb = max(64, graph_kb // 8) # ~128 KB of stack per KB of JSON (~128x JSON size), 64MB floor
413+
414+
env = _node_env()
415+
env["_TL_STACK_MB"] = str(stack_mb)
391416

392417
try:
393418
result = subprocess.run(
394419
[
395420
"node",
396421
f"--max-old-space-size={heap_mb}",
397-
f"--stack-size={stack_kb}",
398422
"-e",
399423
_ELK_LAYOUT_SCRIPT,
400424
],
401425
input=graph_json,
402426
capture_output=True,
403427
text=True,
404428
timeout=timeout,
405-
env=_node_env(),
429+
env=env,
406430
preexec_fn=_unlimit_stack,
407431
)
408432
except FileNotFoundError:

0 commit comments

Comments
 (0)