From 1e1943d697d7f7221232b496f1fadb9554666553 Mon Sep 17 00:00:00 2001
From: Adam Durham
Date: Wed, 27 May 2026 10:31:35 -0500
Subject: [PATCH] fix(runner): restore rank-0-only send_chunk guard (DOUBLE-EMIT FIX)

Upstream's 2026-05-25 refactor removed the 'if self.device_rank == 0:'
guard around event_sender.send(ChunkGenerated(...)). The intent on
upstream's side appears to be that runners outside rank 0 either don't
reach this method or don't have an active event_sender.

On OUR 2-rank TP setup that assumption breaks: both rank 0 AND rank 1
hit send_chunk on every accepted token, both emit ChunkGenerated events,
and the API sees every token twice.

Symptom: 'Repeat exactly: FALCON-MERCURY-7749' returns
'FALCONFALCON-MERCURY-MERCURY-7749-7749'. Quality probe
needle_found=0/3.

Restore the guard. If we ever switch to upstream's runner topology this
might need a different mechanism, but for the current 2-Studio TP setup
this is the right semantics.
---
 src/exo/worker/runner/runner.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/exo/worker/runner/runner.py b/src/exo/worker/runner/runner.py
index ac5d054808..3160677107 100644
--- a/src/exo/worker/runner/runner.py
+++ b/src/exo/worker/runner/runner.py
@@ -391,4 +391,14 @@ def send_chunk(
         command_id: CommandId,
     ):
         assert isinstance(self.generator, Engine)
+        # Only rank 0 emits ChunkGenerated. Under tensor-parallel execution
+        # across multiple nodes (e.g. JACCL on 2 Mac Studios), every rank
+        # runs the same forward pass and reaches this method on every
+        # accepted token. Without this guard the API server's event channel
+        # receives the same ChunkGenerated event from each rank, and the
+        # client sees every token duplicated — e.g. asking the model to
+        # repeat "FALCON-MERCURY-7749" produces "FALCONFALCON-MERCURY-MERCURY-7749-7749".
+        # Rank 0 is canonical, so we de-duplicate at the emission point.
+        if self.device_rank != 0:
+            return
         self.event_sender.send(ChunkGenerated(command_id=command_id, chunk=chunk))