From 9f67b4fabd8bedd28f7436a7ff7170c9371731d5 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Fri, 16 Jan 2026 15:28:08 +0100 Subject: [PATCH 01/25] wip http + ws transport for barge in --- agents/package.json | 1 + agents/src/utils/http_transport.ts | 61 ++++++ agents/src/utils/ws_transport.test.ts | 282 ++++++++++++++++++++++++++ agents/src/utils/ws_transport.ts | 22 ++ pnpm-lock.yaml | 27 +++ 5 files changed, 393 insertions(+) create mode 100644 agents/src/utils/http_transport.ts create mode 100644 agents/src/utils/ws_transport.test.ts create mode 100644 agents/src/utils/ws_transport.ts diff --git a/agents/package.json b/agents/package.json index 51d539b6f..62cfcb0f7 100644 --- a/agents/package.json +++ b/agents/package.json @@ -69,6 +69,7 @@ "heap-js": "^2.6.0", "json-schema": "^0.4.0", "livekit-server-sdk": "^2.14.1", + "ofetch": "^1.5.1", "openai": "^6.8.1", "pidusage": "^4.0.1", "pino": "^8.19.0", diff --git a/agents/src/utils/http_transport.ts b/agents/src/utils/http_transport.ts new file mode 100644 index 000000000..8a7750ec6 --- /dev/null +++ b/agents/src/utils/http_transport.ts @@ -0,0 +1,61 @@ +import { ofetch } from 'ofetch'; + +export interface PostOptions { + baseUrl: string; + token: string; + signal?: AbortSignal; + timeout?: number; +} + +export interface PredictOptions { + threshold: number; + minFrames: number; +} + +export interface PredictEndpointResponse { + created_at: number; + is_bargein: boolean; + probabilities: number[]; +} + +export interface PredictResponse { + createdAt: number; + isBargein: boolean; + probabilities: number[]; + predictionDuration: number; +} + +export async function predict( + data: Uint8Array, + predictOptions: PredictOptions, + options: PostOptions, +): Promise { + const createdAt = performance.now(); + const url = new URL(`/bargein`, options.baseUrl); + url.searchParams.append('threshold', predictOptions.threshold.toString()); + url.searchParams.append('min_frames', predictOptions.minFrames.toFixed()); + url.searchParams.append('created_at', createdAt.toFixed()); + + const { created_at, is_bargein, probabilities } = await ofetch( + url.toString(), + { + retry: 1, + retryDelay: 100, + headers: { + 'Content-Type': 'application/octet-stream', + Authorization: `Bearer ${options.token}`, + }, + signal: options.signal, + timeout: options.timeout, + method: 'POST', + body: data, + }, + ); + + return { + createdAt: created_at, + isBargein: is_bargein, + probabilities, + predictionDuration: (performance.now() - createdAt) / 1e9, + }; +} diff --git a/agents/src/utils/ws_transport.test.ts b/agents/src/utils/ws_transport.test.ts new file mode 100644 index 000000000..77c5fdc91 --- /dev/null +++ b/agents/src/utils/ws_transport.test.ts @@ -0,0 +1,282 @@ +// SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +import { describe, expect, it } from 'vitest'; +import { WebSocket, WebSocketServer } from 'ws'; +import { webSocketStream } from './ws_transport.js'; + +describe('webSocketStream', () => { + describe('readable stream', () => { + it('receives messages from the WebSocket', async () => { + const wss = await new Promise((resolve) => { + const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); + }); + + const port = (wss.address() as { port: number }).port; + + wss.on('connection', (serverWs) => { + serverWs.send('hello'); + serverWs.send('world'); + serverWs.close(); + }); + + const { readable } = webSocketStream(`ws://localhost:${port}`); + const reader = readable.getReader(); + + const messages: string[] = []; + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + messages.push(Buffer.from(value).toString()); + } + } finally { + reader.releaseLock(); + } + + expect(messages).toEqual(['hello', 'world']); + + wss.close(); + }); + + it('handles binary messages', async () => { + const wss = await new Promise((resolve) => { + const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); + }); + + const port = (wss.address() as { port: number }).port; + + const binaryData = new Uint8Array([1, 2, 3, 4, 5]); + + wss.on('connection', (serverWs) => { + serverWs.send(binaryData); + serverWs.close(); + }); + + const { readable } = webSocketStream(`ws://localhost:${port}`); + const reader = readable.getReader(); + + const chunks: Uint8Array[] = []; + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + chunks.push(new Uint8Array(value)); + } + } finally { + reader.releaseLock(); + } + + expect(chunks).toHaveLength(1); + expect(Array.from(chunks[0]!)).toEqual([1, 2, 3, 4, 5]); + + wss.close(); + }); + + it('handles empty stream when connection closes immediately', async () => { + const wss = await new Promise((resolve) => { + const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); + }); + + const port = (wss.address() as { port: number }).port; + + wss.on('connection', (serverWs) => { + serverWs.close(); + }); + const { readable } = webSocketStream(`ws://localhost:${port}`); + const reader = readable.getReader(); + + const chunks: Uint8Array[] = []; + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + chunks.push(value); + } + } finally { + reader.releaseLock(); + } + + expect(chunks).toEqual([]); + + wss.close(); + }); + }); + + describe('writable stream', () => { + it('sends messages through the WebSocket', async () => { + const wss = await new Promise((resolve) => { + const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); + }); + + const port = (wss.address() as { port: number }).port; + const ws = new WebSocket(`ws://localhost:${port}`); + + const connected = new Promise((resolve) => { + ws.on('open', resolve); + }); + + const messagesReceived: string[] = []; + const serverClosed = new Promise((resolve) => { + wss.on('connection', (serverWs) => { + serverWs.on('message', (data) => { + messagesReceived.push(data.toString()); + }); + serverWs.on('close', resolve); + }); + }); + + await connected; + const { writable } = webSocketStream(`ws://localhost:${port}`); + const writer = writable.getWriter(); + + await writer.write(new TextEncoder().encode('hello')); + await writer.write(new TextEncoder().encode('world')); + await 
writer.close(); + + await serverClosed; + + expect(messagesReceived).toEqual(['hello', 'world']); + + wss.close(); + }); + + it('sends binary data through the WebSocket', async () => { + const wss = await new Promise((resolve) => { + const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); + }); + + const port = (wss.address() as { port: number }).port; + + const chunksReceived: Buffer[] = []; + const serverClosed = new Promise((resolve) => { + wss.on('connection', (serverWs) => { + serverWs.on('message', (data) => { + chunksReceived.push(Buffer.from(data as Buffer)); + }); + serverWs.on('close', resolve); + }); + }); + + const { writable } = webSocketStream(`ws://localhost:${port}`); + const writer = writable.getWriter(); + + const binaryData = new Uint8Array([10, 20, 30, 40, 50]); + await writer.write(binaryData); + await writer.close(); + + await serverClosed; + + expect(chunksReceived).toHaveLength(1); + expect(Array.from(chunksReceived[0]!)).toEqual([10, 20, 30, 40, 50]); + + wss.close(); + }); + + it('buffers writes if readyState is CONNECTING', async () => { + const wss = await new Promise((resolve) => { + const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); + }); + + const port = (wss.address() as { port: number }).port; + + const { writable } = webSocketStream(`ws://localhost:${port}`); + const writer = writable.getWriter(); + + const messagesReceived: string[] = []; + const serverClosed = new Promise((resolve) => { + wss.on('connection', (serverWs) => { + serverWs.on('message', (data) => { + messagesReceived.push(data.toString()); + }); + serverWs.on('close', resolve); + }); + }); + + // These writes should be buffered + await writer.write(new TextEncoder().encode('buffered message')); + await writer.close(); + + await serverClosed; + + expect(messagesReceived).toEqual(['buffered message']); + + wss.close(); + }); + }); + + describe('bidirectional communication', () => { + it('supports echo pattern with readable and writable', async () => { + const wss = await new Promise((resolve) => { + const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); + }); + + const port = (wss.address() as { port: number }).port; + + // Server echoes messages back + wss.on('connection', (serverWs) => { + serverWs.on('message', (data) => { + serverWs.send(data); + }); + }); + + const { readable, writable } = webSocketStream(`ws://localhost:${port}`); + const writer = writable.getWriter(); + const reader = readable.getReader(); + + // Send messages + await writer.write(new TextEncoder().encode('ping1')); + await writer.write(new TextEncoder().encode('ping2')); + + // Read echoed responses + const { value: response1 } = await reader.read(); + const { value: response2 } = await reader.read(); + + expect(Buffer.from(response1!).toString()).toBe('ping1'); + expect(Buffer.from(response2!).toString()).toBe('ping2'); + + reader.releaseLock(); + await writer.close(); + + wss.close(); + }); + }); + + describe('error handling', () => { + it('readable stream ends when WebSocket closes unexpectedly', async () => { + const wss = await new Promise((resolve) => { + const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); + }); + + const port = (wss.address() as { port: number }).port; + + wss.on('connection', (serverWs) => { + serverWs.send('before close'); + // Terminate connection abruptly + serverWs.terminate(); + }); + + const { readable } = webSocketStream(`ws://localhost:${port}`); 
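+      // Note: the server closes the connection immediately on connect, so this
+      // stream is expected to end (read() resolving with done=true) without
+      // ever emitting a chunk.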
+ const reader = readable.getReader(); + + const chunks: string[] = []; + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + chunks.push(Buffer.from(value).toString()); + } + } catch (error) { + console.error(error); + // Connection terminated, stream may error + } finally { + reader.releaseLock(); + } + + // Should have received the message sent before termination + expect(chunks).toContain('before close'); + + wss.close(); + }); + }); +}); diff --git a/agents/src/utils/ws_transport.ts b/agents/src/utils/ws_transport.ts new file mode 100644 index 000000000..4af4f906b --- /dev/null +++ b/agents/src/utils/ws_transport.ts @@ -0,0 +1,22 @@ +import { Readable, Writable } from 'node:stream'; +import WebSocket, { createWebSocketStream } from 'ws'; + +export function webSocketStream(wsUrl: string) { + const ws = new WebSocket(wsUrl); + const duplex = createWebSocketStream(ws); + duplex.on('error', console.error); + + // End the write side when the read side ends to properly close the stream. + // This is needed because Readable.toWeb() waits for both sides of the duplex + // to close before signaling done on the ReadableStream. + duplex.on('end', () => { + duplex.end(); + }); + + // Convert the writable side + const writable = Writable.toWeb(duplex); + // Convert the readable side + const readable = Readable.toWeb(duplex); + + return { readable, writable, close: ws.close }; +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1dce72646..6dc766f85 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -163,6 +163,9 @@ importers: livekit-server-sdk: specifier: ^2.14.1 version: 2.14.1 + ofetch: + specifier: ^1.5.1 + version: 1.5.1 openai: specifier: ^6.8.1 version: 6.8.1(ws@8.18.3)(zod@3.25.76) @@ -3145,6 +3148,9 @@ packages: resolution: {integrity: sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==} engines: {node: '>=6'} + destr@2.0.5: + resolution: {integrity: sha512-ugFTXCtDZunbzasqBxrK93Ik/DRYsO6S/fedkWEMKqt04xZ4csmnmwGDBAb07QWNaGMAmnTIemsYZCksjATwsA==} + detect-indent@6.1.0: resolution: {integrity: sha512-reYkTUJAZb9gUuZ2RvVCNhVHdg62RHnJ7WJl8ftMi4diZ6NWlciOzQN88pUhSELEwflJht4oQDv0F0BMlwaYtA==} engines: {node: '>=8'} @@ -4285,6 +4291,9 @@ packages: engines: {node: '>=10.5.0'} deprecated: Use your platform's native DOMException instead + node-fetch-native@1.6.7: + resolution: {integrity: sha512-g9yhqoedzIUm0nTnTqAQvueMPVOuIY16bqgAJJC8XOOubYFNwz6IER9qs0Gq2Xd0+CecCKFjtdDTMA4u4xG06Q==} + node-fetch@2.7.0: resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==} engines: {node: 4.x || >=6.0.0} @@ -4340,6 +4349,9 @@ packages: obug@2.1.1: resolution: {integrity: sha512-uTqF9MuPraAQ+IsnPf366RG4cP9RtUi7MLO1N3KEc+wb0a6yKpeL0lmk2IB1jY5KHPAlTc6T/JRdC/YqxHNwkQ==} + ofetch@1.5.1: + resolution: {integrity: sha512-2W4oUZlVaqAPAil6FUg/difl6YhqhUR7x2eZY4bQCko22UXg3hptq9KLQdqFClV+Wu85UX7hNtdGTngi/1BxcA==} + on-exit-leak-free@2.1.2: resolution: {integrity: sha512-0eJJY6hXLGf1udHwfNftBqH+g73EU4B504nZeKpz1sYRKafAghwxEJunB2O7rDZkL4PGfsMVnTXZ2EjibbqcsA==} engines: {node: '>=14.0.0'} @@ -5179,6 +5191,9 @@ packages: ufo@1.5.3: resolution: {integrity: sha512-Y7HYmWaFwPUmkoQCUIAYpKqkOf+SbVj/2fJJZ4RJMCfZp0rTGwRbzQD+HghfnhKOjL9E01okqz+ncJskGYfBNw==} + ufo@1.6.3: + resolution: {integrity: sha512-yDJTmhydvl5lJzBmy/hyOAA0d+aqCBuwl818haVdYCRrWV84o7YyeVm4QlVHStqNrrJSTb6jKuFAVqAFsr+K3Q==} + unbox-primitive@1.0.2: resolution: {integrity: 
sha512-61pPlCD9h51VoreyJ0BReideM3MDKMKnh6+V9L08331ipq6Q8OFXZYiqP6n/tbHx4s5I9uRhcye6BrbkizkBDw==} @@ -7639,6 +7654,8 @@ snapshots: dequal@2.0.3: {} + destr@2.0.5: {} + detect-indent@6.1.0: {} detect-libc@2.0.4: {} @@ -8981,6 +8998,8 @@ snapshots: node-domexception@1.0.0: {} + node-fetch-native@1.6.7: {} + node-fetch@2.7.0: dependencies: whatwg-url: 5.0.0 @@ -9041,6 +9060,12 @@ snapshots: obug@2.1.1: {} + ofetch@1.5.1: + dependencies: + destr: 2.0.5 + node-fetch-native: 1.6.7 + ufo: 1.6.3 + on-exit-leak-free@2.1.2: {} once@1.4.0: @@ -10048,6 +10073,8 @@ snapshots: ufo@1.5.3: {} + ufo@1.6.3: {} + unbox-primitive@1.0.2: dependencies: call-bind: 1.0.7 From cf3d72347c4f069de50eac7b0418a1e325b074cb Mon Sep 17 00:00:00 2001 From: lukasIO Date: Tue, 20 Jan 2026 13:51:01 +0100 Subject: [PATCH 02/25] refactor --- .../AdaptiveInterruptionDetector.ts | 119 ++++++ .../interruption/InterruptionStream.ts | 350 ++++++++++++++++++ agents/src/inference/interruption/defaults.ts | 33 ++ agents/src/inference/interruption/errors.ts | 0 .../interruption}/http_transport.ts | 4 +- .../inference/interruption/interruption.ts | 87 +++++ agents/src/inference/utils.test.ts | 31 ++ agents/src/inference/utils.ts | 15 + agents/src/stream/stream_channel.ts | 8 +- 9 files changed, 643 insertions(+), 4 deletions(-) create mode 100644 agents/src/inference/interruption/AdaptiveInterruptionDetector.ts create mode 100644 agents/src/inference/interruption/InterruptionStream.ts create mode 100644 agents/src/inference/interruption/defaults.ts create mode 100644 agents/src/inference/interruption/errors.ts rename agents/src/{utils => inference/interruption}/http_transport.ts (96%) create mode 100644 agents/src/inference/interruption/interruption.ts create mode 100644 agents/src/inference/utils.test.ts diff --git a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts new file mode 100644 index 000000000..a2181cdda --- /dev/null +++ b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts @@ -0,0 +1,119 @@ +import type { TypedEventEmitter } from '@livekit/typed-emitter'; +import EventEmitter from 'events'; +import { + DEFAULT_BASE_URL, + FRAMES_PER_SECOND, + SAMPLE_RATE, + interruptionOptionDefaults, +} from './defaults.js'; +import type { InterruptionDetectionError } from './interruption.js'; + +type InterruptionCallbacks = { + interruptionDetected: () => void; + overlapSpeechDetected: () => void; + error: (error: InterruptionDetectionError) => void; +}; + +export interface InterruptionOptions { + sampleRate: number; + threshold: number; + minFrames: number; + maxAudioDuration: number; + audioPrefixDuration: number; + detectionInterval: number; + inferenceTimeout: number; + minInterruptionDuration: number; + baseUrl: string; + apiKey: string; + apiSecret: string; + useProxy: boolean; +} + +export type AdaptiveInterruptionDetectorOptions = Partial; + +export class AdaptiveInterruptionDetector extends (EventEmitter as new () => TypedEventEmitter) { + options: InterruptionOptions; + private label: string; + private streams: WeakSet; // TODO: Union of InterruptionHttpStream | InterruptionWebSocketStream + + constructor(options: AdaptiveInterruptionDetectorOptions = {}) { + super(); + + const { + maxAudioDuration, + baseUrl, + apiKey, + apiSecret, + useProxy: useProxyArg, + audioPrefixDuration, + threshold, + detectionInterval, + inferenceTimeout, + minInterruptionDuration, + } = { ...interruptionOptionDefaults, ...options }; + + if (maxAudioDuration 
> 3.0) { + throw new Error('maxAudioDuration must be less than or equal to 3.0 seconds'); + } + + const lkBaseUrl = baseUrl ?? process.env.LIVEKIT_REMOTE_EOT_URL ?? DEFAULT_BASE_URL; + let lkApiKey = apiKey ?? ''; + let lkApiSecret = apiSecret ?? ''; + let useProxy: boolean; + + // use LiveKit credentials if using the default base URL (inference) + if (lkBaseUrl === DEFAULT_BASE_URL) { + lkApiKey = + apiKey ?? process.env.LIVEKIT_INFERENCE_API_KEY ?? process.env.LIVEKIT_API_KEY ?? ''; + if (!lkApiKey) { + throw new Error( + 'apiKey is required, either as argument or set LIVEKIT_API_KEY environmental variable', + ); + } + + lkApiSecret = + apiSecret ?? + process.env.LIVEKIT_INFERENCE_API_SECRET ?? + process.env.LIVEKIT_API_SECRET ?? + ''; + if (!lkApiSecret) { + throw new Error( + 'apiSecret is required, either as argument or set LIVEKIT_API_SECRET environmental variable', + ); + } + + useProxy = true; + } else { + useProxy = useProxyArg ?? false; + } + + this.options = { + sampleRate: SAMPLE_RATE, + threshold, + minFrames: Math.ceil(minInterruptionDuration * FRAMES_PER_SECOND), + maxAudioDuration, + audioPrefixDuration, + detectionInterval, + inferenceTimeout, + baseUrl: lkBaseUrl, + apiKey: lkApiKey, + apiSecret: lkApiSecret, + useProxy, + minInterruptionDuration, + }; + + this.label = `${this.constructor.name}`; + this.streams = new WeakSet(); + + console.info('adaptive interruption detector initialized', { + baseUrl: this.options.baseUrl, + detectionInterval: this.options.detectionInterval, + audioPrefixDuration: this.options.audioPrefixDuration, + maxAudioDuration: this.options.maxAudioDuration, + minFrames: this.options.minFrames, + threshold: this.options.threshold, + inferenceTimeout: this.options.inferenceTimeout, + useProxy: this.options.useProxy, + }); + } +} diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts new file mode 100644 index 000000000..c0f7136b9 --- /dev/null +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -0,0 +1,350 @@ +import { AudioFrame, AudioResampler } from '@livekit/rtc-node'; +import type { Span } from '@opentelemetry/sdk-trace-base'; +import { type ReadableStream, TransformStream, WritableStream } from 'stream/web'; +import { log } from '../../log.js'; +import { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js'; +import { createAccessToken } from '../utils.js'; +import type { + AdaptiveInterruptionDetector, + InterruptionOptions, +} from './AdaptiveInterruptionDetector.js'; +import { apiConnectDefaults } from './defaults.js'; +import { predictHTTP } from './http_transport.js'; +import { + InterruptionCacheEntry, + type InterruptionDetectionError, + type InterruptionEvent, + InterruptionEventType, +} from './interruption.js'; + +export interface AgentSpeechStarted { + type: 'agent-speech-started'; +} + +export interface AgentSpeechEnded { + type: 'agent-speech-ended'; +} + +export interface OverlapSpeechStarted { + type: 'overlap-speech-started'; + speechDuration: number; + userSpeakingSpan: Span; +} + +export interface OverlapSpeechEnded { + type: 'overlap-speech-ended'; +} + +export interface Flush { + type: 'flush'; +} + +export type InterruptionSentinel = + | AgentSpeechStarted + | AgentSpeechEnded + | OverlapSpeechStarted + | OverlapSpeechEnded + | Flush; + +export class InterruptionStreamSentinel { + static speechStarted(): AgentSpeechEnded { + return { type: 'agent-speech-ended' }; + } + + static speechEnded(): AgentSpeechEnded { + 
return { type: 'agent-speech-ended' }; + } + + static overlapSpeechStarted( + speechDuration: number, + userSpeakingSpan: Span, + ): OverlapSpeechStarted { + return { type: 'overlap-speech-started', speechDuration, userSpeakingSpan }; + } + + static overlapSpeechEnded(): OverlapSpeechEnded { + return { type: 'overlap-speech-ended' }; + } + + static flush(): Flush { + return { type: 'flush' }; + } +} + +export interface ApiConnectOptions { + maxRetries: number; + retryInterval: number; + timeout: number; +} + +abstract class InterruptionStreamBase { + private inputStream: StreamChannel; + + private eventStream: StreamChannel; + + private resampler?: AudioResampler; + + private userSpeakingSpan: Span | undefined; + + private overlapSpeechStartedAt: number | undefined; + + private options: InterruptionOptions; + + private apiOptions: ApiConnectOptions; + + private model: AdaptiveInterruptionDetector; + + constructor(model: AdaptiveInterruptionDetector, apiOptions: Partial) { + this.inputStream = createStreamChannel< + InterruptionSentinel | AudioFrame, + InterruptionDetectionError + >(); + + this.eventStream = createStreamChannel(); + + this.model = model; + this.options = model.options; + this.apiOptions = { ...apiConnectDefaults, ...apiOptions }; + } + + private setupTransform() { + let agentSpeechStarted = false; + let startIdx = 0; + let accumulatedSamples = 0; + let overlapSpeechStarted = false; + const cache = new Map(); // TODO limit cache size + const inferenceS16Data = new Int16Array( + Math.ceil(this.options.maxAudioDuration * this.options.sampleRate), + ).fill(0); + + const transformer = new TransformStream( + { + transform: (chunk, controller) => { + if (chunk instanceof AudioFrame) { + if (!agentSpeechStarted) { + return; + } + if (this.options.sampleRate !== chunk.sampleRate) { + controller.error('the sample rate of the input frames must be consistent'); + return; + } + const result = writeToInferenceS16Data( + chunk, + startIdx, + inferenceS16Data, + this.options.maxAudioDuration, + ); + startIdx = result.startIdx; + accumulatedSamples += result.samplesWritten; + + // Send data for inference when enough samples accumulated during overlap + if ( + accumulatedSamples >= + Math.floor(this.options.detectionInterval * this.options.sampleRate) && + overlapSpeechStarted + ) { + // Send a copy of the audio data up to startIdx for inference + const audioSlice = inferenceS16Data.slice(0, startIdx); + // TODO: send to data channel - dataChan.send(audioSlice); + accumulatedSamples = 0; + controller.enqueue(audioSlice); + } + } else if (chunk.type === 'agent-speech-started') { + log().debug('agent speech started'); + + agentSpeechStarted = true; + overlapSpeechStarted = false; + accumulatedSamples = 0; + startIdx = 0; + cache.clear(); + } else if (chunk.type === 'agent-speech-ended') { + log().debug('agent speech ended'); + + agentSpeechStarted = false; + overlapSpeechStarted = false; + accumulatedSamples = 0; + startIdx = 0; + cache.clear(); + } else if (chunk.type === 'overlap-speech-started' && agentSpeechStarted) { + this.userSpeakingSpan = chunk.userSpeakingSpan; + log().debug('overlap speech started, starting interruption inference'); + overlapSpeechStarted = true; + accumulatedSamples = 0; + const shiftSize = Math.min( + startIdx, + Math.round(chunk.speechDuration * this.options.sampleRate), + ); + // Shift the buffer: copy the last `shiftSize` samples before startIdx + // to the beginning of the buffer. 
This preserves recent audio context + // (the user's speech that occurred just before overlap was detected). + inferenceS16Data.copyWithin(0, startIdx - shiftSize, startIdx); + startIdx = shiftSize; + cache.clear(); + } else if (chunk.type === 'overlap-speech-ended') { + log().debug('overlap speech ended'); + + if (overlapSpeechStarted) { + this.userSpeakingSpan = undefined; + let latestEntry = Array.from(cache.values()).at(-1); + if (!latestEntry) { + log().debug('no request made for overlap speech'); + latestEntry = InterruptionCacheEntry.default(); + } else { + cache.delete(latestEntry.createdAt); + } + const event: InterruptionEvent = { + type: InterruptionEventType.OVERLAP_SPEECH_ENDED, + timestamp: Date.now(), + isInterruption: false, + overlapSpeechStartedAt: this.overlapSpeechStartedAt, + speechInput: latestEntry.speechInput, + probabilities: latestEntry.probabilities, + totalDuration: latestEntry.totalDuration, + detectionDelay: latestEntry.detectionDelay, + predictionDuration: latestEntry.predictionDuration, + probability: latestEntry.probability, + }; + this.eventStream.write(event); + } + } else if (chunk.type === 'flush') { + log().debug('flushing'); + // do nothing + } + }, + }, + { highWaterMark: Number.MAX_SAFE_INTEGER }, + { highWaterMark: Number.MAX_SAFE_INTEGER }, + ); + + const httpPostWriter = new WritableStream( + { + // Implement the sink + write: async (chunk) => { + if (this.overlapSpeechStartedAt) { + return; + } + await predictHTTP( + chunk, + { threshold: this.options.threshold, minFrames: this.options.minFrames }, + { + baseUrl: this.options.baseUrl, + timeout: this.options.inferenceTimeout, + token: await createAccessToken(), + }, + ); + }, + close() { + const listItem = document.createElement('li'); + listItem.textContent = `[MESSAGE RECEIVED] ${result}`; + list.appendChild(listItem); + }, + abort(err) { + console.log('Sink error:', err); + }, + }, + { highWaterMark: Number.MAX_SAFE_INTEGER }, + ); + + this.inputStream.stream().pipeThrough(transformer).pipeTo(httpPostWriter); + } + + private ensureInputNotEnded() { + if (this.inputStream.closed) { + throw new Error('input stream is closed'); + } + } + + private ensureStreamsNotEnded() { + this.ensureInputNotEnded(); + } + + private getResamplerFor(inputSampleRate: number): AudioResampler { + if (!this.resampler) { + this.resampler = new AudioResampler(inputSampleRate, this.options.sampleRate); + } + return this.resampler; + } + + get stream(): ReadableStream { + return this.eventStream.stream(); + } + + async pushFrame(frame: InterruptionSentinel | AudioFrame): Promise { + this.ensureStreamsNotEnded(); + if (!(frame instanceof AudioFrame)) { + return this.inputStream.write(frame); + } else if (this.options.sampleRate !== frame.sampleRate) { + const resampler = this.getResamplerFor(frame.sampleRate); + if (resampler.inputRate !== frame.sampleRate) { + throw new Error('the sample rate of the input frames must be consistent'); + } + for (const resampledFrame of resampler.push(frame)) { + await this.inputStream.write(resampledFrame); + } + } else { + await this.inputStream.write(frame); + } + } + + async flush(): Promise { + this.ensureStreamsNotEnded(); + this.inputStream.write(InterruptionStreamSentinel.flush()); + } + + async endInput(): Promise { + await this.flush(); + await this.inputStream.close(); + } + + async close(): Promise { + if (!this.inputStream.closed) await this.inputStream.close(); + } +} + +/** + * Write the audio frame to the output data array and return the new start index + * and the number 
of samples written. + */ +function writeToInferenceS16Data( + frame: AudioFrame, + startIdx: number, + outData: Int16Array, + maxAudioDuration: number, +): { startIdx: number; samplesWritten: number } { + const maxWindowSize = Math.floor(maxAudioDuration * frame.sampleRate); + + if (frame.samplesPerChannel > outData.length) { + throw new Error('frame samples are greater than the max window size'); + } + + // Shift the data to the left if the window would overflow + const shift = startIdx + frame.samplesPerChannel - maxWindowSize; + if (shift > 0) { + outData.copyWithin(0, shift, startIdx); + startIdx -= shift; + } + + // Get the frame data as Int16Array + const frameData = new Int16Array( + frame.data.buffer, + frame.data.byteOffset, + frame.samplesPerChannel * frame.channels, + ); + + if (frame.channels > 1) { + // Mix down multiple channels to mono by averaging + for (let i = 0; i < frame.samplesPerChannel; i++) { + let sum = 0; + for (let ch = 0; ch < frame.channels; ch++) { + sum += frameData[i * frame.channels + ch] ?? 0; + } + outData[startIdx + i] = Math.floor(sum / frame.channels); + } + } else { + // Single channel - copy directly + outData.set(frameData, startIdx); + } + + startIdx += frame.samplesPerChannel; + return { startIdx, samplesWritten: frame.samplesPerChannel }; +} diff --git a/agents/src/inference/interruption/defaults.ts b/agents/src/inference/interruption/defaults.ts new file mode 100644 index 000000000..e5e2ba6b3 --- /dev/null +++ b/agents/src/inference/interruption/defaults.ts @@ -0,0 +1,33 @@ +import type { InterruptionOptions } from './AdaptiveInterruptionDetector.js'; +import type { ApiConnectOptions } from './InterruptionStream.js'; + +export const MIN_INTERRUPTION_DURATION = 0.025 * 2; // 25ms per frame, 2 consecutive frames +export const THRESHOLD = 0.65; +export const MAX_AUDIO_DURATION = 3.0; +export const AUDIO_PREFIX_DURATION = 0.5; +export const DETECTION_INTERVAL = 0.1; +export const REMOTE_INFERENCE_TIMEOUT = 1.0; +export const SAMPLE_RATE = 16000; +export const FRAMES_PER_SECOND = 40; +export const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1'; + +export const apiConnectDefaults: ApiConnectOptions = { + maxRetries: 3, + retryInterval: 2_000, + timeout: 10_000, +} as const; + +export const interruptionOptionDefaults: InterruptionOptions = { + sampleRate: SAMPLE_RATE, + threshold: THRESHOLD, + minFrames: Math.ceil(MIN_INTERRUPTION_DURATION * FRAMES_PER_SECOND), + maxAudioDuration: MAX_AUDIO_DURATION, + audioPrefixDuration: AUDIO_PREFIX_DURATION, + detectionInterval: DETECTION_INTERVAL, + inferenceTimeout: 10_000, + baseUrl: DEFAULT_BASE_URL, + apiKey: process.env.LIVEKIT_API_KEY || '', + apiSecret: process.env.LIVEKIT_API_SECRET || '', + useProxy: false, + minInterruptionDuration: MIN_INTERRUPTION_DURATION, +} as const; diff --git a/agents/src/inference/interruption/errors.ts b/agents/src/inference/interruption/errors.ts new file mode 100644 index 000000000..e69de29bb diff --git a/agents/src/utils/http_transport.ts b/agents/src/inference/interruption/http_transport.ts similarity index 96% rename from agents/src/utils/http_transport.ts rename to agents/src/inference/interruption/http_transport.ts index 8a7750ec6..dc2a9ddd4 100644 --- a/agents/src/utils/http_transport.ts +++ b/agents/src/inference/interruption/http_transport.ts @@ -25,8 +25,8 @@ export interface PredictResponse { predictionDuration: number; } -export async function predict( - data: Uint8Array, +export async function predictHTTP( + data: Int16Array, predictOptions: 
PredictOptions, options: PostOptions, ): Promise { diff --git a/agents/src/inference/interruption/interruption.ts b/agents/src/inference/interruption/interruption.ts new file mode 100644 index 000000000..f13767120 --- /dev/null +++ b/agents/src/inference/interruption/interruption.ts @@ -0,0 +1,87 @@ +import { slidingWindowMinMax } from '../utils.js'; + +export enum InterruptionEventType { + INTERRUPTION = 'interruption', + OVERLAP_SPEECH_ENDED = 'overlap_speech_ended', +} +export interface InterruptionEvent { + type: InterruptionEventType; + timestamp: number; + isInterruption: boolean; + totalDuration: number; + predictionDuration: number; + detectionDelay: number; + overlapSpeechStartedAt?: number; + speechInput?: Int16Array; + probabilities?: Float32Array; + probability: number; +} + +export class InterruptionDetectionError extends Error { + readonly type = 'InterruptionDetectionError'; + + readonly timestamp: number; + readonly label: string; + readonly recoverable: boolean; + + constructor(message: string, timestamp: number, label: string, recoverable: boolean) { + super(message); + this.name = 'InterruptionDetectionError'; + this.timestamp = timestamp; + this.label = label; + this.recoverable = recoverable; + } + + toString(): string { + return `${this.name}: ${this.message} (label=${this.label}, timestamp=${this.timestamp}, recoverable=${this.recoverable})`; + } +} + +function estimateProbability( + probabilities: Float32Array, + windowSize: number = MIN_INTERRUPTION_DURATION, +): number { + const minWindow = Math.ceil(windowSize / 0.025); // 25ms per frame + if (probabilities.length < minWindow) { + return 0; + } + + return slidingWindowMinMax(probabilities, windowSize); +} + +/** + * Typed cache entry for interruption inference results. + */ +export class InterruptionCacheEntry { + readonly createdAt: number; + readonly totalDuration: number; + readonly predictionDuration: number; + readonly detectionDelay: number; + readonly speechInput?: Int16Array; + readonly probabilities?: Float32Array; + readonly isInterruption?: boolean; + readonly probability: number; + + constructor(params: { + createdAt: number; + speechInput?: Int16Array; + totalDuration?: number; + predictionDuration?: number; + detectionDelay?: number; + probabilities?: Float32Array; + isInterruption?: boolean; + }) { + this.createdAt = params.createdAt; + this.totalDuration = params.totalDuration ?? 0; + this.predictionDuration = params.predictionDuration ?? 0; + this.detectionDelay = params.detectionDelay ?? 0; + this.speechInput = params.speechInput; + this.probabilities = params.probabilities; + this.isInterruption = params.isInterruption; + this.probability = this.probabilities ? estimateProbability(this.probabilities) : 0; + } + + static default(): InterruptionCacheEntry { + return new InterruptionCacheEntry({ createdAt: 0 }); + } +} diff --git a/agents/src/inference/utils.test.ts b/agents/src/inference/utils.test.ts new file mode 100644 index 000000000..bcd2fe9a8 --- /dev/null +++ b/agents/src/inference/utils.test.ts @@ -0,0 +1,31 @@ +// SPDX-FileCopyrightText: 2025 LiveKit, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +import { describe, expect, it } from 'vitest'; +import { slidingWindowMinMax } from './utils.js'; + +describe('slidingWindowMinMax', () => { + it('returns -Infinity when array is shorter than window size', () => { + expect(slidingWindowMinMax([0.5, 0.6], 3)).toBe(-Infinity); + expect(slidingWindowMinMax([], 1)).toBe(-Infinity); + }); + + it('returns the max value when window size is 1', () => { + // With window size 1, min of each window is the element itself, + // so max of mins is just the max of the array + expect(slidingWindowMinMax([0.1, 0.5, 0.3, 0.8, 0.2], 1)).toBe(0.8); + }); + + it('finds the best sustained probability across windows', () => { + // Windows of size 3: [0.2, 0.8, 0.7], [0.8, 0.7, 0.3], [0.7, 0.3, 0.9] + // Mins: 0.2, 0.3, 0.3 + // Max of mins: 0.3 + expect(slidingWindowMinMax([0.2, 0.8, 0.7, 0.3, 0.9], 3)).toBe(0.3); + }); + + it('returns the single element when array length equals window size', () => { + // Only one window covering the entire array, return min of that window + expect(slidingWindowMinMax([0.5, 0.9, 0.7], 3)).toBe(0.5); + expect(slidingWindowMinMax([0.8], 1)).toBe(0.8); + }); +}); diff --git a/agents/src/inference/utils.ts b/agents/src/inference/utils.ts index b3b772ef6..38c9faa5f 100644 --- a/agents/src/inference/utils.ts +++ b/agents/src/inference/utils.ts @@ -64,3 +64,18 @@ export async function connectWs( socket.once('close', onClose); }); } + +export function slidingWindowMinMax(probabilities: Float32Array, minWindow: number): number { + if (probabilities.length < minWindow) { + return -Infinity; + } + + let maxOfMins = -Infinity; + + for (let i = 0; i <= probabilities.length - minWindow; i++) { + const windowMin = Math.min(...probabilities.slice(i, i + minWindow)); + maxOfMins = Math.max(maxOfMins, windowMin); + } + + return maxOfMins; +} diff --git a/agents/src/stream/stream_channel.ts b/agents/src/stream/stream_channel.ts index 1fb68bab2..546cf93ff 100644 --- a/agents/src/stream/stream_channel.ts +++ b/agents/src/stream/stream_channel.ts @@ -4,14 +4,15 @@ import type { ReadableStream } from 'node:stream/web'; import { IdentityTransform } from './identity_transform.js'; -export interface StreamChannel { +export interface StreamChannel { write(chunk: T): Promise; close(): Promise; stream(): ReadableStream; + abort(error: E): Promise; readonly closed: boolean; } -export function createStreamChannel(): StreamChannel { +export function createStreamChannel(): StreamChannel { const transform = new IdentityTransform(); const writer = transform.writable.getWriter(); let isClosed = false; @@ -19,6 +20,9 @@ export function createStreamChannel(): StreamChannel { return { write: (chunk: T) => writer.write(chunk), stream: () => transform.readable, + abort: (error: E) => { + return writer.abort(error); + }, close: async () => { try { const result = await writer.close(); From 738d1a5e7b918b6d0b97cd4c959d9c65abfbc07c Mon Sep 17 00:00:00 2001 From: lukasIO Date: Tue, 20 Jan 2026 14:11:55 +0100 Subject: [PATCH 03/25] type errors resolved --- .../interruption/InterruptionStream.ts | 54 ++++++++++++++++--- .../inference/interruption/http_transport.ts | 4 +- .../inference/interruption/interruption.ts | 1 + agents/src/telemetry/trace_types.ts | 7 +++ 4 files changed, 57 insertions(+), 9 deletions(-) diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index c0f7136b9..1d9d9ca4f 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts 
+++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -1,5 +1,6 @@ import { AudioFrame, AudioResampler } from '@livekit/rtc-node'; import type { Span } from '@opentelemetry/sdk-trace-base'; +import { traceTypes } from 'agents/src/telemetry/index.js'; import { type ReadableStream, TransformStream, WritableStream } from 'stream/web'; import { log } from '../../log.js'; import { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js'; @@ -77,7 +78,18 @@ export interface ApiConnectOptions { timeout: number; } -abstract class InterruptionStreamBase { +function updateUserSpeakingSpan(span: Span, entry: InterruptionCacheEntry) { + span.setAttribute( + traceTypes.ATTR_IS_INTERRUPTION, + (entry.isInterruption ?? false).toString().toLowerCase(), + ); + span.setAttribute(traceTypes.ATTR_INTERRUPTION_PROBABILITY, entry.probability); + span.setAttribute(traceTypes.ATTR_INTERRUPTION_TOTAL_DURATION, entry.totalDuration); + span.setAttribute(traceTypes.ATTR_INTERRUPTION_PREDICTION_DURATION, entry.predictionDuration); + span.setAttribute(traceTypes.ATTR_INTERRUPTION_DETECTION_DELAY, entry.detectionDelay); +} + +export class InterruptionStreamBase { private inputStream: StreamChannel; private eventStream: StreamChannel; @@ -220,23 +232,51 @@ abstract class InterruptionStreamBase { { // Implement the sink write: async (chunk) => { - if (this.overlapSpeechStartedAt) { + if (!this.overlapSpeechStartedAt) { return; } - await predictHTTP( + const resp = await predictHTTP( chunk, { threshold: this.options.threshold, minFrames: this.options.minFrames }, { baseUrl: this.options.baseUrl, timeout: this.options.inferenceTimeout, - token: await createAccessToken(), + token: await createAccessToken(this.options.apiKey, this.options.apiSecret), }, ); + console.log('received inference response', resp); + const { createdAt, isBargein, probabilities, predictionDuration } = resp; + const entry = new InterruptionCacheEntry({ + createdAt, + probabilities, + isInterruption: isBargein, + speechInput: chunk, + totalDuration: (performance.now() - createdAt) / 1e9, + detectionDelay: Date.now() - this.overlapSpeechStartedAt, + predictionDuration, + }); + cache.set(createdAt, entry); + if (overlapSpeechStarted && entry.isInterruption) { + if (this.userSpeakingSpan) { + this.updateUserSpeakingSpan(this.userSpeakingSpan, entry); + } + const event: InterruptionEvent = { + type: InterruptionEventType.INTERRUPTION, + timestamp: Date.now(), + overlapSpeechStartedAt: this.overlapSpeechStartedAt, + isInterruption: entry.isInterruption, + speechInput: entry.speechInput, + probabilities: entry.probabilities, + totalDuration: entry.totalDuration, + predictionDuration: entry.predictionDuration, + detectionDelay: entry.detectionDelay, + probability: entry.probability, + }; + this.eventStream.write(event); + } }, close() { - const listItem = document.createElement('li'); - listItem.textContent = `[MESSAGE RECEIVED] ${result}`; - list.appendChild(listItem); + console.log('closing http writer'); }, abort(err) { console.log('Sink error:', err); diff --git a/agents/src/inference/interruption/http_transport.ts b/agents/src/inference/interruption/http_transport.ts index dc2a9ddd4..c1f22a569 100644 --- a/agents/src/inference/interruption/http_transport.ts +++ b/agents/src/inference/interruption/http_transport.ts @@ -21,7 +21,7 @@ export interface PredictEndpointResponse { export interface PredictResponse { createdAt: number; isBargein: boolean; - probabilities: number[]; + probabilities: Float32Array; 
predictionDuration: number; } @@ -55,7 +55,7 @@ export async function predictHTTP( return { createdAt: created_at, isBargein: is_bargein, - probabilities, + probabilities: new Float32Array(probabilities), predictionDuration: (performance.now() - createdAt) / 1e9, }; } diff --git a/agents/src/inference/interruption/interruption.ts b/agents/src/inference/interruption/interruption.ts index f13767120..e415f6d98 100644 --- a/agents/src/inference/interruption/interruption.ts +++ b/agents/src/inference/interruption/interruption.ts @@ -1,4 +1,5 @@ import { slidingWindowMinMax } from '../utils.js'; +import { MIN_INTERRUPTION_DURATION } from './defaults.js'; export enum InterruptionEventType { INTERRUPTION = 'interruption', diff --git a/agents/src/telemetry/trace_types.ts b/agents/src/telemetry/trace_types.ts index db76f7bc1..7220ec03a 100644 --- a/agents/src/telemetry/trace_types.ts +++ b/agents/src/telemetry/trace_types.ts @@ -51,6 +51,13 @@ export const ATTR_TRANSCRIPT_CONFIDENCE = 'lk.transcript_confidence'; export const ATTR_TRANSCRIPTION_DELAY = 'lk.transcription_delay'; export const ATTR_END_OF_TURN_DELAY = 'lk.end_of_turn_delay'; +// Adaptive Interruption attributes +export const ATTR_IS_INTERRUPTION = 'lk.is_interruption'; +export const ATTR_INTERRUPTION_PROBABILITY = 'lk.interruption.probability'; +export const ATTR_INTERRUPTION_TOTAL_DURATION = 'lk.interruption.total_duration'; +export const ATTR_INTERRUPTION_PREDICTION_DURATION = 'lk.interruption.prediction_duration'; +export const ATTR_INTERRUPTION_DETECTION_DELAY = 'lk.interruption.detection_delay'; + // metrics export const ATTR_LLM_METRICS = 'lk.llm_metrics'; export const ATTR_TTS_METRICS = 'lk.tts_metrics'; From b3638e9b5b99c1d866bb78b58e8f80828e9eb845 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Tue, 20 Jan 2026 14:41:55 +0100 Subject: [PATCH 04/25] more wiring --- agents/src/index.ts | 2 ++ .../AdaptiveInterruptionDetector.ts | 25 ++++++++++++++++++- .../interruption/InterruptionStream.ts | 8 +++--- agents/src/inference/interruption/index.ts | 1 + 4 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 agents/src/inference/interruption/index.ts diff --git a/agents/src/index.ts b/agents/src/index.ts index 57ace0c7a..e4fd2859b 100644 --- a/agents/src/index.ts +++ b/agents/src/index.ts @@ -36,4 +36,6 @@ export * from './vad.js'; export * from './version.js'; export * from './worker.js'; +export * from './inference/interruption/index.js'; + export { cli, inference, ipc, llm, metrics, stream, stt, telemetry, tokenize, tts, voice }; diff --git a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts index a2181cdda..69bf9b6d0 100644 --- a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts +++ b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts @@ -1,12 +1,18 @@ import type { TypedEventEmitter } from '@livekit/typed-emitter'; import EventEmitter from 'events'; +import { type ReadableStream, TransformStream } from 'stream/web'; +import { InterruptionStreamBase } from './InterruptionStream.js'; import { DEFAULT_BASE_URL, FRAMES_PER_SECOND, SAMPLE_RATE, interruptionOptionDefaults, } from './defaults.js'; -import type { InterruptionDetectionError } from './interruption.js'; +import { + type InterruptionDetectionError, + type InterruptionEvent, + InterruptionEventType, +} from './interruption.js'; type InterruptionCallbacks = { interruptionDetected: () => void; @@ -116,4 +122,21 @@ export class 
AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ useProxy: this.options.useProxy, }); } + + stream(): ReadableStream { + const httpStream = new InterruptionStreamBase(this, {}); + this.streams.add(httpStream); + const transformer = new TransformStream({ + transform: (chunk, controller) => { + if (chunk.type === InterruptionEventType.INTERRUPTION) { + this.emit('interruptionDetected'); // TODO payload + } else if (chunk.type === InterruptionEventType.OVERLAP_SPEECH_ENDED) { + this.emit('overlapSpeechDetected'); // TODO payload + } + controller.enqueue(chunk); + }, + }); + const stream = httpStream.stream.pipeThrough(transformer); + return stream; + } } diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index 1d9d9ca4f..ef4d4b682 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -48,8 +48,8 @@ export type InterruptionSentinel = | Flush; export class InterruptionStreamSentinel { - static speechStarted(): AgentSpeechEnded { - return { type: 'agent-speech-ended' }; + static speechStarted(): AgentSpeechStarted { + return { type: 'agent-speech-started' }; } static speechEnded(): AgentSpeechEnded { @@ -117,6 +117,8 @@ export class InterruptionStreamBase { this.model = model; this.options = model.options; this.apiOptions = { ...apiConnectDefaults, ...apiOptions }; + + this.setupTransform(); } private setupTransform() { @@ -258,7 +260,7 @@ export class InterruptionStreamBase { cache.set(createdAt, entry); if (overlapSpeechStarted && entry.isInterruption) { if (this.userSpeakingSpan) { - this.updateUserSpeakingSpan(this.userSpeakingSpan, entry); + updateUserSpeakingSpan(this.userSpeakingSpan, entry); } const event: InterruptionEvent = { type: InterruptionEventType.INTERRUPTION, diff --git a/agents/src/inference/interruption/index.ts b/agents/src/inference/interruption/index.ts new file mode 100644 index 000000000..b8a4ed715 --- /dev/null +++ b/agents/src/inference/interruption/index.ts @@ -0,0 +1 @@ +export * from './AdaptiveInterruptionDetector.js'; From df7bb86163b619158883332007382ab9a9fc372c Mon Sep 17 00:00:00 2001 From: lukasIO Date: Tue, 20 Jan 2026 14:45:54 +0100 Subject: [PATCH 05/25] exports and overlap handling --- agents/src/inference/interruption/InterruptionStream.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index ef4d4b682..2a90d91ed 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -314,6 +314,9 @@ export class InterruptionStreamBase { async pushFrame(frame: InterruptionSentinel | AudioFrame): Promise { this.ensureStreamsNotEnded(); if (!(frame instanceof AudioFrame)) { + if (frame.type === 'overlap-speech-started') { + this.overlapSpeechStartedAt = Date.now() - frame.speechDuration; + } return this.inputStream.write(frame); } else if (this.options.sampleRate !== frame.sampleRate) { const resampler = this.getResamplerFor(frame.sampleRate); From 1f715c94ef3aa4faa6afab78e0d4432fe9fa53b4 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Tue, 20 Jan 2026 14:48:06 +0100 Subject: [PATCH 06/25] thx claude --- .../interruption/AdaptiveInterruptionDetector.ts | 9 +++++++++ agents/src/inference/interruption/InterruptionStream.ts | 4 +++- agents/src/inference/interruption/index.ts | 3 +++ 3 files changed, 15 
insertions(+), 1 deletion(-) diff --git a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts index 69bf9b6d0..04c5741e4 100644 --- a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts +++ b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts @@ -139,4 +139,13 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ const stream = httpStream.stream.pipeThrough(transformer); return stream; } + + updateOptions(options: { threshold?: number; minInterruptionDuration?: number }): void { + if (options.threshold !== undefined) { + this.options.threshold = options.threshold; + } + if (options.minInterruptionDuration !== undefined) { + this.options.minFrames = Math.ceil(options.minInterruptionDuration * FRAMES_PER_SECOND); + } + } } diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index 2a90d91ed..d1f1defac 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -184,9 +184,11 @@ export class InterruptionStreamBase { log().debug('overlap speech started, starting interruption inference'); overlapSpeechStarted = true; accumulatedSamples = 0; + // Include both speech duration and audio prefix duration for context const shiftSize = Math.min( startIdx, - Math.round(chunk.speechDuration * this.options.sampleRate), + Math.round(chunk.speechDuration * this.options.sampleRate) + + Math.round(this.options.audioPrefixDuration * this.options.sampleRate), ); // Shift the buffer: copy the last `shiftSize` samples before startIdx // to the beginning of the buffer. This preserves recent audio context diff --git a/agents/src/inference/interruption/index.ts b/agents/src/inference/interruption/index.ts index b8a4ed715..0d0bc4c4a 100644 --- a/agents/src/inference/interruption/index.ts +++ b/agents/src/inference/interruption/index.ts @@ -1 +1,4 @@ export * from './AdaptiveInterruptionDetector.js'; +export * from './interruption.js'; +export { InterruptionStreamSentinel } from './InterruptionStream.js'; +export type { InterruptionSentinel } from './InterruptionStream.js'; From 049ca1726bbb6f3ed749c9b889dbd794effe7b4a Mon Sep 17 00:00:00 2001 From: lukasIO Date: Tue, 20 Jan 2026 15:43:20 +0100 Subject: [PATCH 07/25] more wip --- .changeset/config.json | 7 - .../AdaptiveInterruptionDetector.ts | 19 +- .../interruption/InterruptionStream.ts | 4 +- agents/src/voice/agent_activity.ts | 61 +++++++ agents/src/voice/agent_session.ts | 24 ++- agents/src/voice/audio_recognition.ts | 162 +++++++++++++++++- examples/src/adaptive_interruption.ts | 106 ++++++++++++ 7 files changed, 369 insertions(+), 14 deletions(-) create mode 100644 examples/src/adaptive_interruption.ts diff --git a/.changeset/config.json b/.changeset/config.json index af66336b2..6e26590ab 100644 --- a/.changeset/config.json +++ b/.changeset/config.json @@ -8,13 +8,6 @@ ], "commit": false, "ignore": ["livekit-agents-examples"], - "fixed": [ - [ - "@livekit/agents", - "@livekit/agents-plugin-*", - "@livekit/agents-plugins-test" - ] - ], "access": "public", "baseBranch": "main", "updateInternalDependencies": "patch", diff --git a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts index 04c5741e4..89c2a7b0b 100644 --- a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts +++ 
b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts @@ -123,9 +123,24 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ }); } + /** + * Creates a new InterruptionStreamBase for internal use. + * The stream can receive audio frames and sentinels via pushFrame(). + * Use this when you need direct access to the stream for pushing frames. + */ + createStream(): InterruptionStreamBase { + const stream = new InterruptionStreamBase(this, {}); + this.streams.add(stream); + return stream; + } + + /** + * Creates a new interruption stream and returns a ReadableStream of InterruptionEvents. + * This is a convenience method for consuming interruption events without needing + * to manage the underlying stream directly. + */ stream(): ReadableStream { - const httpStream = new InterruptionStreamBase(this, {}); - this.streams.add(httpStream); + const httpStream = this.createStream(); const transformer = new TransformStream({ transform: (chunk, controller) => { if (chunk.type === InterruptionEventType.INTERRUPTION) { diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index d1f1defac..fc1d5333e 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -1,6 +1,6 @@ import { AudioFrame, AudioResampler } from '@livekit/rtc-node'; -import type { Span } from '@opentelemetry/sdk-trace-base'; -import { traceTypes } from 'agents/src/telemetry/index.js'; +import type { Span } from '@opentelemetry/api'; +import { traceTypes } from '../../telemetry/index.js'; import { type ReadableStream, TransformStream, WritableStream } from 'stream/web'; import { log } from '../../log.js'; import { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js'; diff --git a/agents/src/voice/agent_activity.ts b/agents/src/voice/agent_activity.ts index 3a0713329..3300f68f1 100644 --- a/agents/src/voice/agent_activity.ts +++ b/agents/src/voice/agent_activity.ts @@ -41,6 +41,8 @@ import { recordRealtimeMetrics, traceTypes, tracer } from '../telemetry/index.js import { splitWords } from '../tokenize/basic/word.js'; import { TTS, type TTSError } from '../tts/tts.js'; import { Future, Task, cancelAndWait, waitFor } from '../utils.js'; +import type { InterruptionEvent } from '../inference/interruption/interruption.js'; +import { InterruptionEventType } from '../inference/interruption/interruption.js'; import { VAD, type VADEvent } from '../vad.js'; import type { Agent, ModelSettings } from './agent.js'; import { StopResponse, asyncLocalStorage } from './agent.js'; @@ -112,6 +114,24 @@ export class AgentActivity implements RecognitionHooks { _mainTask?: Task; _userTurnCompletedTask?: Promise; + /** + * Notify that agent started speaking. + * This enables interruption detection in AudioRecognition. + * @internal + */ + notifyAgentSpeechStarted(): void { + this.audioRecognition?.onStartOfAgentSpeech(); + } + + /** + * Notify that agent stopped speaking. + * This disables interruption detection in AudioRecognition. + * @internal + */ + notifyAgentSpeechEnded(): void { + this.audioRecognition?.onEndOfAgentSpeech(); + } + constructor(agent: Agent, agentSession: AgentSession) { this.agent = agent; this.agentSession = agentSession; @@ -292,6 +312,7 @@ export class AgentActivity implements RecognitionHooks { // Disable stt node if stt is not provided stt: this.stt ? 
(...args) => this.agent.sttNode(...args) : undefined,
       vad: this.vad,
+      interruptionDetector: this.agentSession.interruptionDetector,
       turnDetector: typeof this.turnDetection === 'string' ? undefined : this.turnDetection,
       turnDetectionMode: this.turnDetectionMode,
       minEndpointingDelay: this.agentSession.options.minEndpointingDelay,
@@ -697,6 +718,46 @@ export class AgentActivity implements RecognitionHooks {
     }
   }
 
+  onInterruption(ev: InterruptionEvent): void {
+    if (ev.type !== InterruptionEventType.INTERRUPTION) {
+      // Only handle actual interruptions, not overlap_speech_ended events
+      return;
+    }
+
+    this.logger.info(
+      {
+        probability: ev.probability,
+        detectionDelay: ev.detectionDelay,
+        totalDuration: ev.totalDuration,
+      },
+      'adaptive interruption detected',
+    );
+
+    // Similar to onVADInferenceDone but triggered by the adaptive interruption detector
+    if (this.turnDetection === 'manual' || this.turnDetection === 'realtime_llm') {
+      return;
+    }
+
+    if (this.llm instanceof RealtimeModel && this.llm.capabilities.turnDetection) {
+      return;
+    }
+
+    this.realtimeSession?.startUserActivity();
+
+    if (
+      this._currentSpeech &&
+      !this._currentSpeech.interrupted &&
+      this._currentSpeech.allowInterruptions
+    ) {
+      this.logger.info(
+        { 'speech id': this._currentSpeech.id },
+        'speech interrupted by adaptive interruption detector',
+      );
+      this.realtimeSession?.interrupt();
+      this._currentSpeech.interrupt();
+    }
+  }
+
   onInterimTranscript(ev: SpeechEvent): void {
     if (this.llm instanceof RealtimeModel && this.llm.capabilities.userTranscription) {
       // skip stt transcription if userTranscription is enabled on the realtime model
diff --git a/agents/src/voice/agent_session.ts b/agents/src/voice/agent_session.ts
index ad349a122..bb8325a80 100644
--- a/agents/src/voice/agent_session.ts
+++ b/agents/src/voice/agent_session.ts
@@ -15,6 +15,7 @@ import {
   type STTModelString,
   type TTSModelString,
 } from '../inference/index.js';
+import type { AdaptiveInterruptionDetector } from '../inference/interruption/AdaptiveInterruptionDetector.js';
 import { type JobContext, getJobContext } from '../job.js';
 import type { FunctionCall, FunctionCallOutput } from '../llm/chat_context.js';
 import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';
@@ -106,6 +107,7 @@ export type AgentSessionOptions = {
   vad?: VAD;
   llm?: LLM | RealtimeModel | LLMModels;
   tts?: TTS | TTSModelString;
+  interruptionDetector?: AdaptiveInterruptionDetector;
   userData?: UserData;
   voiceOptions?: Partial;
   connOptions?: SessionConnectOptions;
@@ -167,6 +169,8 @@ export class AgentSession<
   /** @internal - Timestamp when the session started (milliseconds) */
   _startedAt?: number;
 
+  interruptionDetector?: AdaptiveInterruptionDetector;
+
   constructor(opts: AgentSessionOptions) {
     super();
 
@@ -176,6 +180,7 @@ export class AgentSession<
       llm,
       tts,
       turnDetection,
+      interruptionDetector,
       userData,
       voiceOptions = defaultVoiceOptions,
       connOptions,
@@ -212,6 +217,7 @@ export class AgentSession<
     }
 
     this.turnDetection = turnDetection;
+    this.interruptionDetector = interruptionDetector;
     this._userData = userData;
 
     // configurable IO
@@ -637,6 +643,8 @@ export class AgentSession<
       return;
     }
 
+    const oldState = this._agentState;
+
    if (state === 'speaking') {
       // Reset error counts when agent starts speaking
       this.llmErrorCounts = 0;
@@ -651,13 +659,25 @@ export class AgentSession<
         // TODO(brian): PR4 - Set participant attributes if roomIO.room.localParticipant is available
         // (Ref: Python agent_session.py line 1161-1164)
       }
+
+      // Notify AudioRecognition that agent started speaking (for interruption detection)
+      this.activity?.notifyAgentSpeechStarted();
+    } else if (oldState === 'speaking') {
+      // Agent stopped speaking
+      if (this.agentSpeakingSpan !== undefined) {
+        // TODO(brian): PR4 - Set ATTR_END_TIME attribute if available
+        this.agentSpeakingSpan.end();
+        this.agentSpeakingSpan = undefined;
+      }
+
+      // Notify AudioRecognition that agent stopped speaking (for interruption detection)
+      this.activity?.notifyAgentSpeechEnded();
     } else if (this.agentSpeakingSpan !== undefined) {
-      // TODO(brian): PR4 - Set ATTR_END_TIME attribute if available
+      // Non-speaking to non-speaking transition but span is still open
       this.agentSpeakingSpan.end();
       this.agentSpeakingSpan = undefined;
     }
 
-    const oldState = this._agentState;
     this._agentState = state;
 
     // Handle user away timer based on state changes
diff --git a/agents/src/voice/audio_recognition.ts b/agents/src/voice/audio_recognition.ts
index 0382b1fd5..8206cffe7 100644
--- a/agents/src/voice/audio_recognition.ts
+++ b/agents/src/voice/audio_recognition.ts
@@ -5,6 +5,12 @@ import { AudioFrame } from '@livekit/rtc-node';
 import type { Context, Span } from '@opentelemetry/api';
 import type { WritableStreamDefaultWriter } from 'node:stream/web';
 import { ReadableStream } from 'node:stream/web';
+import type { AdaptiveInterruptionDetector } from '../inference/interruption/AdaptiveInterruptionDetector.js';
+import {
+  InterruptionStreamBase,
+  InterruptionStreamSentinel,
+} from '../inference/interruption/InterruptionStream.js';
+import type { InterruptionEvent } from '../inference/interruption/interruption.js';
 import { type ChatContext } from '../llm/chat_context.js';
 import { log } from '../log.js';
 import { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';
@@ -39,6 +45,7 @@ export interface RecognitionHooks {
   onFinalTranscript: (ev: SpeechEvent) => void;
   onEndOfTurn: (info: EndOfTurnInfo) => Promise;
   onPreemptiveGeneration: (info: PreemptiveGenerationInfo) => void;
+  onInterruption: (ev: InterruptionEvent) => void;
 
   retrieveChatCtx: () => ChatContext;
 }
@@ -53,6 +60,7 @@ export interface AudioRecognitionOptions {
   recognitionHooks: RecognitionHooks;
   stt?: STTNode;
   vad?: VAD;
+  interruptionDetector?: AdaptiveInterruptionDetector;
   turnDetector?: _TurnDetector;
   turnDetectionMode?: Exclude;
   minEndpointingDelay: number;
@@ -88,6 +96,7 @@ export class AudioRecognition {
 
   private vadInputStream: ReadableStream;
   private sttInputStream: ReadableStream;
+  private interruptionInputStream: ReadableStream;
   private silenceAudioTransform = new IdentityTransform();
   private silenceAudioWriter: WritableStreamDefaultWriter;
 
@@ -96,11 +105,19 @@ export class AudioRecognition {
   private commitUserTurnTask?: Task;
   private vadTask?: Task;
   private sttTask?: Task;
+  private interruptionTask?: Task;
+
+  // interruption detection
+  private interruptionDetector?: AdaptiveInterruptionDetector;
+  private interruptionStream?: InterruptionStreamBase;
+  private interruptionEnabled = false;
+  private agentSpeaking = false;
 
   constructor(opts: AudioRecognitionOptions) {
     this.hooks = opts.recognitionHooks;
     this.stt = opts.stt;
     this.vad = opts.vad;
+    this.interruptionDetector = opts.interruptionDetector;
     this.turnDetector = opts.turnDetector;
     this.turnDetectionMode = opts.turnDetectionMode;
     this.minEndpointingDelay = opts.minEndpointingDelay;
@@ -108,10 +125,15 @@ export class AudioRecognition {
     this.lastLanguage = undefined;
     this.rootSpanContext = opts.rootSpanContext;
 
+    // Interruption detection is only enabled if both detector and VAD are provided
+    this.interruptionEnabled = this.interruptionDetector !== undefined && this.vad !== undefined;
+
     this.deferredInputStream = new DeferredReadableStream();
-    const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();
+    const [vadInputStream, rest] = this.deferredInputStream.stream.tee();
+    const [sttInputStream, interruptionInputStream] = rest.tee();
     this.vadInputStream = vadInputStream;
     this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);
+    this.interruptionInputStream = interruptionInputStream;
     this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();
   }
 
@@ -135,6 +157,15 @@ export class AudioRecognition {
     this.sttTask.result.catch((err) => {
       this.logger.error(`Error running STT task: ${err}`);
     });
+
+    if (this.interruptionEnabled && this.interruptionDetector) {
+      this.interruptionTask = Task.from(({ signal }) =>
+        this.createInterruptionTask(this.interruptionDetector!, signal),
+      );
+      this.interruptionTask.result.catch((err) => {
+        this.logger.error(`Error running interruption task: ${err}`);
+      });
+    }
   }
 
   private async onSTTEvent(ev: SpeechEvent) {
@@ -577,6 +608,11 @@ export class AudioRecognition {
           this.sampleRate = ev.frames[0].sampleRate;
         }
 
+        // If agent is speaking, user speech is overlap - trigger interruption detection
+        if (this.agentSpeaking && this.interruptionEnabled) {
+          this.onStartOfOverlapSpeech(ev.speechDuration, this.userTurnSpan);
+        }
+
        this.bounceEOUTask?.cancel();
        break;
      case VADEventType.INFERENCE_DONE:
@@ -597,6 +633,11 @@ export class AudioRecognition {
        // when VAD fires END_OF_SPEECH, it already waited for the silence_duration
        this.speaking = false;
 
+        // If we were in overlap speech (agent speaking + user speaking), end it
+        if (this.agentSpeaking && this.interruptionEnabled) {
+          this.onEndOfOverlapSpeech();
+        }
+
        if (
          this.vadBaseTurnDetection ||
          (this.turnDetectionMode === 'stt' && this.userTurnCommitted)
@@ -614,6 +655,123 @@ export class AudioRecognition {
     }
   }
 
+  private async createInterruptionTask(
+    interruptionDetector: AdaptiveInterruptionDetector,
+    signal: AbortSignal,
+  ) {
+    // Create the interruption stream from the detector
+    this.interruptionStream = interruptionDetector.createStream();
+
+    // Forward audio frames to the interruption stream
+    const reader = this.interruptionInputStream.getReader();
+
+    const forwardTask = (async () => {
+      try {
+        while (!signal.aborted) {
+          const { done, value: frame } = await reader.read();
+          if (done) break;
+          await this.interruptionStream?.pushFrame(frame);
+        }
+      } catch (e) {
+        if (!signal.aborted) {
+          this.logger.error(e, 'Error forwarding audio to interruption stream');
+        }
+      } finally {
+        reader.releaseLock();
+      }
+    })();
+
+    // Read interruption events from the stream
+    const eventStream = this.interruptionStream.stream;
+    const eventReader = eventStream.getReader();
+
+    const abortHandler = () => {
+      eventReader.releaseLock();
+      this.interruptionStream?.close();
+      signal.removeEventListener('abort', abortHandler);
+    };
+    signal.addEventListener('abort', abortHandler);
+
+    try {
+      while (!signal.aborted) {
+        const { done, value: ev } = await eventReader.read();
+        if (done) break;
+
+        this.logger.debug({ type: ev.type, probability: ev.probability }, 'Interruption event');
+        this.hooks.onInterruption(ev);
+      }
+    } catch (e) {
+      if (!signal.aborted) {
+        this.logger.error(e, 'Error in interruption task');
+      }
+    } finally {
+      this.logger.debug('Interruption task closed');
+      await forwardTask;
+    }
+  }
+
+  /**
+   * Called when the agent starts speaking.
+   * Enables interruption detection by sending the agent-speech-started sentinel.
+   */
+  onStartOfAgentSpeech(): void {
+    this.agentSpeaking = true;
+
+    if (!this.interruptionEnabled || !this.interruptionStream) {
+      return;
+    }
+
+    this.interruptionStream.pushFrame(InterruptionStreamSentinel.speechStarted());
+  }
+
+  /**
+   * Called when the agent stops speaking.
+   * Disables interruption detection by sending the agent-speech-ended sentinel.
+   */
+  onEndOfAgentSpeech(): void {
+    if (!this.interruptionEnabled || !this.interruptionStream) {
+      this.agentSpeaking = false;
+      return;
+    }
+
+    this.interruptionStream.pushFrame(InterruptionStreamSentinel.speechEnded());
+
+    if (this.agentSpeaking) {
+      // No interruption was detected, end the overlap inference (idempotent)
+      this.onEndOfOverlapSpeech();
+    }
+
+    this.agentSpeaking = false;
+  }
+
+  /**
+   * Called when user starts speaking while agent is speaking (overlap speech).
+   * This triggers the interruption detection inference.
+   */
+  onStartOfOverlapSpeech(speechDuration: number, userSpeakingSpan?: Span): void {
+    if (!this.interruptionEnabled || !this.interruptionStream) {
+      return;
+    }
+
+    if (this.agentSpeaking && userSpeakingSpan) {
+      this.interruptionStream.pushFrame(
+        InterruptionStreamSentinel.overlapSpeechStarted(speechDuration, userSpeakingSpan),
+      );
+    }
+  }
+
+  /**
+   * Called when user stops speaking during overlap.
+   * This ends the interruption detection inference for this overlap period.
+   */
+  onEndOfOverlapSpeech(): void {
+    if (!this.interruptionEnabled || !this.interruptionStream) {
+      return;
+    }
+
+    this.interruptionStream.pushFrame(InterruptionStreamSentinel.overlapSpeechEnded());
+  }
+
   setInputAudioStream(audioStream: ReadableStream) {
     this.deferredInputStream.setSource(audioStream);
   }
@@ -686,6 +844,8 @@ export class AudioRecognition {
     await this.sttTask?.cancelAndWait();
     await this.vadTask?.cancelAndWait();
     await this.bounceEOUTask?.cancelAndWait();
+    await this.interruptionTask?.cancelAndWait();
+    await this.interruptionStream?.close();
   }
 
   private _endUserTurnSpan({
diff --git a/examples/src/adaptive_interruption.ts b/examples/src/adaptive_interruption.ts
new file mode 100644
index 000000000..4b02e688b
--- /dev/null
+++ b/examples/src/adaptive_interruption.ts
@@ -0,0 +1,106 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * This example demonstrates how to use the AdaptiveInterruptionDetector
+ * for detecting user interruptions during agent speech.
+ *
+ * The detector analyzes overlapping speech (when user speaks while agent is speaking)
+ * and determines whether the user intends to interrupt or is just providing backchannel
+ * feedback (like "uh-huh", "okay", etc).
+ *
+ * The interruption detection is integrated into AudioRecognition and works automatically
+ * when the detector is provided along with VAD. It:
+ * 1. Forwards audio frames to the detector when the agent is speaking
+ * 2. Triggers overlap detection when VAD detects user speech during agent speech
+ * 3. Emits interruption events that can be handled to stop/pause agent speech
+ */
+import {
+  AdaptiveInterruptionDetector,
+  type JobContext,
+  type JobProcess,
+  WorkerOptions,
+  cli,
+  defineAgent,
+  log,
+  voice,
+} from '@livekit/agents';
+import * as silero from '@livekit/agents-plugin-silero';
+import { fileURLToPath } from 'node:url';
+
+export default defineAgent({
+  prewarm: async (proc: JobProcess) => {
+    proc.userData.vad = await silero.VAD.load();
+  },
+  entry: async (ctx: JobContext) => {
+    const logger = log();
+    const vad = ctx.proc.userData.vad as silero.VAD;
+
+    await ctx.connect();
+
+    // Create the adaptive interruption detector with custom options
+    const interruptionDetector = new AdaptiveInterruptionDetector({
+      // Threshold for interruption classification (0-1)
+      // Higher = less sensitive, lower = more sensitive
+      threshold: 0.65,
+      // Minimum duration of overlap speech to consider as potential interruption
+      minInterruptionDuration: 0.05,
+      // Maximum audio duration to analyze (including prefix)
+      maxAudioDuration: 3.0,
+      // Audio context to include before overlap started
+      audioPrefixDuration: 0.5,
+      // How often to run inference during overlap
+      detectionInterval: 0.1,
+    });
+
+    // Listen for interruption events on the detector (optional - for logging/metrics)
+    interruptionDetector.on('interruptionDetected', () => {
+      logger.info('Interruption detected via detector event');
+    });
+
+    interruptionDetector.on('overlapSpeechDetected', () => {
+      logger.debug('Overlap speech ended without interruption (backchannel)');
+    });
+
+    // Create the agent
+    const agent = new voice.Agent({
+      instructions: `You are a helpful assistant that demonstrates interruption detection.
+        Speak naturally and respond to the user. When you are interrupted,
+        you will stop speaking and listen to the user.`,
+    });
+
+    // Create the session with interruption detection enabled
+    // The detector is passed to AgentSession which wires it through to AudioRecognition
+    const session = new voice.AgentSession({
+      llm: 'openai/gpt-4.1-mini',
+      stt: 'deepgram/nova-3',
+      tts: 'cartesia/sonic-2:c45bc5ec-dc68-4feb-8829-6e6b2748095d',
+      vad,
+      // Pass the interruption detector
+      interruptionDetector,
+    });
+
+    // Start the session
+    await session.start({
+      agent,
+      room: ctx.room,
+    });
+
+    // // Example: Dynamically adjust threshold based on context
+    // // This could be useful to adapt to different conversation styles
+    // setTimeout(() => {
+    //   logger.info('Adjusting interruption threshold for more sensitive detection');
+    //   interruptionDetector.updateOptions({
+    //     threshold: 0.5, // More sensitive to interruptions
+    //     minInterruptionDuration: 0.03, // Detect shorter interruptions
+    //   });
+    // }, 30000);
+
+    session.say(
+      'Hello! I can detect when you want to interrupt me versus when you are just saying things like uh-huh or okay. 
Try talking while I am speaking to see how it works!', + ); + }, +}); + +cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) })); From 0af5c0ace89376cbfc65e8a6b304632e453112d1 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Tue, 20 Jan 2026 15:44:07 +0100 Subject: [PATCH 08/25] changeset --- .changeset/shiny-eels-throw.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/shiny-eels-throw.md diff --git a/.changeset/shiny-eels-throw.md b/.changeset/shiny-eels-throw.md new file mode 100644 index 000000000..df3e21f67 --- /dev/null +++ b/.changeset/shiny-eels-throw.md @@ -0,0 +1,5 @@ +--- +'@livekit/agents': patch +--- + +barge in From 094b1a0aeb52b5def043e6bf1e316ddf315ead41 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Wed, 21 Jan 2026 09:42:48 +0100 Subject: [PATCH 09/25] local testing --- agents/src/inference/interruption/defaults.ts | 2 +- examples/src/adaptive_interruption.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/agents/src/inference/interruption/defaults.ts b/agents/src/inference/interruption/defaults.ts index e5e2ba6b3..2d6eeae3c 100644 --- a/agents/src/inference/interruption/defaults.ts +++ b/agents/src/inference/interruption/defaults.ts @@ -9,7 +9,7 @@ export const DETECTION_INTERVAL = 0.1; export const REMOTE_INFERENCE_TIMEOUT = 1.0; export const SAMPLE_RATE = 16000; export const FRAMES_PER_SECOND = 40; -export const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1'; +export const DEFAULT_BASE_URL = 'http://localhost:8080'; export const apiConnectDefaults: ApiConnectOptions = { maxRetries: 3, diff --git a/examples/src/adaptive_interruption.ts b/examples/src/adaptive_interruption.ts index 4b02e688b..b0c6906fe 100644 --- a/examples/src/adaptive_interruption.ts +++ b/examples/src/adaptive_interruption.ts @@ -60,7 +60,7 @@ export default defineAgent({ }); interruptionDetector.on('overlapSpeechDetected', () => { - logger.debug('Overlap speech ended without interruption (backchannel)'); + logger.info('Overlap speech ended without interruption (backchannel)'); }); // Create the agent From 732d7b4465f9fa6ab818d543352c42a0f0acbe8d Mon Sep 17 00:00:00 2001 From: lukasIO Date: Wed, 21 Jan 2026 16:19:58 +0100 Subject: [PATCH 10/25] smaller bugfixes --- .../AdaptiveInterruptionDetector.ts | 42 ++++++++----------- .../interruption/InterruptionStream.ts | 16 +++---- .../inference/interruption/http_transport.ts | 6 +-- .../inference/interruption/interruption.ts | 10 ++--- agents/src/inference/utils.ts | 2 +- agents/src/voice/audio_recognition.ts | 7 ++-- examples/src/adaptive_interruption.ts | 3 ++ 7 files changed, 41 insertions(+), 45 deletions(-) diff --git a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts index 89c2a7b0b..589a3ff64 100644 --- a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts +++ b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts @@ -1,6 +1,7 @@ import type { TypedEventEmitter } from '@livekit/typed-emitter'; +import { log } from 'agents/src/log.js'; import EventEmitter from 'events'; -import { type ReadableStream, TransformStream } from 'stream/web'; +import { TransformStream } from 'stream/web'; import { InterruptionStreamBase } from './InterruptionStream.js'; import { DEFAULT_BASE_URL, @@ -129,30 +130,21 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ * Use this when you need direct access to the stream for pushing frames. 
*/ createStream(): InterruptionStreamBase { - const stream = new InterruptionStreamBase(this, {}); - this.streams.add(stream); - return stream; - } - - /** - * Creates a new interruption stream and returns a ReadableStream of InterruptionEvents. - * This is a convenience method for consuming interruption events without needing - * to manage the underlying stream directly. - */ - stream(): ReadableStream { - const httpStream = this.createStream(); - const transformer = new TransformStream({ - transform: (chunk, controller) => { - if (chunk.type === InterruptionEventType.INTERRUPTION) { - this.emit('interruptionDetected'); // TODO payload - } else if (chunk.type === InterruptionEventType.OVERLAP_SPEECH_ENDED) { - this.emit('overlapSpeechDetected'); // TODO payload - } - controller.enqueue(chunk); - }, - }); - const stream = httpStream.stream.pipeThrough(transformer); - return stream; + const streamBase = new InterruptionStreamBase(this, {}); + this.streams.add(streamBase); + // const transformer = new TransformStream({ + // transform: (chunk, controller) => { + // log().info('adaptive interruption detection stream transformer', chunk); + // if (chunk.type === InterruptionEventType.INTERRUPTION) { + // this.emit('interruptionDetected'); // TODO payload + // } else if (chunk.type === InterruptionEventType.OVERLAP_SPEECH_ENDED) { + // this.emit('overlapSpeechDetected'); // TODO payload + // } + // controller.enqueue(chunk); + // }, + // }); + // streamBase.stream().pipeThrough(transformer); + return streamBase; } updateOptions(options: { threshold?: number; minInterruptionDuration?: number }): void { diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index fc1d5333e..fab9f9a97 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -1,9 +1,9 @@ import { AudioFrame, AudioResampler } from '@livekit/rtc-node'; import type { Span } from '@opentelemetry/api'; -import { traceTypes } from '../../telemetry/index.js'; import { type ReadableStream, TransformStream, WritableStream } from 'stream/web'; import { log } from '../../log.js'; import { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js'; +import { traceTypes } from '../../telemetry/index.js'; import { createAccessToken } from '../utils.js'; import type { AdaptiveInterruptionDetector, @@ -164,7 +164,7 @@ export class InterruptionStreamBase { controller.enqueue(audioSlice); } } else if (chunk.type === 'agent-speech-started') { - log().debug('agent speech started'); + log().info('agent speech started'); agentSpeechStarted = true; overlapSpeechStarted = false; @@ -172,7 +172,7 @@ export class InterruptionStreamBase { startIdx = 0; cache.clear(); } else if (chunk.type === 'agent-speech-ended') { - log().debug('agent speech ended'); + log().info('agent speech ended'); agentSpeechStarted = false; overlapSpeechStarted = false; @@ -181,7 +181,7 @@ export class InterruptionStreamBase { cache.clear(); } else if (chunk.type === 'overlap-speech-started' && agentSpeechStarted) { this.userSpeakingSpan = chunk.userSpeakingSpan; - log().debug('overlap speech started, starting interruption inference'); + log().info('overlap speech started, starting interruption inference'); overlapSpeechStarted = true; accumulatedSamples = 0; // Include both speech duration and audio prefix duration for context @@ -197,7 +197,7 @@ export class InterruptionStreamBase { startIdx = shiftSize; 
cache.clear(); } else if (chunk.type === 'overlap-speech-ended') { - log().debug('overlap speech ended'); + log().info('overlap speech ended'); if (overlapSpeechStarted) { this.userSpeakingSpan = undefined; @@ -248,14 +248,13 @@ export class InterruptionStreamBase { token: await createAccessToken(this.options.apiKey, this.options.apiSecret), }, ); - console.log('received inference response', resp); const { createdAt, isBargein, probabilities, predictionDuration } = resp; const entry = new InterruptionCacheEntry({ createdAt, probabilities, isInterruption: isBargein, speechInput: chunk, - totalDuration: (performance.now() - createdAt) / 1e9, + totalDuration: (performance.now() - createdAt) / 1000, detectionDelay: Date.now() - this.overlapSpeechStartedAt, predictionDuration, }); @@ -276,6 +275,7 @@ export class InterruptionStreamBase { detectionDelay: entry.detectionDelay, probability: entry.probability, }; + log().info(`emitting interruption event: ${event.type}`); this.eventStream.write(event); } }, @@ -309,7 +309,7 @@ export class InterruptionStreamBase { return this.resampler; } - get stream(): ReadableStream { + stream(): ReadableStream { return this.eventStream.stream(); } diff --git a/agents/src/inference/interruption/http_transport.ts b/agents/src/inference/interruption/http_transport.ts index c1f22a569..fc131ed4f 100644 --- a/agents/src/inference/interruption/http_transport.ts +++ b/agents/src/inference/interruption/http_transport.ts @@ -21,7 +21,7 @@ export interface PredictEndpointResponse { export interface PredictResponse { createdAt: number; isBargein: boolean; - probabilities: Float32Array; + probabilities: number[]; predictionDuration: number; } @@ -55,7 +55,7 @@ export async function predictHTTP( return { createdAt: created_at, isBargein: is_bargein, - probabilities: new Float32Array(probabilities), - predictionDuration: (performance.now() - createdAt) / 1e9, + probabilities, + predictionDuration: (performance.now() - createdAt) / 1000, }; } diff --git a/agents/src/inference/interruption/interruption.ts b/agents/src/inference/interruption/interruption.ts index e415f6d98..52783c895 100644 --- a/agents/src/inference/interruption/interruption.ts +++ b/agents/src/inference/interruption/interruption.ts @@ -14,7 +14,7 @@ export interface InterruptionEvent { detectionDelay: number; overlapSpeechStartedAt?: number; speechInput?: Int16Array; - probabilities?: Float32Array; + probabilities?: number[]; probability: number; } @@ -39,7 +39,7 @@ export class InterruptionDetectionError extends Error { } function estimateProbability( - probabilities: Float32Array, + probabilities: number[], windowSize: number = MIN_INTERRUPTION_DURATION, ): number { const minWindow = Math.ceil(windowSize / 0.025); // 25ms per frame @@ -47,7 +47,7 @@ function estimateProbability( return 0; } - return slidingWindowMinMax(probabilities, windowSize); + return slidingWindowMinMax(probabilities, minWindow); } /** @@ -59,7 +59,7 @@ export class InterruptionCacheEntry { readonly predictionDuration: number; readonly detectionDelay: number; readonly speechInput?: Int16Array; - readonly probabilities?: Float32Array; + readonly probabilities?: number[]; readonly isInterruption?: boolean; readonly probability: number; @@ -69,7 +69,7 @@ export class InterruptionCacheEntry { totalDuration?: number; predictionDuration?: number; detectionDelay?: number; - probabilities?: Float32Array; + probabilities?: number[]; isInterruption?: boolean; }) { this.createdAt = params.createdAt; diff --git a/agents/src/inference/utils.ts 
b/agents/src/inference/utils.ts index 38c9faa5f..e898d4de1 100644 --- a/agents/src/inference/utils.ts +++ b/agents/src/inference/utils.ts @@ -65,7 +65,7 @@ export async function connectWs( }); } -export function slidingWindowMinMax(probabilities: Float32Array, minWindow: number): number { +export function slidingWindowMinMax(probabilities: number[], minWindow: number): number { if (probabilities.length < minWindow) { return -Infinity; } diff --git a/agents/src/voice/audio_recognition.ts b/agents/src/voice/audio_recognition.ts index 8206cffe7..24c109891 100644 --- a/agents/src/voice/audio_recognition.ts +++ b/agents/src/voice/audio_recognition.ts @@ -609,7 +609,8 @@ export class AudioRecognition { } // If agent is speaking, user speech is overlap - trigger interruption detection - if (this.agentSpeaking && this.interruptionEnabled) { + if (this.agentSpeaking) { + // TODO re-enable check for this.interruptionEnabled this.onStartOfOverlapSpeech(ev.speechDuration, this.userTurnSpan); } @@ -682,7 +683,7 @@ export class AudioRecognition { })(); // Read interruption events from the stream - const eventStream = this.interruptionStream.stream; + const eventStream = this.interruptionStream.stream(); const eventReader = eventStream.getReader(); const abortHandler = () => { @@ -697,7 +698,7 @@ export class AudioRecognition { const { done, value: ev } = await eventReader.read(); if (done) break; - this.logger.debug({ type: ev.type, probability: ev.probability }, 'Interruption event'); + this.logger.info({ type: ev.type, probability: ev.probability }, 'Interruption event'); this.hooks.onInterruption(ev); } } catch (e) { diff --git a/examples/src/adaptive_interruption.ts b/examples/src/adaptive_interruption.ts index b0c6906fe..6e6700f58 100644 --- a/examples/src/adaptive_interruption.ts +++ b/examples/src/adaptive_interruption.ts @@ -79,6 +79,9 @@ export default defineAgent({ vad, // Pass the interruption detector interruptionDetector, + voiceOptions: { + allowInterruptions: false, + }, }); // Start the session From d52d8afa1239dd588bd391d21b0272b9309bfe61 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Wed, 21 Jan 2026 17:38:14 +0100 Subject: [PATCH 11/25] more bug fixes and back pressure --- .../interruption/InterruptionStream.ts | 80 +++++++++++++------ 1 file changed, 55 insertions(+), 25 deletions(-) diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index fab9f9a97..087a55abe 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -1,6 +1,6 @@ import { AudioFrame, AudioResampler } from '@livekit/rtc-node'; import type { Span } from '@opentelemetry/api'; -import { type ReadableStream, TransformStream, WritableStream } from 'stream/web'; +import { type ReadableStream, TransformStream } from 'stream/web'; import { log } from '../../log.js'; import { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js'; import { traceTypes } from '../../telemetry/index.js'; @@ -92,7 +92,7 @@ function updateUserSpeakingSpan(span: Span, entry: InterruptionCacheEntry) { export class InterruptionStreamBase { private inputStream: StreamChannel; - private eventStream: StreamChannel; + private eventStream: ReadableStream; private resampler?: AudioResampler; @@ -112,16 +112,14 @@ export class InterruptionStreamBase { InterruptionDetectionError >(); - this.eventStream = createStreamChannel(); - this.model = model; this.options = model.options; 
this.apiOptions = { ...apiConnectDefaults, ...apiOptions }; - this.setupTransform(); + this.eventStream = this.setupTransform(); } - private setupTransform() { + private setupTransform(): ReadableStream { let agentSpeechStarted = false; let startIdx = 0; let accumulatedSamples = 0; @@ -131,7 +129,11 @@ export class InterruptionStreamBase { Math.ceil(this.options.maxAudioDuration * this.options.sampleRate), ).fill(0); - const transformer = new TransformStream( + // First transform: process input frames/sentinels and output audio slices or events + const audioTransformer = new TransformStream< + InterruptionSentinel | AudioFrame, + Int16Array | InterruptionEvent + >( { transform: (chunk, controller) => { if (chunk instanceof AudioFrame) { @@ -159,8 +161,13 @@ export class InterruptionStreamBase { ) { // Send a copy of the audio data up to startIdx for inference const audioSlice = inferenceS16Data.slice(0, startIdx); - // TODO: send to data channel - dataChan.send(audioSlice); accumulatedSamples = 0; + const sinceOverlapStart = this.overlapSpeechStartedAt + ? Date.now() - this.overlapSpeechStartedAt + : 0; + log().info( + `audioTransformer: enqueuing audio slice for inference, ${sinceOverlapStart}ms since overlap start, ${audioSlice.length} samples`, + ); controller.enqueue(audioSlice); } } else if (chunk.type === 'agent-speech-started') { @@ -220,7 +227,8 @@ export class InterruptionStreamBase { predictionDuration: latestEntry.predictionDuration, probability: latestEntry.probability, }; - this.eventStream.write(event); + controller.enqueue(event); + overlapSpeechStarted = false; } } else if (chunk.type === 'flush') { log().debug('flushing'); @@ -228,17 +236,31 @@ export class InterruptionStreamBase { } }, }, - { highWaterMark: Number.MAX_SAFE_INTEGER }, - { highWaterMark: Number.MAX_SAFE_INTEGER }, + { highWaterMark: 32 }, + { highWaterMark: 32 }, ); - const httpPostWriter = new WritableStream( + // Second transform: HTTP transport - converts audio slices to events, passes through existing events + const httpTransport = new TransformStream( { - // Implement the sink - write: async (chunk) => { + transform: async (chunk, controller) => { + // Pass through InterruptionEvents unchanged + if (!(chunk instanceof Int16Array)) { + log().info( + `httpTransport: passing through event type=${chunk.type}, detectionDelay=${chunk.detectionDelay}ms`, + ); + controller.enqueue(chunk); + return; + } + if (!this.overlapSpeechStartedAt) { return; } + const httpStartTime = Date.now(); + const sinceOverlapStart = httpStartTime - this.overlapSpeechStartedAt; + log().info( + `httpTransport: starting HTTP prediction, ${sinceOverlapStart}ms since overlap start`, + ); const resp = await predictHTTP( chunk, { threshold: this.options.threshold, minFrames: this.options.minFrames }, @@ -248,7 +270,11 @@ export class InterruptionStreamBase { token: await createAccessToken(this.options.apiKey, this.options.apiSecret), }, ); + const httpDuration = Date.now() - httpStartTime; const { createdAt, isBargein, probabilities, predictionDuration } = resp; + log().info( + `httpTransport: HTTP prediction completed in ${httpDuration}ms, isBargein=${isBargein}, predictionDuration=${predictionDuration}ms`, + ); const entry = new InterruptionCacheEntry({ createdAt, probabilities, @@ -275,21 +301,20 @@ export class InterruptionStreamBase { detectionDelay: entry.detectionDelay, probability: entry.probability, }; - log().info(`emitting interruption event: ${event.type}`); - this.eventStream.write(event); + log().info( + `httpTransport: 
emitting interruption event, detectionDelay=${entry.detectionDelay}ms, totalDuration=${(entry.totalDuration * 1000).toFixed(0)}ms`, + ); + overlapSpeechStarted = false; + controller.enqueue(event); } }, - close() { - console.log('closing http writer'); - }, - abort(err) { - console.log('Sink error:', err); - }, }, - { highWaterMark: Number.MAX_SAFE_INTEGER }, + { highWaterMark: 2 }, + { highWaterMark: 2 }, ); - this.inputStream.stream().pipeThrough(transformer).pipeTo(httpPostWriter); + // Pipeline: input -> audioTransformer -> httpTransport -> eventStream + return this.inputStream.stream().pipeThrough(audioTransformer).pipeThrough(httpTransport); } private ensureInputNotEnded() { @@ -310,7 +335,7 @@ export class InterruptionStreamBase { } stream(): ReadableStream { - return this.eventStream.stream(); + return this.eventStream; } async pushFrame(frame: InterruptionSentinel | AudioFrame): Promise { @@ -318,6 +343,11 @@ export class InterruptionStreamBase { if (!(frame instanceof AudioFrame)) { if (frame.type === 'overlap-speech-started') { this.overlapSpeechStartedAt = Date.now() - frame.speechDuration; + log().info( + `pushFrame: overlap-speech-started, speechDuration=${frame.speechDuration}ms, overlapSpeechStartedAt set to ${this.overlapSpeechStartedAt}`, + ); + } else { + log().info(`pushFrame: sentinel type=${frame.type}`); } return this.inputStream.write(frame); } else if (this.options.sampleRate !== frame.sampleRate) { From 4c4dbc87f1dc66ef8af791173b67f5e9f3a174a4 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Wed, 21 Jan 2026 19:25:45 +0100 Subject: [PATCH 12/25] better logging --- .../AdaptiveInterruptionDetector.ts | 35 +++++++------- .../interruption/InterruptionStream.ts | 46 +++++-------------- 2 files changed, 27 insertions(+), 54 deletions(-) diff --git a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts index 589a3ff64..133387156 100644 --- a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts +++ b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts @@ -1,7 +1,6 @@ import type { TypedEventEmitter } from '@livekit/typed-emitter'; -import { log } from 'agents/src/log.js'; import EventEmitter from 'events'; -import { TransformStream } from 'stream/web'; +import { log } from '../../log.js'; import { InterruptionStreamBase } from './InterruptionStream.js'; import { DEFAULT_BASE_URL, @@ -9,11 +8,7 @@ import { SAMPLE_RATE, interruptionOptionDefaults, } from './defaults.js'; -import { - type InterruptionDetectionError, - type InterruptionEvent, - InterruptionEventType, -} from './interruption.js'; +import { type InterruptionDetectionError } from './interruption.js'; type InterruptionCallbacks = { interruptionDetected: () => void; @@ -40,7 +35,7 @@ export type AdaptiveInterruptionDetectorOptions = Partial; export class AdaptiveInterruptionDetector extends (EventEmitter as new () => TypedEventEmitter) { options: InterruptionOptions; - private label: string; + private logger = log(); private streams: WeakSet; // TODO: Union of InterruptionHttpStream | InterruptionWebSocketStream constructor(options: AdaptiveInterruptionDetectorOptions = {}) { @@ -109,19 +104,21 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ minInterruptionDuration, }; - this.label = `${this.constructor.name}`; this.streams = new WeakSet(); - console.info('adaptive interruption detector initialized', { - baseUrl: this.options.baseUrl, - detectionInterval: 
this.options.detectionInterval, - audioPrefixDuration: this.options.audioPrefixDuration, - maxAudioDuration: this.options.maxAudioDuration, - minFrames: this.options.minFrames, - threshold: this.options.threshold, - inferenceTimeout: this.options.inferenceTimeout, - useProxy: this.options.useProxy, - }); + this.logger.debug( + { + baseUrl: this.options.baseUrl, + detectionInterval: this.options.detectionInterval, + audioPrefixDuration: this.options.audioPrefixDuration, + maxAudioDuration: this.options.maxAudioDuration, + minFrames: this.options.minFrames, + threshold: this.options.threshold, + inferenceTimeout: this.options.inferenceTimeout, + useProxy: this.options.useProxy, + }, + 'adaptive interruption detector initialized', + ); } /** diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index 087a55abe..76e2b6ec2 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -106,6 +106,8 @@ export class InterruptionStreamBase { private model: AdaptiveInterruptionDetector; + private logger = log(); + constructor(model: AdaptiveInterruptionDetector, apiOptions: Partial) { this.inputStream = createStreamChannel< InterruptionSentinel | AudioFrame, @@ -162,25 +164,17 @@ export class InterruptionStreamBase { // Send a copy of the audio data up to startIdx for inference const audioSlice = inferenceS16Data.slice(0, startIdx); accumulatedSamples = 0; - const sinceOverlapStart = this.overlapSpeechStartedAt - ? Date.now() - this.overlapSpeechStartedAt - : 0; - log().info( - `audioTransformer: enqueuing audio slice for inference, ${sinceOverlapStart}ms since overlap start, ${audioSlice.length} samples`, - ); controller.enqueue(audioSlice); } } else if (chunk.type === 'agent-speech-started') { - log().info('agent speech started'); - + this.logger.debug('agent speech started'); agentSpeechStarted = true; overlapSpeechStarted = false; accumulatedSamples = 0; startIdx = 0; cache.clear(); } else if (chunk.type === 'agent-speech-ended') { - log().info('agent speech ended'); - + this.logger.debug('agent speech ended'); agentSpeechStarted = false; overlapSpeechStarted = false; accumulatedSamples = 0; @@ -188,7 +182,7 @@ export class InterruptionStreamBase { cache.clear(); } else if (chunk.type === 'overlap-speech-started' && agentSpeechStarted) { this.userSpeakingSpan = chunk.userSpeakingSpan; - log().info('overlap speech started, starting interruption inference'); + this.logger.debug('overlap speech started, starting interruption inference'); overlapSpeechStarted = true; accumulatedSamples = 0; // Include both speech duration and audio prefix duration for context @@ -204,13 +198,12 @@ export class InterruptionStreamBase { startIdx = shiftSize; cache.clear(); } else if (chunk.type === 'overlap-speech-ended') { - log().info('overlap speech ended'); - + this.logger.debug('overlap speech ended'); if (overlapSpeechStarted) { this.userSpeakingSpan = undefined; let latestEntry = Array.from(cache.values()).at(-1); if (!latestEntry) { - log().debug('no request made for overlap speech'); + this.logger.debug('no request made for overlap speech'); latestEntry = InterruptionCacheEntry.default(); } else { cache.delete(latestEntry.createdAt); @@ -231,8 +224,7 @@ export class InterruptionStreamBase { overlapSpeechStarted = false; } } else if (chunk.type === 'flush') { - log().debug('flushing'); - // do nothing + // no-op } }, }, @@ -246,9 +238,6 @@ export class 
InterruptionStreamBase { transform: async (chunk, controller) => { // Pass through InterruptionEvents unchanged if (!(chunk instanceof Int16Array)) { - log().info( - `httpTransport: passing through event type=${chunk.type}, detectionDelay=${chunk.detectionDelay}ms`, - ); controller.enqueue(chunk); return; } @@ -256,11 +245,6 @@ export class InterruptionStreamBase { if (!this.overlapSpeechStartedAt) { return; } - const httpStartTime = Date.now(); - const sinceOverlapStart = httpStartTime - this.overlapSpeechStartedAt; - log().info( - `httpTransport: starting HTTP prediction, ${sinceOverlapStart}ms since overlap start`, - ); const resp = await predictHTTP( chunk, { threshold: this.options.threshold, minFrames: this.options.minFrames }, @@ -270,11 +254,7 @@ export class InterruptionStreamBase { token: await createAccessToken(this.options.apiKey, this.options.apiSecret), }, ); - const httpDuration = Date.now() - httpStartTime; const { createdAt, isBargein, probabilities, predictionDuration } = resp; - log().info( - `httpTransport: HTTP prediction completed in ${httpDuration}ms, isBargein=${isBargein}, predictionDuration=${predictionDuration}ms`, - ); const entry = new InterruptionCacheEntry({ createdAt, probabilities, @@ -301,8 +281,9 @@ export class InterruptionStreamBase { detectionDelay: entry.detectionDelay, probability: entry.probability, }; - log().info( - `httpTransport: emitting interruption event, detectionDelay=${entry.detectionDelay}ms, totalDuration=${(entry.totalDuration * 1000).toFixed(0)}ms`, + this.logger.debug( + { detectionDelay: entry.detectionDelay, totalDuration: entry.totalDuration }, + 'interruption detected', ); overlapSpeechStarted = false; controller.enqueue(event); @@ -343,11 +324,6 @@ export class InterruptionStreamBase { if (!(frame instanceof AudioFrame)) { if (frame.type === 'overlap-speech-started') { this.overlapSpeechStartedAt = Date.now() - frame.speechDuration; - log().info( - `pushFrame: overlap-speech-started, speechDuration=${frame.speechDuration}ms, overlapSpeechStartedAt set to ${this.overlapSpeechStartedAt}`, - ); - } else { - log().info(`pushFrame: sentinel type=${frame.type}`); } return this.inputStream.write(frame); } else if (this.options.sampleRate !== frame.sampleRate) { From c27f8dcd51da731ca85f9f9b5e0eb256847f0a71 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Wed, 21 Jan 2026 20:13:19 +0100 Subject: [PATCH 13/25] refactor and update naming --- .../AdaptiveInterruptionDetector.ts | 42 +-- .../interruption/InterruptionStream.ts | 123 +++---- agents/src/inference/interruption/defaults.ts | 21 +- .../inference/interruption/http_transport.ts | 118 +++++- .../inference/interruption/interruption.ts | 6 +- .../interruption}/ws_transport.test.ts | 0 .../inference/interruption/ws_transport.ts | 346 ++++++++++++++++++ agents/src/utils/ws_transport.ts | 22 -- 8 files changed, 542 insertions(+), 136 deletions(-) rename agents/src/{utils => inference/interruption}/ws_transport.test.ts (100%) create mode 100644 agents/src/inference/interruption/ws_transport.ts delete mode 100644 agents/src/utils/ws_transport.ts diff --git a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts index 133387156..c20da246d 100644 --- a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts +++ b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts @@ -20,11 +20,11 @@ export interface InterruptionOptions { sampleRate: number; threshold: number; minFrames: number; - 
maxAudioDuration: number; - audioPrefixDuration: number; - detectionInterval: number; + maxAudioDurationInS: number; + audioPrefixDurationInS: number; + detectionIntervalInS: number; inferenceTimeout: number; - minInterruptionDuration: number; + minInterruptionDurationInS: number; baseUrl: string; apiKey: string; apiSecret: string; @@ -42,20 +42,20 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ super(); const { - maxAudioDuration, + maxAudioDurationInS, baseUrl, apiKey, apiSecret, useProxy: useProxyArg, - audioPrefixDuration, + audioPrefixDurationInS, threshold, - detectionInterval, + detectionIntervalInS, inferenceTimeout, - minInterruptionDuration, + minInterruptionDurationInS, } = { ...interruptionOptionDefaults, ...options }; - if (maxAudioDuration > 3.0) { - throw new Error('maxAudioDuration must be less than or equal to 3.0 seconds'); + if (maxAudioDurationInS > 3.0) { + throw new Error('maxAudioDurationInS must be less than or equal to 3.0 seconds'); } const lkBaseUrl = baseUrl ?? process.env.LIVEKIT_REMOTE_EOT_URL ?? DEFAULT_BASE_URL; @@ -92,16 +92,16 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ this.options = { sampleRate: SAMPLE_RATE, threshold, - minFrames: Math.ceil(minInterruptionDuration * FRAMES_PER_SECOND), - maxAudioDuration, - audioPrefixDuration, - detectionInterval, + minFrames: Math.ceil(minInterruptionDurationInS * FRAMES_PER_SECOND), + maxAudioDurationInS, + audioPrefixDurationInS, + detectionIntervalInS, inferenceTimeout, baseUrl: lkBaseUrl, apiKey: lkApiKey, apiSecret: lkApiSecret, useProxy, - minInterruptionDuration, + minInterruptionDurationInS, }; this.streams = new WeakSet(); @@ -109,9 +109,9 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ this.logger.debug( { baseUrl: this.options.baseUrl, - detectionInterval: this.options.detectionInterval, - audioPrefixDuration: this.options.audioPrefixDuration, - maxAudioDuration: this.options.maxAudioDuration, + detectionIntervalInS: this.options.detectionIntervalInS, + audioPrefixDurationInS: this.options.audioPrefixDurationInS, + maxAudioDurationInS: this.options.maxAudioDurationInS, minFrames: this.options.minFrames, threshold: this.options.threshold, inferenceTimeout: this.options.inferenceTimeout, @@ -144,12 +144,12 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ return streamBase; } - updateOptions(options: { threshold?: number; minInterruptionDuration?: number }): void { + updateOptions(options: { threshold?: number; minInterruptionDurationInS?: number }): void { if (options.threshold !== undefined) { this.options.threshold = options.threshold; } - if (options.minInterruptionDuration !== undefined) { - this.options.minFrames = Math.ceil(options.minInterruptionDuration * FRAMES_PER_SECOND); + if (options.minInterruptionDurationInS !== undefined) { + this.options.minFrames = Math.ceil(options.minInterruptionDurationInS * FRAMES_PER_SECOND); } } } diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index 76e2b6ec2..7071ebb00 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -4,19 +4,19 @@ import { type ReadableStream, TransformStream } from 'stream/web'; import { log } from '../../log.js'; import { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js'; import { traceTypes } from 
'../../telemetry/index.js'; -import { createAccessToken } from '../utils.js'; import type { AdaptiveInterruptionDetector, InterruptionOptions, } from './AdaptiveInterruptionDetector.js'; import { apiConnectDefaults } from './defaults.js'; -import { predictHTTP } from './http_transport.js'; +import { createHttpTransport } from './http_transport.js'; import { InterruptionCacheEntry, type InterruptionDetectionError, type InterruptionEvent, InterruptionEventType, } from './interruption.js'; +import { createWsTransport } from './ws_transport.js'; export interface AgentSpeechStarted { type: 'agent-speech-started'; @@ -28,7 +28,7 @@ export interface AgentSpeechEnded { export interface OverlapSpeechStarted { type: 'overlap-speech-started'; - speechDuration: number; + speechDurationInS: number; userSpeakingSpan: Span; } @@ -57,10 +57,10 @@ export class InterruptionStreamSentinel { } static overlapSpeechStarted( - speechDuration: number, + speechDurationInS: number, userSpeakingSpan: Span, ): OverlapSpeechStarted { - return { type: 'overlap-speech-started', speechDuration, userSpeakingSpan }; + return { type: 'overlap-speech-started', speechDurationInS, userSpeakingSpan }; } static overlapSpeechEnded(): OverlapSpeechEnded { @@ -126,11 +126,29 @@ export class InterruptionStreamBase { let startIdx = 0; let accumulatedSamples = 0; let overlapSpeechStarted = false; - const cache = new Map(); // TODO limit cache size + const cache = new Map(); const inferenceS16Data = new Int16Array( - Math.ceil(this.options.maxAudioDuration * this.options.sampleRate), + Math.ceil(this.options.maxAudioDurationInS * this.options.sampleRate), ).fill(0); + // State accessors for transport + const getState = () => ({ + overlapSpeechStarted, + overlapSpeechStartedAt: this.overlapSpeechStartedAt, + cache, + }); + const setState = (partial: { overlapSpeechStarted?: boolean }) => { + if (partial.overlapSpeechStarted !== undefined) { + overlapSpeechStarted = partial.overlapSpeechStarted; + } + }; + const handleSpanUpdate = (entry: InterruptionCacheEntry) => { + if (this.userSpeakingSpan) { + updateUserSpeakingSpan(this.userSpeakingSpan, entry); + this.userSpeakingSpan = undefined; + } + }; + // First transform: process input frames/sentinels and output audio slices or events const audioTransformer = new TransformStream< InterruptionSentinel | AudioFrame, @@ -150,7 +168,7 @@ export class InterruptionStreamBase { chunk, startIdx, inferenceS16Data, - this.options.maxAudioDuration, + this.options.maxAudioDurationInS, ); startIdx = result.startIdx; accumulatedSamples += result.samplesWritten; @@ -158,7 +176,7 @@ export class InterruptionStreamBase { // Send data for inference when enough samples accumulated during overlap if ( accumulatedSamples >= - Math.floor(this.options.detectionInterval * this.options.sampleRate) && + Math.floor(this.options.detectionIntervalInS * this.options.sampleRate) && overlapSpeechStarted ) { // Send a copy of the audio data up to startIdx for inference @@ -188,8 +206,8 @@ export class InterruptionStreamBase { // Include both speech duration and audio prefix duration for context const shiftSize = Math.min( startIdx, - Math.round(chunk.speechDuration * this.options.sampleRate) + - Math.round(this.options.audioPrefixDuration * this.options.sampleRate), + Math.round(chunk.speechDurationInS * this.options.sampleRate) + + Math.round(this.options.audioPrefixDurationInS * this.options.sampleRate), ); // Shift the buffer: copy the last `shiftSize` samples before startIdx // to the beginning of the buffer. 
This preserves recent audio context @@ -232,70 +250,23 @@ export class InterruptionStreamBase { { highWaterMark: 32 }, ); - // Second transform: HTTP transport - converts audio slices to events, passes through existing events - const httpTransport = new TransformStream( - { - transform: async (chunk, controller) => { - // Pass through InterruptionEvents unchanged - if (!(chunk instanceof Int16Array)) { - controller.enqueue(chunk); - return; - } - - if (!this.overlapSpeechStartedAt) { - return; - } - const resp = await predictHTTP( - chunk, - { threshold: this.options.threshold, minFrames: this.options.minFrames }, - { - baseUrl: this.options.baseUrl, - timeout: this.options.inferenceTimeout, - token: await createAccessToken(this.options.apiKey, this.options.apiSecret), - }, - ); - const { createdAt, isBargein, probabilities, predictionDuration } = resp; - const entry = new InterruptionCacheEntry({ - createdAt, - probabilities, - isInterruption: isBargein, - speechInput: chunk, - totalDuration: (performance.now() - createdAt) / 1000, - detectionDelay: Date.now() - this.overlapSpeechStartedAt, - predictionDuration, - }); - cache.set(createdAt, entry); - if (overlapSpeechStarted && entry.isInterruption) { - if (this.userSpeakingSpan) { - updateUserSpeakingSpan(this.userSpeakingSpan, entry); - } - const event: InterruptionEvent = { - type: InterruptionEventType.INTERRUPTION, - timestamp: Date.now(), - overlapSpeechStartedAt: this.overlapSpeechStartedAt, - isInterruption: entry.isInterruption, - speechInput: entry.speechInput, - probabilities: entry.probabilities, - totalDuration: entry.totalDuration, - predictionDuration: entry.predictionDuration, - detectionDelay: entry.detectionDelay, - probability: entry.probability, - }; - this.logger.debug( - { detectionDelay: entry.detectionDelay, totalDuration: entry.totalDuration }, - 'interruption detected', - ); - overlapSpeechStarted = false; - controller.enqueue(event); - } - }, - }, - { highWaterMark: 2 }, - { highWaterMark: 2 }, - ); - - // Pipeline: input -> audioTransformer -> httpTransport -> eventStream - return this.inputStream.stream().pipeThrough(audioTransformer).pipeThrough(httpTransport); + // Second transform: transport layer (HTTP or WebSocket based on useProxy) + const transportOptions = { + baseUrl: this.options.baseUrl, + apiKey: this.options.apiKey, + apiSecret: this.options.apiSecret, + sampleRate: this.options.sampleRate, + threshold: this.options.threshold, + minFrames: this.options.minFrames, + timeout: this.options.inferenceTimeout, + }; + + const transport = this.options.useProxy + ? 
createWsTransport(transportOptions, getState, setState, handleSpanUpdate) + : createHttpTransport(transportOptions, getState, setState, handleSpanUpdate); + + // Pipeline: input -> audioTransformer -> transport -> eventStream + return this.inputStream.stream().pipeThrough(audioTransformer).pipeThrough(transport); } private ensureInputNotEnded() { @@ -323,7 +294,7 @@ export class InterruptionStreamBase { this.ensureStreamsNotEnded(); if (!(frame instanceof AudioFrame)) { if (frame.type === 'overlap-speech-started') { - this.overlapSpeechStartedAt = Date.now() - frame.speechDuration; + this.overlapSpeechStartedAt = Date.now() - frame.speechDurationInS * 1000; } return this.inputStream.write(frame); } else if (this.options.sampleRate !== frame.sampleRate) { diff --git a/agents/src/inference/interruption/defaults.ts b/agents/src/inference/interruption/defaults.ts index 2d6eeae3c..60529b7d2 100644 --- a/agents/src/inference/interruption/defaults.ts +++ b/agents/src/inference/interruption/defaults.ts @@ -1,14 +1,15 @@ import type { InterruptionOptions } from './AdaptiveInterruptionDetector.js'; import type { ApiConnectOptions } from './InterruptionStream.js'; -export const MIN_INTERRUPTION_DURATION = 0.025 * 2; // 25ms per frame, 2 consecutive frames +export const MIN_INTERRUPTION_DURATION_IN_S = 0.025 * 2; // 25ms per frame, 2 consecutive frames export const THRESHOLD = 0.65; -export const MAX_AUDIO_DURATION = 3.0; -export const AUDIO_PREFIX_DURATION = 0.5; -export const DETECTION_INTERVAL = 0.1; -export const REMOTE_INFERENCE_TIMEOUT = 1.0; +export const MAX_AUDIO_DURATION_IN_S = 3.0; +export const AUDIO_PREFIX_DURATION_IN_S = 0.5; +export const DETECTION_INTERVAL_IN_S = 0.1; +export const REMOTE_INFERENCE_TIMEOUT_IN_S = 1.0; export const SAMPLE_RATE = 16000; export const FRAMES_PER_SECOND = 40; +export const FRAME_DURATION_IN_S = 0.025; // 25ms per frame export const DEFAULT_BASE_URL = 'http://localhost:8080'; export const apiConnectDefaults: ApiConnectOptions = { @@ -20,14 +21,14 @@ export const apiConnectDefaults: ApiConnectOptions = { export const interruptionOptionDefaults: InterruptionOptions = { sampleRate: SAMPLE_RATE, threshold: THRESHOLD, - minFrames: Math.ceil(MIN_INTERRUPTION_DURATION * FRAMES_PER_SECOND), - maxAudioDuration: MAX_AUDIO_DURATION, - audioPrefixDuration: AUDIO_PREFIX_DURATION, - detectionInterval: DETECTION_INTERVAL, + minFrames: Math.ceil(MIN_INTERRUPTION_DURATION_IN_S * FRAMES_PER_SECOND), + maxAudioDurationInS: MAX_AUDIO_DURATION_IN_S, + audioPrefixDurationInS: AUDIO_PREFIX_DURATION_IN_S, + detectionIntervalInS: DETECTION_INTERVAL_IN_S, inferenceTimeout: 10_000, baseUrl: DEFAULT_BASE_URL, apiKey: process.env.LIVEKIT_API_KEY || '', apiSecret: process.env.LIVEKIT_API_SECRET || '', useProxy: false, - minInterruptionDuration: MIN_INTERRUPTION_DURATION, + minInterruptionDurationInS: MIN_INTERRUPTION_DURATION_IN_S, } as const; diff --git a/agents/src/inference/interruption/http_transport.ts b/agents/src/inference/interruption/http_transport.ts index fc131ed4f..82f9726b6 100644 --- a/agents/src/inference/interruption/http_transport.ts +++ b/agents/src/inference/interruption/http_transport.ts @@ -1,4 +1,13 @@ import { ofetch } from 'ofetch'; +import { TransformStream } from 'stream/web'; +import { log } from '../../log.js'; +import { createAccessToken } from '../utils.js'; +import type { ApiConnectOptions } from './InterruptionStream.js'; +import { + InterruptionCacheEntry, + type InterruptionEvent, + InterruptionEventType, +} from './interruption.js'; export interface 
PostOptions { baseUrl: string; @@ -22,13 +31,14 @@ export interface PredictResponse { createdAt: number; isBargein: boolean; probabilities: number[]; - predictionDuration: number; + predictionDurationInS: number; } export async function predictHTTP( data: Int16Array, predictOptions: PredictOptions, options: PostOptions, + apiOptions: ApiConnectOptions, ): Promise { const createdAt = performance.now(); const url = new URL(`/bargein`, options.baseUrl); @@ -39,8 +49,11 @@ export async function predictHTTP( const { created_at, is_bargein, probabilities } = await ofetch( url.toString(), { - retry: 1, - retryDelay: 100, + retry: apiOptions.maxRetries, + retryDelay: () => { + // TODO backoff + return apiOptions.retryInterval; + }, headers: { 'Content-Type': 'application/octet-stream', Authorization: `Bearer ${options.token}`, @@ -56,6 +69,103 @@ export async function predictHTTP( createdAt: created_at, isBargein: is_bargein, probabilities, - predictionDuration: (performance.now() - createdAt) / 1000, + predictionDurationInS: (performance.now() - createdAt) / 1000, }; } + +export interface HttpTransportOptions { + baseUrl: string; + apiKey: string; + apiSecret: string; + threshold: number; + minFrames: number; + timeout: number; +} + +export interface HttpTransportState { + overlapSpeechStarted: boolean; + overlapSpeechStartedAt: number | undefined; + cache: Map; +} + +/** + * Creates an HTTP transport TransformStream for interruption detection. + * + * This transport receives Int16Array audio slices and outputs InterruptionEvents. + * Each audio slice triggers an HTTP POST request. + */ +export function createHttpTransport( + options: HttpTransportOptions, + getState: () => HttpTransportState, + setState: (partial: Partial) => void, + updateUserSpeakingSpan?: (entry: InterruptionCacheEntry) => void, +): TransformStream { + const logger = log(); + + return new TransformStream( + { + async transform(chunk, controller) { + // Pass through InterruptionEvents unchanged + if (!(chunk instanceof Int16Array)) { + controller.enqueue(chunk); + return; + } + + const state = getState(); + if (!state.overlapSpeechStartedAt) return; + + try { + const resp = await predictHTTP( + chunk, + { threshold: options.threshold, minFrames: options.minFrames }, + { + baseUrl: options.baseUrl, + timeout: options.timeout, + token: await createAccessToken(options.apiKey, options.apiSecret), + }, + ); + + const { createdAt, isBargein, probabilities, predictionDurationInS } = resp; + const entry = new InterruptionCacheEntry({ + createdAt, + probabilities, + isInterruption: isBargein, + speechInput: chunk, + totalDuration: (performance.now() - createdAt) / 1000, + detectionDelay: Date.now() - state.overlapSpeechStartedAt, + predictionDuration: predictionDurationInS, + }); + state.cache.set(createdAt, entry); + + if (state.overlapSpeechStarted && entry.isInterruption) { + if (updateUserSpeakingSpan) { + updateUserSpeakingSpan(entry); + } + const event: InterruptionEvent = { + type: InterruptionEventType.INTERRUPTION, + timestamp: Date.now(), + overlapSpeechStartedAt: state.overlapSpeechStartedAt, + isInterruption: entry.isInterruption, + speechInput: entry.speechInput, + probabilities: entry.probabilities, + totalDuration: entry.totalDuration, + predictionDuration: entry.predictionDuration, + detectionDelay: entry.detectionDelay, + probability: entry.probability, + }; + logger.debug( + { detectionDelay: entry.detectionDelay, totalDuration: entry.totalDuration }, + 'interruption detected', + ); + setState({ overlapSpeechStarted: 
false }); + controller.enqueue(event); + } + } catch (err) { + logger.error({ err }, 'Failed to send audio data over HTTP'); + } + }, + }, + { highWaterMark: 2 }, + { highWaterMark: 2 }, + ); +} diff --git a/agents/src/inference/interruption/interruption.ts b/agents/src/inference/interruption/interruption.ts index 52783c895..23d04fc8c 100644 --- a/agents/src/inference/interruption/interruption.ts +++ b/agents/src/inference/interruption/interruption.ts @@ -1,5 +1,5 @@ import { slidingWindowMinMax } from '../utils.js'; -import { MIN_INTERRUPTION_DURATION } from './defaults.js'; +import { FRAME_DURATION_IN_S, MIN_INTERRUPTION_DURATION_IN_S } from './defaults.js'; export enum InterruptionEventType { INTERRUPTION = 'interruption', @@ -40,9 +40,9 @@ export class InterruptionDetectionError extends Error { function estimateProbability( probabilities: number[], - windowSize: number = MIN_INTERRUPTION_DURATION, + windowSizeInS: number = MIN_INTERRUPTION_DURATION_IN_S, ): number { - const minWindow = Math.ceil(windowSize / 0.025); // 25ms per frame + const minWindow = Math.ceil(windowSizeInS / FRAME_DURATION_IN_S); if (probabilities.length < minWindow) { return 0; } diff --git a/agents/src/utils/ws_transport.test.ts b/agents/src/inference/interruption/ws_transport.test.ts similarity index 100% rename from agents/src/utils/ws_transport.test.ts rename to agents/src/inference/interruption/ws_transport.test.ts diff --git a/agents/src/inference/interruption/ws_transport.ts b/agents/src/inference/interruption/ws_transport.ts new file mode 100644 index 000000000..e4b649bb5 --- /dev/null +++ b/agents/src/inference/interruption/ws_transport.ts @@ -0,0 +1,346 @@ +import { Readable, Writable } from 'node:stream'; +import { TransformStream } from 'stream/web'; +import WebSocket, { createWebSocketStream } from 'ws'; +import { log } from '../../log.js'; +import { createAccessToken } from '../utils.js'; +import { + InterruptionCacheEntry, + type InterruptionEvent, + InterruptionEventType, +} from './interruption.js'; + +// WebSocket message types +const MSG_SESSION_CREATE = 'session.create'; +const MSG_SESSION_CLOSE = 'session.close'; +const MSG_SESSION_CREATED = 'session.created'; +const MSG_SESSION_CLOSED = 'session.closed'; +const MSG_INTERRUPTION_DETECTED = 'bargein_detected'; +const MSG_INFERENCE_DONE = 'inference_done'; +const MSG_ERROR = 'error'; + +export interface WsTransportOptions { + baseUrl: string; + apiKey: string; + apiSecret: string; + sampleRate: number; + threshold: number; + minFrames: number; + timeout: number; +} + +export interface WsTransportState { + overlapSpeechStarted: boolean; + overlapSpeechStartedAt: number | undefined; + cache: Map; +} + +interface WsMessage { + type: string; + created_at?: number; + probabilities?: number[]; + prediction_duration?: number; + is_bargein?: boolean; + error?: string; +} + +/** + * Creates a WebSocket connection and returns web-standard streams. 
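+ *
+ * Authentication uses a bearer token created from the configured apiKey and
+ * apiSecret; the returned promise rejects if the socket has not opened within
+ * the configured connection timeout.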
+ */ +async function connectWebSocket(options: WsTransportOptions): Promise<{ + readable: ReadableStream; + writable: WritableStream; + ws: WebSocket; +}> { + const baseUrl = options.baseUrl.replace(/^http/, 'ws'); + const url = `${baseUrl}/bargein`; + const token = await createAccessToken(options.apiKey, options.apiSecret); + + const ws = new WebSocket(url, { + headers: { Authorization: `Bearer ${token}` }, + }); + + await new Promise((resolve, reject) => { + const timeout = setTimeout( + () => reject(new Error('WebSocket connection timeout')), + options.timeout, + ); + ws.once('open', () => { + clearTimeout(timeout); + resolve(); + }); + ws.once('error', (err) => { + clearTimeout(timeout); + reject(err); + }); + }); + + const duplex = createWebSocketStream(ws); + duplex.on('error', (err) => log().error({ err }, 'WebSocket stream error')); + + // End the write side when the read side ends + duplex.on('end', () => duplex.end()); + + const writable = Writable.toWeb(duplex) as WritableStream; + const readable = Readable.toWeb(duplex) as ReadableStream; + + return { readable, writable, ws }; +} + +/** + * Creates a WebSocket transport TransformStream for interruption detection. + * + * This transport receives Int16Array audio slices and outputs InterruptionEvents. + * It maintains a persistent WebSocket connection. + */ +export function createWsTransport( + options: WsTransportOptions, + getState: () => WsTransportState, + setState: (partial: Partial) => void, + updateUserSpeakingSpan?: (entry: InterruptionCacheEntry) => void, +): TransformStream { + const logger = log(); + let ws: WebSocket | null = null; + let writer: WritableStreamDefaultWriter | null = null; + let readerTask: Promise | null = null; + let outputController: TransformStreamDefaultController | null = null; + + async function ensureConnection(): Promise { + if (ws && ws.readyState === WebSocket.OPEN) return; + + const conn = await connectWebSocket(options); + ws = conn.ws; + writer = conn.writable.getWriter(); + + // Send session.create message + const sessionCreateMsg = JSON.stringify({ + type: MSG_SESSION_CREATE, + settings: { + sample_rate: options.sampleRate, + num_channels: 1, + threshold: options.threshold, + min_frames: options.minFrames, + encoding: 's16le', + }, + }); + await writer.write(new TextEncoder().encode(sessionCreateMsg)); + + // Start reading responses + readerTask = processResponses(conn.readable); + } + + async function processResponses(readable: ReadableStream): Promise { + const reader = readable.getReader(); + const decoder = new TextDecoder(); + let buffer = ''; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + + // Process complete JSON messages (newline-delimited or single messages) + const lines = buffer.split('\n'); + buffer = lines.pop() ?? 
''; + + for (const line of lines) { + if (line.trim()) { + try { + const message: WsMessage = JSON.parse(line); + handleMessage(message); + } catch { + // Try parsing the whole buffer as a single message + try { + const message: WsMessage = JSON.parse(line); + handleMessage(message); + } catch { + logger.warn({ line }, 'Failed to parse WebSocket message'); + } + } + } + } + + // Also try parsing buffer as complete message (for non-newline-delimited) + if (buffer.trim()) { + try { + const message: WsMessage = JSON.parse(buffer); + handleMessage(message); + buffer = ''; + } catch { + // Incomplete message, keep buffering + } + } + } + } finally { + reader.releaseLock(); + } + } + + function handleMessage(message: WsMessage): void { + const state = getState(); + + switch (message.type) { + case MSG_SESSION_CREATED: + logger.debug('WebSocket session created'); + break; + + case MSG_INTERRUPTION_DETECTED: { + const createdAt = message.created_at ?? 0; + if (state.overlapSpeechStarted && state.overlapSpeechStartedAt !== undefined) { + const existing = state.cache.get(createdAt); + const entry = new InterruptionCacheEntry({ + createdAt, + speechInput: existing?.speechInput, + totalDuration: (performance.now() - createdAt) / 1000, + probabilities: message.probabilities, + isInterruption: true, + predictionDuration: message.prediction_duration ?? 0, + detectionDelay: Date.now() - state.overlapSpeechStartedAt, + }); + state.cache.set(createdAt, entry); + + if (updateUserSpeakingSpan) { + updateUserSpeakingSpan(entry); + } + + logger.debug( + { + totalDuration: entry.totalDuration, + predictionDuration: entry.predictionDuration, + detectionDelay: entry.detectionDelay, + probability: entry.probability, + }, + 'interruption detected', + ); + + const event: InterruptionEvent = { + type: InterruptionEventType.INTERRUPTION, + timestamp: Date.now(), + isInterruption: true, + totalDuration: entry.totalDuration, + predictionDuration: entry.predictionDuration, + overlapSpeechStartedAt: state.overlapSpeechStartedAt, + speechInput: entry.speechInput, + probabilities: entry.probabilities, + detectionDelay: entry.detectionDelay, + probability: entry.probability, + }; + + outputController?.enqueue(event); + setState({ overlapSpeechStarted: false }); + } + break; + } + + case MSG_INFERENCE_DONE: { + const createdAt = message.created_at ?? 0; + if (state.overlapSpeechStartedAt !== undefined) { + const existing = state.cache.get(createdAt); + const entry = new InterruptionCacheEntry({ + createdAt, + speechInput: existing?.speechInput, + totalDuration: (performance.now() - createdAt) / 1000, + predictionDuration: message.prediction_duration ?? 0, + probabilities: message.probabilities, + isInterruption: message.is_bargein ?? 
false, + detectionDelay: Date.now() - state.overlapSpeechStartedAt, + }); + state.cache.set(createdAt, entry); + + logger.trace( + { + totalDuration: entry.totalDuration, + predictionDuration: entry.predictionDuration, + }, + 'interruption inference done', + ); + } + break; + } + + case MSG_SESSION_CLOSED: + logger.debug('WebSocket session closed'); + break; + + case MSG_ERROR: + logger.error({ error: message.error }, 'WebSocket error message received'); + outputController?.error(new Error(`LiveKit Interruption error: ${message.error}`)); + break; + + default: + logger.warn({ type: message.type }, 'Received unexpected WebSocket message type'); + } + } + + async function sendAudioData(audioSlice: Int16Array): Promise { + await ensureConnection(); + if (!writer) throw new Error('WebSocket not connected'); + + const state = getState(); + const createdAt = performance.now(); + + // Store the audio data in cache + state.cache.set(createdAt, new InterruptionCacheEntry({ createdAt, speechInput: audioSlice })); + + // Create header: 8-byte little-endian uint64 timestamp (milliseconds as integer) + const header = new ArrayBuffer(8); + const view = new DataView(header); + const createdAtInt = Math.floor(createdAt); + view.setUint32(0, createdAtInt >>> 0, true); + view.setUint32(4, Math.floor(createdAtInt / 0x100000000) >>> 0, true); + + // Combine header and audio data + const audioBytes = new Uint8Array( + audioSlice.buffer, + audioSlice.byteOffset, + audioSlice.byteLength, + ); + const combined = new Uint8Array(8 + audioBytes.length); + combined.set(new Uint8Array(header), 0); + combined.set(audioBytes, 8); + + await writer.write(combined); + } + + async function close(): Promise { + if (writer && ws?.readyState === WebSocket.OPEN) { + const closeMsg = JSON.stringify({ type: MSG_SESSION_CLOSE }); + await writer.write(new TextEncoder().encode(closeMsg)); + writer.releaseLock(); + } + ws?.close(1000); + await readerTask; + } + + return new TransformStream( + { + start(controller) { + outputController = controller; + }, + + async transform(chunk, controller) { + // Pass through InterruptionEvents unchanged + if (!(chunk instanceof Int16Array)) { + controller.enqueue(chunk); + return; + } + + const state = getState(); + if (!state.overlapSpeechStartedAt) return; + + try { + await sendAudioData(chunk); + } catch (err) { + logger.error({ err }, 'Failed to send audio data over WebSocket'); + } + }, + + async flush() { + await close(); + }, + }, + { highWaterMark: 2 }, + { highWaterMark: 2 }, + ); +} diff --git a/agents/src/utils/ws_transport.ts b/agents/src/utils/ws_transport.ts deleted file mode 100644 index 4af4f906b..000000000 --- a/agents/src/utils/ws_transport.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { Readable, Writable } from 'node:stream'; -import WebSocket, { createWebSocketStream } from 'ws'; - -export function webSocketStream(wsUrl: string) { - const ws = new WebSocket(wsUrl); - const duplex = createWebSocketStream(ws); - duplex.on('error', console.error); - - // End the write side when the read side ends to properly close the stream. - // This is needed because Readable.toWeb() waits for both sides of the duplex - // to close before signaling done on the ReadableStream. 
- duplex.on('end', () => { - duplex.end(); - }); - - // Convert the writable side - const writable = Writable.toWeb(duplex); - // Convert the readable side - const readable = Readable.toWeb(duplex); - - return { readable, writable, close: ws.close }; -} From aee3612137f7289d034641a6556c31180a8a93e8 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Thu, 22 Jan 2026 10:02:33 +0100 Subject: [PATCH 14/25] renaming and update transport tests --- .../interruption/InterruptionStream.ts | 12 +- .../inference/interruption/http_transport.ts | 23 ++-- .../inference/interruption/interruption.ts | 24 ++-- .../interruption/ws_transport.test.ts | 127 ++++++------------ .../inference/interruption/ws_transport.ts | 52 +++---- 5 files changed, 103 insertions(+), 135 deletions(-) diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index 7071ebb00..1a2f6eb29 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -84,9 +84,9 @@ function updateUserSpeakingSpan(span: Span, entry: InterruptionCacheEntry) { (entry.isInterruption ?? false).toString().toLowerCase(), ); span.setAttribute(traceTypes.ATTR_INTERRUPTION_PROBABILITY, entry.probability); - span.setAttribute(traceTypes.ATTR_INTERRUPTION_TOTAL_DURATION, entry.totalDuration); - span.setAttribute(traceTypes.ATTR_INTERRUPTION_PREDICTION_DURATION, entry.predictionDuration); - span.setAttribute(traceTypes.ATTR_INTERRUPTION_DETECTION_DELAY, entry.detectionDelay); + span.setAttribute(traceTypes.ATTR_INTERRUPTION_TOTAL_DURATION, entry.totalDurationInS); + span.setAttribute(traceTypes.ATTR_INTERRUPTION_PREDICTION_DURATION, entry.predictionDurationInS); + span.setAttribute(traceTypes.ATTR_INTERRUPTION_DETECTION_DELAY, entry.detectionDelayInS); } export class InterruptionStreamBase { @@ -233,9 +233,9 @@ export class InterruptionStreamBase { overlapSpeechStartedAt: this.overlapSpeechStartedAt, speechInput: latestEntry.speechInput, probabilities: latestEntry.probabilities, - totalDuration: latestEntry.totalDuration, - detectionDelay: latestEntry.detectionDelay, - predictionDuration: latestEntry.predictionDuration, + totalDurationInS: latestEntry.totalDurationInS, + detectionDelayInS: latestEntry.detectionDelayInS, + predictionDurationInS: latestEntry.predictionDurationInS, probability: latestEntry.probability, }; controller.enqueue(event); diff --git a/agents/src/inference/interruption/http_transport.ts b/agents/src/inference/interruption/http_transport.ts index 82f9726b6..7ab424c68 100644 --- a/agents/src/inference/interruption/http_transport.ts +++ b/agents/src/inference/interruption/http_transport.ts @@ -2,7 +2,6 @@ import { ofetch } from 'ofetch'; import { TransformStream } from 'stream/web'; import { log } from '../../log.js'; import { createAccessToken } from '../utils.js'; -import type { ApiConnectOptions } from './InterruptionStream.js'; import { InterruptionCacheEntry, type InterruptionEvent, @@ -38,7 +37,6 @@ export async function predictHTTP( data: Int16Array, predictOptions: PredictOptions, options: PostOptions, - apiOptions: ApiConnectOptions, ): Promise { const createdAt = performance.now(); const url = new URL(`/bargein`, options.baseUrl); @@ -49,10 +47,10 @@ export async function predictHTTP( const { created_at, is_bargein, probabilities } = await ofetch( url.toString(), { - retry: apiOptions.maxRetries, + retry: 1, retryDelay: () => { // TODO backoff - return apiOptions.retryInterval; + return 500; }, 
headers: { 'Content-Type': 'application/octet-stream', @@ -131,9 +129,9 @@ export function createHttpTransport( probabilities, isInterruption: isBargein, speechInput: chunk, - totalDuration: (performance.now() - createdAt) / 1000, - detectionDelay: Date.now() - state.overlapSpeechStartedAt, - predictionDuration: predictionDurationInS, + totalDurationInS: (performance.now() - createdAt) / 1000, + detectionDelayInS: (Date.now() - state.overlapSpeechStartedAt) / 1000, + predictionDurationInS, }); state.cache.set(createdAt, entry); @@ -148,13 +146,16 @@ export function createHttpTransport( isInterruption: entry.isInterruption, speechInput: entry.speechInput, probabilities: entry.probabilities, - totalDuration: entry.totalDuration, - predictionDuration: entry.predictionDuration, - detectionDelay: entry.detectionDelay, + totalDurationInS: entry.totalDurationInS, + predictionDurationInS: entry.predictionDurationInS, + detectionDelayInS: entry.detectionDelayInS, probability: entry.probability, }; logger.debug( - { detectionDelay: entry.detectionDelay, totalDuration: entry.totalDuration }, + { + detectionDelayInS: entry.detectionDelayInS, + totalDurationInS: entry.totalDurationInS, + }, 'interruption detected', ); setState({ overlapSpeechStarted: false }); diff --git a/agents/src/inference/interruption/interruption.ts b/agents/src/inference/interruption/interruption.ts index 23d04fc8c..0d5e23e55 100644 --- a/agents/src/inference/interruption/interruption.ts +++ b/agents/src/inference/interruption/interruption.ts @@ -9,9 +9,9 @@ export interface InterruptionEvent { type: InterruptionEventType; timestamp: number; isInterruption: boolean; - totalDuration: number; - predictionDuration: number; - detectionDelay: number; + totalDurationInS: number; + predictionDurationInS: number; + detectionDelayInS: number; overlapSpeechStartedAt?: number; speechInput?: Int16Array; probabilities?: number[]; @@ -55,9 +55,9 @@ function estimateProbability( */ export class InterruptionCacheEntry { readonly createdAt: number; - readonly totalDuration: number; - readonly predictionDuration: number; - readonly detectionDelay: number; + readonly totalDurationInS: number; + readonly predictionDurationInS: number; + readonly detectionDelayInS: number; readonly speechInput?: Int16Array; readonly probabilities?: number[]; readonly isInterruption?: boolean; @@ -66,16 +66,16 @@ export class InterruptionCacheEntry { constructor(params: { createdAt: number; speechInput?: Int16Array; - totalDuration?: number; - predictionDuration?: number; - detectionDelay?: number; + totalDurationInS?: number; + predictionDurationInS?: number; + detectionDelayInS?: number; probabilities?: number[]; isInterruption?: boolean; }) { this.createdAt = params.createdAt; - this.totalDuration = params.totalDuration ?? 0; - this.predictionDuration = params.predictionDuration ?? 0; - this.detectionDelay = params.detectionDelay ?? 0; + this.totalDurationInS = params.totalDurationInS ?? 0; + this.predictionDurationInS = params.predictionDurationInS ?? 0; + this.detectionDelayInS = params.detectionDelayInS ?? 
0; this.speechInput = params.speechInput; this.probabilities = params.probabilities; this.isInterruption = params.isInterruption; diff --git a/agents/src/inference/interruption/ws_transport.test.ts b/agents/src/inference/interruption/ws_transport.test.ts index 77c5fdc91..e44f62fdb 100644 --- a/agents/src/inference/interruption/ws_transport.test.ts +++ b/agents/src/inference/interruption/ws_transport.test.ts @@ -3,16 +3,31 @@ // SPDX-License-Identifier: Apache-2.0 import { describe, expect, it } from 'vitest'; import { WebSocket, WebSocketServer } from 'ws'; -import { webSocketStream } from './ws_transport.js'; +import { webSocketToStream } from './ws_transport.js'; -describe('webSocketStream', () => { +/** Helper to create a WebSocket server and return its port */ +async function createServer(): Promise<{ wss: WebSocketServer; port: number }> { + const wss = await new Promise((resolve) => { + const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); + }); + const port = (wss.address() as { port: number }).port; + return { wss, port }; +} + +/** Helper to create a connected WebSocket client */ +async function createClient(port: number): Promise { + const ws = new WebSocket(`ws://localhost:${port}`); + // await new Promise((resolve, reject) => { + // ws.once('open', resolve); + // ws.once('error', reject); + // }); + return ws; +} + +describe('webSocketToStream', () => { describe('readable stream', () => { it('receives messages from the WebSocket', async () => { - const wss = await new Promise((resolve) => { - const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); - }); - - const port = (wss.address() as { port: number }).port; + const { wss, port } = await createServer(); wss.on('connection', (serverWs) => { serverWs.send('hello'); @@ -20,7 +35,8 @@ describe('webSocketStream', () => { serverWs.close(); }); - const { readable } = webSocketStream(`ws://localhost:${port}`); + const ws = await createClient(port); + const { readable } = webSocketToStream(ws); const reader = readable.getReader(); const messages: string[] = []; @@ -40,11 +56,7 @@ describe('webSocketStream', () => { }); it('handles binary messages', async () => { - const wss = await new Promise((resolve) => { - const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); - }); - - const port = (wss.address() as { port: number }).port; + const { wss, port } = await createServer(); const binaryData = new Uint8Array([1, 2, 3, 4, 5]); @@ -53,7 +65,8 @@ describe('webSocketStream', () => { serverWs.close(); }); - const { readable } = webSocketStream(`ws://localhost:${port}`); + const ws = await createClient(port); + const { readable } = webSocketToStream(ws); const reader = readable.getReader(); const chunks: Uint8Array[] = []; @@ -74,16 +87,14 @@ describe('webSocketStream', () => { }); it('handles empty stream when connection closes immediately', async () => { - const wss = await new Promise((resolve) => { - const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); - }); - - const port = (wss.address() as { port: number }).port; + const { wss, port } = await createServer(); wss.on('connection', (serverWs) => { serverWs.close(); }); - const { readable } = webSocketStream(`ws://localhost:${port}`); + + const ws = await createClient(port); + const { readable } = webSocketToStream(ws); const reader = readable.getReader(); const chunks: Uint8Array[] = []; @@ -105,16 +116,7 @@ describe('webSocketStream', () => { 
describe('writable stream', () => { it('sends messages through the WebSocket', async () => { - const wss = await new Promise((resolve) => { - const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); - }); - - const port = (wss.address() as { port: number }).port; - const ws = new WebSocket(`ws://localhost:${port}`); - - const connected = new Promise((resolve) => { - ws.on('open', resolve); - }); + const { wss, port } = await createServer(); const messagesReceived: string[] = []; const serverClosed = new Promise((resolve) => { @@ -126,8 +128,8 @@ describe('webSocketStream', () => { }); }); - await connected; - const { writable } = webSocketStream(`ws://localhost:${port}`); + const ws = await createClient(port); + const { writable } = webSocketToStream(ws); const writer = writable.getWriter(); await writer.write(new TextEncoder().encode('hello')); @@ -142,11 +144,7 @@ describe('webSocketStream', () => { }); it('sends binary data through the WebSocket', async () => { - const wss = await new Promise((resolve) => { - const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); - }); - - const port = (wss.address() as { port: number }).port; + const { wss, port } = await createServer(); const chunksReceived: Buffer[] = []; const serverClosed = new Promise((resolve) => { @@ -158,7 +156,8 @@ describe('webSocketStream', () => { }); }); - const { writable } = webSocketStream(`ws://localhost:${port}`); + const ws = await createClient(port); + const { writable } = webSocketToStream(ws); const writer = writable.getWriter(); const binaryData = new Uint8Array([10, 20, 30, 40, 50]); @@ -172,46 +171,11 @@ describe('webSocketStream', () => { wss.close(); }); - - it('buffers writes if readyState is CONNECTING', async () => { - const wss = await new Promise((resolve) => { - const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); - }); - - const port = (wss.address() as { port: number }).port; - - const { writable } = webSocketStream(`ws://localhost:${port}`); - const writer = writable.getWriter(); - - const messagesReceived: string[] = []; - const serverClosed = new Promise((resolve) => { - wss.on('connection', (serverWs) => { - serverWs.on('message', (data) => { - messagesReceived.push(data.toString()); - }); - serverWs.on('close', resolve); - }); - }); - - // These writes should be buffered - await writer.write(new TextEncoder().encode('buffered message')); - await writer.close(); - - await serverClosed; - - expect(messagesReceived).toEqual(['buffered message']); - - wss.close(); - }); }); describe('bidirectional communication', () => { it('supports echo pattern with readable and writable', async () => { - const wss = await new Promise((resolve) => { - const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); - }); - - const port = (wss.address() as { port: number }).port; + const { wss, port } = await createServer(); // Server echoes messages back wss.on('connection', (serverWs) => { @@ -220,7 +184,8 @@ describe('webSocketStream', () => { }); }); - const { readable, writable } = webSocketStream(`ws://localhost:${port}`); + const ws = await createClient(port); + const { readable, writable } = webSocketToStream(ws); const writer = writable.getWriter(); const reader = readable.getReader(); @@ -244,11 +209,7 @@ describe('webSocketStream', () => { describe('error handling', () => { it('readable stream ends when WebSocket closes unexpectedly', async () => { - const wss = await new 
Promise((resolve) => { - const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server)); - }); - - const port = (wss.address() as { port: number }).port; + const { wss, port } = await createServer(); wss.on('connection', (serverWs) => { serverWs.send('before close'); @@ -256,7 +217,8 @@ describe('webSocketStream', () => { serverWs.terminate(); }); - const { readable } = webSocketStream(`ws://localhost:${port}`); + const ws = await createClient(port); + const { readable } = webSocketToStream(ws); const reader = readable.getReader(); const chunks: string[] = []; @@ -266,8 +228,7 @@ describe('webSocketStream', () => { if (done) break; chunks.push(Buffer.from(value).toString()); } - } catch (error) { - console.error(error); + } catch { // Connection terminated, stream may error } finally { reader.releaseLock(); diff --git a/agents/src/inference/interruption/ws_transport.ts b/agents/src/inference/interruption/ws_transport.ts index e4b649bb5..083e1669d 100644 --- a/agents/src/inference/interruption/ws_transport.ts +++ b/agents/src/inference/interruption/ws_transport.ts @@ -43,6 +43,19 @@ interface WsMessage { error?: string; } +export function webSocketToStream(ws: WebSocket) { + const duplex = createWebSocketStream(ws); + duplex.on('error', (err) => log().error({ err }, 'WebSocket stream error')); + + // End the write side when the read side ends + duplex.on('end', () => duplex.end()); + + const writable = Writable.toWeb(duplex) as WritableStream; + const readable = Readable.toWeb(duplex) as ReadableStream; + + return { readable, writable }; +} + /** * Creates a WebSocket connection and returns web-standard streams. */ @@ -59,6 +72,8 @@ async function connectWebSocket(options: WsTransportOptions): Promise<{ headers: { Authorization: `Bearer ${token}` }, }); + const { readable, writable } = webSocketToStream(ws); + await new Promise((resolve, reject) => { const timeout = setTimeout( () => reject(new Error('WebSocket connection timeout')), @@ -74,15 +89,6 @@ async function connectWebSocket(options: WsTransportOptions): Promise<{ }); }); - const duplex = createWebSocketStream(ws); - duplex.on('error', (err) => log().error({ err }, 'WebSocket stream error')); - - // End the write side when the read side ends - duplex.on('end', () => duplex.end()); - - const writable = Writable.toWeb(duplex) as WritableStream; - const readable = Readable.toWeb(duplex) as ReadableStream; - return { readable, writable, ws }; } @@ -192,11 +198,11 @@ export function createWsTransport( const entry = new InterruptionCacheEntry({ createdAt, speechInput: existing?.speechInput, - totalDuration: (performance.now() - createdAt) / 1000, + totalDurationInS: (performance.now() - createdAt) / 1000, probabilities: message.probabilities, isInterruption: true, - predictionDuration: message.prediction_duration ?? 0, - detectionDelay: Date.now() - state.overlapSpeechStartedAt, + predictionDurationInS: message.prediction_duration ?? 
0, + detectionDelayInS: (Date.now() - state.overlapSpeechStartedAt) / 1000, }); state.cache.set(createdAt, entry); @@ -206,9 +212,9 @@ export function createWsTransport( logger.debug( { - totalDuration: entry.totalDuration, - predictionDuration: entry.predictionDuration, - detectionDelay: entry.detectionDelay, + totalDurationInS: entry.totalDurationInS, + predictionDurationInS: entry.predictionDurationInS, + detectionDelayInS: entry.detectionDelayInS, probability: entry.probability, }, 'interruption detected', @@ -218,12 +224,12 @@ export function createWsTransport( type: InterruptionEventType.INTERRUPTION, timestamp: Date.now(), isInterruption: true, - totalDuration: entry.totalDuration, - predictionDuration: entry.predictionDuration, + totalDurationInS: entry.totalDurationInS, + predictionDurationInS: entry.predictionDurationInS, overlapSpeechStartedAt: state.overlapSpeechStartedAt, speechInput: entry.speechInput, probabilities: entry.probabilities, - detectionDelay: entry.detectionDelay, + detectionDelayInS: entry.detectionDelayInS, probability: entry.probability, }; @@ -240,18 +246,18 @@ export function createWsTransport( const entry = new InterruptionCacheEntry({ createdAt, speechInput: existing?.speechInput, - totalDuration: (performance.now() - createdAt) / 1000, - predictionDuration: message.prediction_duration ?? 0, + totalDurationInS: (performance.now() - createdAt) / 1000, + predictionDurationInS: message.prediction_duration ?? 0, probabilities: message.probabilities, isInterruption: message.is_bargein ?? false, - detectionDelay: Date.now() - state.overlapSpeechStartedAt, + detectionDelayInS: (Date.now() - state.overlapSpeechStartedAt) / 1000, }); state.cache.set(createdAt, entry); logger.trace( { - totalDuration: entry.totalDuration, - predictionDuration: entry.predictionDuration, + totalDurationInS: entry.totalDurationInS, + predictionDurationInS: entry.predictionDurationInS, }, 'interruption inference done', ); From 1f3c315edf3941c8d78d9911b49f6415d2b501d8 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Thu, 22 Jan 2026 10:30:44 +0100 Subject: [PATCH 15/25] add missing features --- .../AdaptiveInterruptionDetector.ts | 88 +++++++++--- .../interruption/InterruptionStream.ts | 51 +++++-- agents/src/inference/interruption/defaults.ts | 17 ++- .../inference/interruption/http_transport.ts | 15 +- .../inference/interruption/interruption.ts | 129 ++++++++++++++++-- .../inference/interruption/ws_transport.ts | 87 +++++++++--- agents/src/voice/agent_activity.ts | 4 +- 7 files changed, 324 insertions(+), 67 deletions(-) diff --git a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts index c20da246d..0e137b15c 100644 --- a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts +++ b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts @@ -8,11 +8,12 @@ import { SAMPLE_RATE, interruptionOptionDefaults, } from './defaults.js'; -import { type InterruptionDetectionError } from './interruption.js'; +import { type InterruptionDetectionError, type InterruptionEvent } from './interruption.js'; type InterruptionCallbacks = { - interruptionDetected: () => void; - overlapSpeechDetected: () => void; + userInterruptionDetected: (event: InterruptionEvent) => void; + userNonInterruptionDetected: (event: InterruptionEvent) => void; + overlapSpeechEnded: (event: InterruptionEvent) => void; error: (error: InterruptionDetectionError) => void; }; @@ -35,8 +36,10 @@ export type 
AdaptiveInterruptionDetectorOptions = Partial; export class AdaptiveInterruptionDetector extends (EventEmitter as new () => TypedEventEmitter) { options: InterruptionOptions; + private readonly _label: string; private logger = log(); - private streams: WeakSet; // TODO: Union of InterruptionHttpStream | InterruptionWebSocketStream + // Use Set instead of WeakSet to allow iteration for propagating option updates + private streams: Set = new Set(); constructor(options: AdaptiveInterruptionDetectorOptions = {}) { super(); @@ -46,7 +49,6 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ baseUrl, apiKey, apiSecret, - useProxy: useProxyArg, audioPrefixDurationInS, threshold, detectionIntervalInS, @@ -86,7 +88,8 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ useProxy = true; } else { - useProxy = useProxyArg ?? false; + // Force useProxy to false for custom URLs (matching Python behavior) + useProxy = false; } this.options = { @@ -104,7 +107,7 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ minInterruptionDurationInS, }; - this.streams = new WeakSet(); + this._label = `${this.constructor.name}`; this.logger.debug( { @@ -121,6 +124,41 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ ); } + /** + * The model identifier for this detector. + */ + get model(): string { + return 'adaptive interruption'; + } + + /** + * The provider identifier for this detector. + */ + get provider(): string { + return 'livekit'; + } + + /** + * The label for this detector instance. + */ + get label(): string { + return this._label; + } + + /** + * The sample rate used for audio processing. + */ + get sampleRate(): number { + return this.options.sampleRate; + } + + /** + * Emit an error event from the detector. + */ + emitError(error: InterruptionDetectionError): void { + this.emit('error', error); + } + /** * Creates a new InterruptionStreamBase for internal use. * The stream can receive audio frames and sentinels via pushFrame(). @@ -129,27 +167,37 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ createStream(): InterruptionStreamBase { const streamBase = new InterruptionStreamBase(this, {}); this.streams.add(streamBase); - // const transformer = new TransformStream({ - // transform: (chunk, controller) => { - // log().info('adaptive interruption detection stream transformer', chunk); - // if (chunk.type === InterruptionEventType.INTERRUPTION) { - // this.emit('interruptionDetected'); // TODO payload - // } else if (chunk.type === InterruptionEventType.OVERLAP_SPEECH_ENDED) { - // this.emit('overlapSpeechDetected'); // TODO payload - // } - // controller.enqueue(chunk); - // }, - // }); - // streamBase.stream().pipeThrough(transformer); return streamBase; } - updateOptions(options: { threshold?: number; minInterruptionDurationInS?: number }): void { + /** + * Remove a stream from tracking (called when stream is closed). + */ + removeStream(stream: InterruptionStreamBase): void { + this.streams.delete(stream); + } + + /** + * Update options for the detector and propagate to all active streams. + * For WebSocket streams, this triggers a reconnection with new settings. 
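+   *
+   * Example (illustrative values only): `await detector.updateOptions({ threshold: 0.8 })`
+   * updates the stored options and awaits the corresponding update on every
+   * active stream.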
+ */ + async updateOptions(options: { + threshold?: number; + minInterruptionDurationInS?: number; + }): Promise { if (options.threshold !== undefined) { this.options.threshold = options.threshold; } if (options.minInterruptionDurationInS !== undefined) { + this.options.minInterruptionDurationInS = options.minInterruptionDurationInS; this.options.minFrames = Math.ceil(options.minInterruptionDurationInS * FRAMES_PER_SECOND); } + + // Propagate option updates to all active streams (matching Python behavior) + const updatePromises: Promise[] = []; + for (const stream of this.streams) { + updatePromises.push(stream.updateOptions(options)); + } + await Promise.all(updatePromises); } } diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index 1a2f6eb29..7d0bd8142 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -8,9 +8,10 @@ import type { AdaptiveInterruptionDetector, InterruptionOptions, } from './AdaptiveInterruptionDetector.js'; -import { apiConnectDefaults } from './defaults.js'; +import { FRAMES_PER_SECOND, apiConnectDefaults } from './defaults.js'; import { createHttpTransport } from './http_transport.js'; import { + BoundedCache, InterruptionCacheEntry, type InterruptionDetectionError, type InterruptionEvent, @@ -108,6 +109,9 @@ export class InterruptionStreamBase { private logger = log(); + // Store reconnect function for WebSocket transport + private wsReconnect?: () => Promise; + constructor(model: AdaptiveInterruptionDetector, apiOptions: Partial) { this.inputStream = createStreamChannel< InterruptionSentinel | AudioFrame, @@ -115,18 +119,39 @@ export class InterruptionStreamBase { >(); this.model = model; - this.options = model.options; + this.options = { ...model.options }; this.apiOptions = { ...apiConnectDefaults, ...apiOptions }; this.eventStream = this.setupTransform(); } + /** + * Update stream options. For WebSocket transport, this triggers a reconnection. 
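+   * Only `threshold` and `minInterruptionDurationInS` can be changed at runtime;
+   * `minFrames` is recomputed from the new duration using FRAMES_PER_SECOND.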
+ */ + async updateOptions(options: { + threshold?: number; + minInterruptionDurationInS?: number; + }): Promise { + if (options.threshold !== undefined) { + this.options.threshold = options.threshold; + } + if (options.minInterruptionDurationInS !== undefined) { + this.options.minInterruptionDurationInS = options.minInterruptionDurationInS; + this.options.minFrames = Math.ceil(options.minInterruptionDurationInS * FRAMES_PER_SECOND); + } + // Trigger WebSocket reconnection if using proxy (WebSocket transport) + if (this.options.useProxy && this.wsReconnect) { + await this.wsReconnect(); + } + } + private setupTransform(): ReadableStream { let agentSpeechStarted = false; let startIdx = 0; let accumulatedSamples = 0; let overlapSpeechStarted = false; - const cache = new Map(); + // Use BoundedCache with max_len=10 to prevent unbounded memory growth + const cache = new BoundedCache(10); const inferenceS16Data = new Int16Array( Math.ceil(this.options.maxAudioDurationInS * this.options.sampleRate), ).fill(0); @@ -219,12 +244,14 @@ export class InterruptionStreamBase { this.logger.debug('overlap speech ended'); if (overlapSpeechStarted) { this.userSpeakingSpan = undefined; - let latestEntry = Array.from(cache.values()).at(-1); + // Use pop with predicate to get only completed requests (matching Python behavior) + // This ensures we don't return incomplete/in-flight requests as the "final" result + let latestEntry = cache.pop( + (entry) => entry.totalDurationInS !== undefined && entry.totalDurationInS > 0, + ); if (!latestEntry) { this.logger.debug('no request made for overlap speech'); latestEntry = InterruptionCacheEntry.default(); - } else { - cache.delete(latestEntry.createdAt); } const event: InterruptionEvent = { type: InterruptionEventType.OVERLAP_SPEECH_ENDED, @@ -259,11 +286,17 @@ export class InterruptionStreamBase { threshold: this.options.threshold, minFrames: this.options.minFrames, timeout: this.options.inferenceTimeout, + maxRetries: this.apiOptions.maxRetries, }; - const transport = this.options.useProxy - ? createWsTransport(transportOptions, getState, setState, handleSpanUpdate) - : createHttpTransport(transportOptions, getState, setState, handleSpanUpdate); + let transport: TransformStream; + if (this.options.useProxy) { + const wsResult = createWsTransport(transportOptions, getState, setState, handleSpanUpdate); + transport = wsResult.transport; + this.wsReconnect = wsResult.reconnect; + } else { + transport = createHttpTransport(transportOptions, getState, setState, handleSpanUpdate); + } // Pipeline: input -> audioTransformer -> transport -> eventStream return this.inputStream.stream().pipeThrough(audioTransformer).pipeThrough(transport); diff --git a/agents/src/inference/interruption/defaults.ts b/agents/src/inference/interruption/defaults.ts index 60529b7d2..1a2beeb08 100644 --- a/agents/src/inference/interruption/defaults.ts +++ b/agents/src/inference/interruption/defaults.ts @@ -10,7 +10,7 @@ export const REMOTE_INFERENCE_TIMEOUT_IN_S = 1.0; export const SAMPLE_RATE = 16000; export const FRAMES_PER_SECOND = 40; export const FRAME_DURATION_IN_S = 0.025; // 25ms per frame -export const DEFAULT_BASE_URL = 'http://localhost:8080'; +export const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1'; export const apiConnectDefaults: ApiConnectOptions = { maxRetries: 3, @@ -18,6 +18,21 @@ export const apiConnectDefaults: ApiConnectOptions = { timeout: 10_000, } as const; +/** + * Calculate the retry interval using exponential backoff with jitter. 
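+ * Roughly: attempt 0 waits [base, 1.25*base), attempt 1 waits [2*base, 2.5*base),
+ * attempt 2 waits [4*base, 5*base), where base defaults to apiConnectDefaults.retryInterval.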
+ * Matches the Python implementation's _interval_for_retry behavior. + */ +export function intervalForRetry( + attempt: number, + baseInterval: number = apiConnectDefaults.retryInterval, +): number { + // Exponential backoff: baseInterval * 2^attempt with some jitter + const exponentialDelay = baseInterval * Math.pow(2, attempt); + // Add jitter (0-25% of the delay) + const jitter = exponentialDelay * Math.random() * 0.25; + return exponentialDelay + jitter; +} + export const interruptionOptionDefaults: InterruptionOptions = { sampleRate: SAMPLE_RATE, threshold: THRESHOLD, diff --git a/agents/src/inference/interruption/http_transport.ts b/agents/src/inference/interruption/http_transport.ts index 7ab424c68..b2419a514 100644 --- a/agents/src/inference/interruption/http_transport.ts +++ b/agents/src/inference/interruption/http_transport.ts @@ -2,7 +2,9 @@ import { ofetch } from 'ofetch'; import { TransformStream } from 'stream/web'; import { log } from '../../log.js'; import { createAccessToken } from '../utils.js'; +import { intervalForRetry } from './defaults.js'; import { + BoundedCache, InterruptionCacheEntry, type InterruptionEvent, InterruptionEventType, @@ -13,6 +15,7 @@ export interface PostOptions { token: string; signal?: AbortSignal; timeout?: number; + maxRetries?: number; } export interface PredictOptions { @@ -44,13 +47,15 @@ export async function predictHTTP( url.searchParams.append('min_frames', predictOptions.minFrames.toFixed()); url.searchParams.append('created_at', createdAt.toFixed()); + let retryCount = 0; const { created_at, is_bargein, probabilities } = await ofetch( url.toString(), { - retry: 1, + retry: options.maxRetries ?? 3, retryDelay: () => { - // TODO backoff - return 500; + const delay = intervalForRetry(retryCount); + retryCount++; + return delay; }, headers: { 'Content-Type': 'application/octet-stream', @@ -78,12 +83,13 @@ export interface HttpTransportOptions { threshold: number; minFrames: number; timeout: number; + maxRetries?: number; } export interface HttpTransportState { overlapSpeechStarted: boolean; overlapSpeechStartedAt: number | undefined; - cache: Map; + cache: BoundedCache; } /** @@ -119,6 +125,7 @@ export function createHttpTransport( { baseUrl: options.baseUrl, timeout: options.timeout, + maxRetries: options.maxRetries, token: await createAccessToken(options.apiKey, options.apiSecret), }, ); diff --git a/agents/src/inference/interruption/interruption.ts b/agents/src/inference/interruption/interruption.ts index 0d5e23e55..5c17ff534 100644 --- a/agents/src/inference/interruption/interruption.ts +++ b/agents/src/inference/interruption/interruption.ts @@ -1,6 +1,111 @@ import { slidingWindowMinMax } from '../utils.js'; import { FRAME_DURATION_IN_S, MIN_INTERRUPTION_DURATION_IN_S } from './defaults.js'; +/** + * A bounded cache that automatically evicts the oldest entries when the cache exceeds max size. + * Uses FIFO eviction strategy. 
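+ *
+ * For example, with `maxLen = 2`, inserting a third key evicts the first key
+ * that was inserted (the insertion order of the underlying Map).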
+ */ +export class BoundedCache { + private cache: Map = new Map(); + private readonly maxLen: number; + + constructor(maxLen: number = 10) { + this.maxLen = maxLen; + } + + set(key: K, value: V): void { + this.cache.set(key, value); + if (this.cache.size > this.maxLen) { + // Remove the oldest entry (first inserted) + const firstKey = this.cache.keys().next().value as K; + this.cache.delete(firstKey); + } + } + + get(key: K): V | undefined { + return this.cache.get(key); + } + + has(key: K): boolean { + return this.cache.has(key); + } + + delete(key: K): boolean { + return this.cache.delete(key); + } + + /** + * Get existing entry and update it, or create a new one using factory. + * Updates the entry with the provided partial fields. + */ + setOrUpdate( + key: K, + factory: () => T, + updates: Partial<{ [P in keyof T]: T[P] }>, + ): T { + let entry = this.cache.get(key) as T | undefined; + if (entry === undefined) { + entry = factory(); + this.set(key, entry); + } + // Apply updates to the entry + for (const [field, value] of Object.entries(updates)) { + if (value !== undefined) { + (entry as Record)[field] = value; + } + } + return entry; + } + + /** + * Pop the last entry that matches the predicate, or return undefined. + * Only removes and returns the matching entry, preserving others. + */ + pop(predicate?: (value: V) => boolean): V | undefined { + if (predicate === undefined) { + // Pop the last (most recent) entry + const keys = Array.from(this.cache.keys()); + if (keys.length === 0) return undefined; + const lastKey = keys[keys.length - 1]!; + const value = this.cache.get(lastKey); + this.cache.delete(lastKey); + return value; + } + + // Find the last entry matching the predicate (iterating in reverse) + const keys = Array.from(this.cache.keys()); + for (let i = keys.length - 1; i >= 0; i--) { + const key = keys[i]!; + const value = this.cache.get(key)!; + if (predicate(value)) { + this.cache.delete(key); + return value; + } + } + return undefined; + } + + clear(): void { + this.cache.clear(); + } + + get size(): number { + return this.cache.size; + } + + values(): IterableIterator { + return this.cache.values(); + } + + keys(): IterableIterator { + return this.cache.keys(); + } + + entries(): IterableIterator<[K, V]> { + return this.cache.entries(); + } +} + export enum InterruptionEventType { INTERRUPTION = 'interruption', OVERLAP_SPEECH_ENDED = 'overlap_speech_ended', @@ -52,16 +157,16 @@ function estimateProbability( /** * Typed cache entry for interruption inference results. + * Mutable to support setOrUpdate pattern from Python's _BoundedCache. */ export class InterruptionCacheEntry { - readonly createdAt: number; - readonly totalDurationInS: number; - readonly predictionDurationInS: number; - readonly detectionDelayInS: number; - readonly speechInput?: Int16Array; - readonly probabilities?: number[]; - readonly isInterruption?: boolean; - readonly probability: number; + createdAt: number; + totalDurationInS: number; + predictionDurationInS: number; + detectionDelayInS: number; + speechInput?: Int16Array; + probabilities?: number[]; + isInterruption?: boolean; constructor(params: { createdAt: number; @@ -79,7 +184,13 @@ export class InterruptionCacheEntry { this.speechInput = params.speechInput; this.probabilities = params.probabilities; this.isInterruption = params.isInterruption; - this.probability = this.probabilities ? estimateProbability(this.probabilities) : 0; + } + + /** + * The conservative estimated probability of the interruption event. 
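+   * Returns 0 when no probabilities have been recorded; otherwise the value is
+   * derived from a sliding min/max window over the per-frame probabilities
+   * (see estimateProbability).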
+ */ + get probability(): number { + return this.probabilities ? estimateProbability(this.probabilities) : 0; } static default(): InterruptionCacheEntry { diff --git a/agents/src/inference/interruption/ws_transport.ts b/agents/src/inference/interruption/ws_transport.ts index 083e1669d..3d76ff205 100644 --- a/agents/src/inference/interruption/ws_transport.ts +++ b/agents/src/inference/interruption/ws_transport.ts @@ -3,7 +3,9 @@ import { TransformStream } from 'stream/web'; import WebSocket, { createWebSocketStream } from 'ws'; import { log } from '../../log.js'; import { createAccessToken } from '../utils.js'; +import { intervalForRetry } from './defaults.js'; import { + type BoundedCache, InterruptionCacheEntry, type InterruptionEvent, InterruptionEventType, @@ -26,12 +28,13 @@ export interface WsTransportOptions { threshold: number; minFrames: number; timeout: number; + maxRetries?: number; } export interface WsTransportState { overlapSpeechStarted: boolean; overlapSpeechStartedAt: number | undefined; - cache: Map; + cache: BoundedCache; } interface WsMessage { @@ -92,18 +95,24 @@ async function connectWebSocket(options: WsTransportOptions): Promise<{ return { readable, writable, ws }; } +export interface WsTransportResult { + transport: TransformStream; + reconnect: () => Promise; +} + /** * Creates a WebSocket transport TransformStream for interruption detection. * * This transport receives Int16Array audio slices and outputs InterruptionEvents. - * It maintains a persistent WebSocket connection. + * It maintains a persistent WebSocket connection with automatic retry on failure. + * Returns both the transport and a reconnect function for option updates. */ export function createWsTransport( options: WsTransportOptions, getState: () => WsTransportState, setState: (partial: Partial) => void, updateUserSpeakingSpan?: (entry: InterruptionCacheEntry) => void, -): TransformStream { +): WsTransportResult { const logger = log(); let ws: WebSocket | null = null; let writer: WritableStreamDefaultWriter | null = null; @@ -113,25 +122,45 @@ export function createWsTransport( async function ensureConnection(): Promise { if (ws && ws.readyState === WebSocket.OPEN) return; - const conn = await connectWebSocket(options); - ws = conn.ws; - writer = conn.writable.getWriter(); - - // Send session.create message - const sessionCreateMsg = JSON.stringify({ - type: MSG_SESSION_CREATE, - settings: { - sample_rate: options.sampleRate, - num_channels: 1, - threshold: options.threshold, - min_frames: options.minFrames, - encoding: 's16le', - }, - }); - await writer.write(new TextEncoder().encode(sessionCreateMsg)); + const maxRetries = options.maxRetries ?? 3; + let lastError: Error | null = null; + + for (let attempt = 0; attempt <= maxRetries; attempt++) { + try { + const conn = await connectWebSocket(options); + ws = conn.ws; + writer = conn.writable.getWriter(); + + // Send session.create message + const sessionCreateMsg = JSON.stringify({ + type: MSG_SESSION_CREATE, + settings: { + sample_rate: options.sampleRate, + num_channels: 1, + threshold: options.threshold, + min_frames: options.minFrames, + encoding: 's16le', + }, + }); + await writer.write(new TextEncoder().encode(sessionCreateMsg)); + + // Start reading responses + readerTask = processResponses(conn.readable); + return; + } catch (err) { + lastError = err instanceof Error ? 
err : new Error(String(err)); + if (attempt < maxRetries) { + const delay = intervalForRetry(attempt); + logger.warn( + { attempt, delay, err: lastError.message }, + 'WebSocket connection failed, retrying', + ); + await new Promise((resolve) => setTimeout(resolve, delay)); + } + } + } - // Start reading responses - readerTask = processResponses(conn.readable); + throw lastError ?? new Error('Failed to connect to WebSocket after retries'); } async function processResponses(readable: ReadableStream): Promise { @@ -314,12 +343,24 @@ export function createWsTransport( const closeMsg = JSON.stringify({ type: MSG_SESSION_CLOSE }); await writer.write(new TextEncoder().encode(closeMsg)); writer.releaseLock(); + writer = null; } ws?.close(1000); + ws = null; await readerTask; + readerTask = null; } - return new TransformStream( + /** + * Reconnect the WebSocket with updated options. + * This is called when options are updated via updateOptions(). + */ + async function reconnect(): Promise { + await close(); + // Connection will be re-established on next sendAudioData call + } + + const transport = new TransformStream( { start(controller) { outputController = controller; @@ -349,4 +390,6 @@ export function createWsTransport( { highWaterMark: 2 }, { highWaterMark: 2 }, ); + + return { transport, reconnect }; } diff --git a/agents/src/voice/agent_activity.ts b/agents/src/voice/agent_activity.ts index 1ec298678..94890b9f1 100644 --- a/agents/src/voice/agent_activity.ts +++ b/agents/src/voice/agent_activity.ts @@ -730,8 +730,8 @@ export class AgentActivity implements RecognitionHooks { this.logger.info( { probability: ev.probability, - detectionDelay: ev.detectionDelay, - totalDuration: ev.totalDuration, + detectionDelayInS: ev.detectionDelayInS, + totalDurationInS: ev.totalDurationInS, }, 'adaptive interruption detected', ); From 1aac5f72ce01bca70b2a154f9cf59909cf78fc8b Mon Sep 17 00:00:00 2001 From: lukasIO Date: Thu, 22 Jan 2026 10:32:11 +0100 Subject: [PATCH 16/25] revert voice activity stuff --- agents/src/voice/agent_activity.ts | 61 ---------- agents/src/voice/agent_session.ts | 24 +--- agents/src/voice/audio_recognition.ts | 163 +------------------------- 3 files changed, 3 insertions(+), 245 deletions(-) diff --git a/agents/src/voice/agent_activity.ts b/agents/src/voice/agent_activity.ts index 94890b9f1..c5b2b999c 100644 --- a/agents/src/voice/agent_activity.ts +++ b/agents/src/voice/agent_activity.ts @@ -41,8 +41,6 @@ import { recordRealtimeMetrics, traceTypes, tracer } from '../telemetry/index.js import { splitWords } from '../tokenize/basic/word.js'; import { TTS, type TTSError } from '../tts/tts.js'; import { Future, Task, cancelAndWait, waitFor } from '../utils.js'; -import type { InterruptionEvent } from '../inference/interruption/interruption.js'; -import { InterruptionEventType } from '../inference/interruption/interruption.js'; import { VAD, type VADEvent } from '../vad.js'; import type { Agent, ModelSettings } from './agent.js'; import { StopResponse, asyncLocalStorage } from './agent.js'; @@ -114,24 +112,6 @@ export class AgentActivity implements RecognitionHooks { _mainTask?: Task; _userTurnCompletedTask?: Promise; - /** - * Notify that agent started speaking. - * This enables interruption detection in AudioRecognition. - * @internal - */ - notifyAgentSpeechStarted(): void { - this.audioRecognition?.onStartOfAgentSpeech(); - } - - /** - * Notify that agent stopped speaking. - * This disables interruption detection in AudioRecognition. 
- * @internal - */ - notifyAgentSpeechEnded(): void { - this.audioRecognition?.onEndOfAgentSpeech(); - } - constructor(agent: Agent, agentSession: AgentSession) { this.agent = agent; this.agentSession = agentSession; @@ -312,7 +292,6 @@ export class AgentActivity implements RecognitionHooks { // Disable stt node if stt is not provided stt: this.stt ? (...args) => this.agent.sttNode(...args) : undefined, vad: this.vad, - interruptionDetector: this.agentSession.interruptionDetector, turnDetector: typeof this.turnDetection === 'string' ? undefined : this.turnDetection, turnDetectionMode: this.turnDetectionMode, minEndpointingDelay: this.agentSession.options.minEndpointingDelay, @@ -721,46 +700,6 @@ export class AgentActivity implements RecognitionHooks { } } - onInterruption(ev: InterruptionEvent): void { - if (ev.type !== InterruptionEventType.INTERRUPTION) { - // Only handle actual interruptions, not overlap_speech_ended events - return; - } - - this.logger.info( - { - probability: ev.probability, - detectionDelayInS: ev.detectionDelayInS, - totalDurationInS: ev.totalDurationInS, - }, - 'adaptive interruption detected', - ); - - // Similar to onVADInferenceDone but triggered by the adaptive interruption detector - if (this.turnDetection === 'manual' || this.turnDetection === 'realtime_llm') { - return; - } - - if (this.llm instanceof RealtimeModel && this.llm.capabilities.turnDetection) { - return; - } - - this.realtimeSession?.startUserActivity(); - - if ( - this._currentSpeech && - !this._currentSpeech.interrupted && - this._currentSpeech.allowInterruptions - ) { - this.logger.info( - { 'speech id': this._currentSpeech.id }, - 'speech interrupted by adaptive interruption detector', - ); - this.realtimeSession?.interrupt(); - this._currentSpeech.interrupt(); - } - } - onInterimTranscript(ev: SpeechEvent): void { if (this.llm instanceof RealtimeModel && this.llm.capabilities.userTranscription) { // skip stt transcription if userTranscription is enabled on the realtime model diff --git a/agents/src/voice/agent_session.ts b/agents/src/voice/agent_session.ts index c9b194d00..29eae5a3f 100644 --- a/agents/src/voice/agent_session.ts +++ b/agents/src/voice/agent_session.ts @@ -15,7 +15,6 @@ import { type STTModelString, type TTSModelString, } from '../inference/index.js'; -import type { AdaptiveInterruptionDetector } from '../inference/interruption/AdaptiveInterruptionDetector.js'; import { type JobContext, getJobContext } from '../job.js'; import type { FunctionCall, FunctionCallOutput } from '../llm/chat_context.js'; import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js'; @@ -108,7 +107,6 @@ export type AgentSessionOptions = { vad?: VAD; llm?: LLM | RealtimeModel | LLMModels; tts?: TTS | TTSModelString; - interruptionDetector?: AdaptiveInterruptionDetector; userData?: UserData; voiceOptions?: Partial; connOptions?: SessionConnectOptions; @@ -170,8 +168,6 @@ export class AgentSession< /** @internal - Timestamp when the session started (milliseconds) */ _startedAt?: number; - interruptionDetector?: AdaptiveInterruptionDetector; - /** @internal - Current run state for testing */ _globalRunState?: RunResult; @@ -184,7 +180,6 @@ export class AgentSession< llm, tts, turnDetection, - interruptionDetector, userData, voiceOptions = defaultVoiceOptions, connOptions, @@ -220,7 +215,6 @@ export class AgentSession< } this.turnDetection = turnDetection; - this.interruptionDetector = interruptionDetector; this._userData = userData; // configurable IO @@ -687,8 +681,6 @@ export 
class AgentSession< return; } - const oldState = this._agentState; - if (state === 'speaking') { // Reset error counts when agent starts speaking this.llmErrorCounts = 0; @@ -704,25 +696,13 @@ export class AgentSession< // TODO(brian): PR4 - Set participant attributes if roomIO.room.localParticipant is available // (Ref: Python agent_session.py line 1161-1164) } - - // Notify AudioRecognition that agent started speaking (for interruption detection) - this.activity?.notifyAgentSpeechStarted(); - } else if (oldState === 'speaking') { - // Agent stopped speaking - if (this.agentSpeakingSpan !== undefined) { - // TODO(brian): PR4 - Set ATTR_END_TIME attribute if available - this.agentSpeakingSpan.end(); - this.agentSpeakingSpan = undefined; - } - - // Notify AudioRecognition that agent stopped speaking (for interruption detection) - this.activity?.notifyAgentSpeechEnded(); } else if (this.agentSpeakingSpan !== undefined) { - // Non-speaking to non-speaking transition but span is still open + // TODO(brian): PR4 - Set ATTR_END_TIME attribute if available this.agentSpeakingSpan.end(); this.agentSpeakingSpan = undefined; } + const oldState = this._agentState; this._agentState = state; // Handle user away timer based on state changes diff --git a/agents/src/voice/audio_recognition.ts b/agents/src/voice/audio_recognition.ts index 20f0ae6f8..25d430684 100644 --- a/agents/src/voice/audio_recognition.ts +++ b/agents/src/voice/audio_recognition.ts @@ -5,12 +5,6 @@ import { AudioFrame } from '@livekit/rtc-node'; import type { Context, Span } from '@opentelemetry/api'; import type { WritableStreamDefaultWriter } from 'node:stream/web'; import { ReadableStream } from 'node:stream/web'; -import type { AdaptiveInterruptionDetector } from '../inference/interruption/AdaptiveInterruptionDetector.js'; -import { - InterruptionStreamBase, - InterruptionStreamSentinel, -} from '../inference/interruption/InterruptionStream.js'; -import type { InterruptionEvent } from '../inference/interruption/interruption.js'; import { type ChatContext } from '../llm/chat_context.js'; import { log } from '../log.js'; import { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js'; @@ -45,7 +39,6 @@ export interface RecognitionHooks { onFinalTranscript: (ev: SpeechEvent) => void; onEndOfTurn: (info: EndOfTurnInfo) => Promise; onPreemptiveGeneration: (info: PreemptiveGenerationInfo) => void; - onInterruption: (ev: InterruptionEvent) => void; retrieveChatCtx: () => ChatContext; } @@ -60,7 +53,6 @@ export interface AudioRecognitionOptions { recognitionHooks: RecognitionHooks; stt?: STTNode; vad?: VAD; - interruptionDetector?: AdaptiveInterruptionDetector; turnDetector?: _TurnDetector; turnDetectionMode?: Exclude; minEndpointingDelay: number; @@ -96,7 +88,6 @@ export class AudioRecognition { private vadInputStream: ReadableStream; private sttInputStream: ReadableStream; - private interruptionInputStream: ReadableStream; private silenceAudioTransform = new IdentityTransform(); private silenceAudioWriter: WritableStreamDefaultWriter; @@ -105,19 +96,11 @@ export class AudioRecognition { private commitUserTurnTask?: Task; private vadTask?: Task; private sttTask?: Task; - private interruptionTask?: Task; - - // interruption detection - private interruptionDetector?: AdaptiveInterruptionDetector; - private interruptionStream?: InterruptionStreamBase; - private interruptionEnabled = false; - private agentSpeaking = false; constructor(opts: AudioRecognitionOptions) { this.hooks = opts.recognitionHooks; 
this.stt = opts.stt; this.vad = opts.vad; - this.interruptionDetector = opts.interruptionDetector; this.turnDetector = opts.turnDetector; this.turnDetectionMode = opts.turnDetectionMode; this.minEndpointingDelay = opts.minEndpointingDelay; @@ -125,15 +108,10 @@ export class AudioRecognition { this.lastLanguage = undefined; this.rootSpanContext = opts.rootSpanContext; - // Interruption detection is only enabled if both detector and VAD are provided - this.interruptionEnabled = this.interruptionDetector !== undefined && this.vad !== undefined; - this.deferredInputStream = new DeferredReadableStream(); - const [vadInputStream, rest] = this.deferredInputStream.stream.tee(); - const [sttInputStream, interruptionInputStream] = rest.tee(); + const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee(); this.vadInputStream = vadInputStream; this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable); - this.interruptionInputStream = interruptionInputStream; this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter(); } @@ -157,15 +135,6 @@ export class AudioRecognition { this.sttTask.result.catch((err) => { this.logger.error(`Error running STT task: ${err}`); }); - - if (this.interruptionEnabled && this.interruptionDetector) { - this.interruptionTask = Task.from(({ signal }) => - this.createInterruptionTask(this.interruptionDetector!, signal), - ); - this.interruptionTask.result.catch((err) => { - this.logger.error(`Error running interruption task: ${err}`); - }); - } } private async onSTTEvent(ev: SpeechEvent) { @@ -610,12 +579,6 @@ export class AudioRecognition { this.sampleRate = ev.frames[0].sampleRate; } - // If agent is speaking, user speech is overlap - trigger interruption detection - if (this.agentSpeaking) { - // TODO re-enable check for this.interruptionEnabled - this.onStartOfOverlapSpeech(ev.speechDuration, this.userTurnSpan); - } - this.bounceEOUTask?.cancel(); break; case VADEventType.INFERENCE_DONE: @@ -636,11 +599,6 @@ export class AudioRecognition { // when VAD fires END_OF_SPEECH, it already waited for the silence_duration this.speaking = false; - // If we were in overlap speech (agent speaking + user speaking), end it - if (this.agentSpeaking && this.interruptionEnabled) { - this.onEndOfOverlapSpeech(); - } - if ( this.vadBaseTurnDetection || (this.turnDetectionMode === 'stt' && this.userTurnCommitted) @@ -658,123 +616,6 @@ export class AudioRecognition { } } - private async createInterruptionTask( - interruptionDetector: AdaptiveInterruptionDetector, - signal: AbortSignal, - ) { - // Create the interruption stream from the detector - this.interruptionStream = interruptionDetector.createStream(); - - // Forward audio frames to the interruption stream - const reader = this.interruptionInputStream.getReader(); - - const forwardTask = (async () => { - try { - while (!signal.aborted) { - const { done, value: frame } = await reader.read(); - if (done) break; - await this.interruptionStream?.pushFrame(frame); - } - } catch (e) { - if (!signal.aborted) { - this.logger.error(e, 'Error forwarding audio to interruption stream'); - } - } finally { - reader.releaseLock(); - } - })(); - - // Read interruption events from the stream - const eventStream = this.interruptionStream.stream(); - const eventReader = eventStream.getReader(); - - const abortHandler = () => { - eventReader.releaseLock(); - this.interruptionStream?.close(); - signal.removeEventListener('abort', abortHandler); - }; - signal.addEventListener('abort', 
abortHandler); - - try { - while (!signal.aborted) { - const { done, value: ev } = await eventReader.read(); - if (done) break; - - this.logger.info({ type: ev.type, probability: ev.probability }, 'Interruption event'); - this.hooks.onInterruption(ev); - } - } catch (e) { - if (!signal.aborted) { - this.logger.error(e, 'Error in interruption task'); - } - } finally { - this.logger.debug('Interruption task closed'); - await forwardTask; - } - } - - /** - * Called when the agent starts speaking. - * Enables interruption detection by sending the agent-speech-started sentinel. - */ - onStartOfAgentSpeech(): void { - this.agentSpeaking = true; - - if (!this.interruptionEnabled || !this.interruptionStream) { - return; - } - - this.interruptionStream.pushFrame(InterruptionStreamSentinel.speechStarted()); - } - - /** - * Called when the agent stops speaking. - * Disables interruption detection by sending the agent-speech-ended sentinel. - */ - onEndOfAgentSpeech(): void { - if (!this.interruptionEnabled || !this.interruptionStream) { - this.agentSpeaking = false; - return; - } - - this.interruptionStream.pushFrame(InterruptionStreamSentinel.speechEnded()); - - if (this.agentSpeaking) { - // No interruption was detected, end the overlap inference (idempotent) - this.onEndOfOverlapSpeech(); - } - - this.agentSpeaking = false; - } - - /** - * Called when user starts speaking while agent is speaking (overlap speech). - * This triggers the interruption detection inference. - */ - onStartOfOverlapSpeech(speechDuration: number, userSpeakingSpan?: Span): void { - if (!this.interruptionEnabled || !this.interruptionStream) { - return; - } - - if (this.agentSpeaking && userSpeakingSpan) { - this.interruptionStream.pushFrame( - InterruptionStreamSentinel.overlapSpeechStarted(speechDuration, userSpeakingSpan), - ); - } - } - - /** - * Called when user stops speaking during overlap. - * This ends the interruption detection inference for this overlap period. 
- */ - onEndOfOverlapSpeech(): void { - if (!this.interruptionEnabled || !this.interruptionStream) { - return; - } - - this.interruptionStream.pushFrame(InterruptionStreamSentinel.overlapSpeechEnded()); - } - setInputAudioStream(audioStream: ReadableStream) { this.deferredInputStream.setSource(audioStream); } @@ -847,8 +688,6 @@ export class AudioRecognition { await this.sttTask?.cancelAndWait(); await this.vadTask?.cancelAndWait(); await this.bounceEOUTask?.cancelAndWait(); - await this.interruptionTask?.cancelAndWait(); - await this.interruptionStream?.close(); } private _endUserTurnSpan({ From 94cd4c4a53435ea1da8d08d67d28cf0a0def32a0 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Thu, 22 Jan 2026 10:43:45 +0100 Subject: [PATCH 17/25] reorganize --- .../AdaptiveInterruptionDetector.ts | 18 +--- .../interruption/InterruptionCacheEntry.ts | 44 +++++++++ .../interruption/InterruptionStream.ts | 65 +++++--------- agents/src/inference/interruption/defaults.ts | 2 +- agents/src/inference/interruption/errors.ts | 22 +++++ .../inference/interruption/http_transport.ts | 9 +- agents/src/inference/interruption/index.ts | 4 - agents/src/inference/interruption/types.ts | 86 ++++++++++++++++++ .../{ => interruption}/utils.test.ts | 0 .../{interruption.ts => utils.ts} | 90 +++---------------- .../inference/interruption/ws_transport.ts | 9 +- agents/src/inference/utils.ts | 15 ---- 12 files changed, 198 insertions(+), 166 deletions(-) create mode 100644 agents/src/inference/interruption/InterruptionCacheEntry.ts delete mode 100644 agents/src/inference/interruption/index.ts create mode 100644 agents/src/inference/interruption/types.ts rename agents/src/inference/{ => interruption}/utils.test.ts (100%) rename agents/src/inference/interruption/{interruption.ts => utils.ts} (55%) diff --git a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts index 0e137b15c..aa685dbe1 100644 --- a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts +++ b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts @@ -8,7 +8,8 @@ import { SAMPLE_RATE, interruptionOptionDefaults, } from './defaults.js'; -import { type InterruptionDetectionError, type InterruptionEvent } from './interruption.js'; +import type { InterruptionDetectionError } from './errors.js'; +import type { InterruptionEvent, InterruptionOptions } from './types.js'; type InterruptionCallbacks = { userInterruptionDetected: (event: InterruptionEvent) => void; @@ -17,21 +18,6 @@ type InterruptionCallbacks = { error: (error: InterruptionDetectionError) => void; }; -export interface InterruptionOptions { - sampleRate: number; - threshold: number; - minFrames: number; - maxAudioDurationInS: number; - audioPrefixDurationInS: number; - detectionIntervalInS: number; - inferenceTimeout: number; - minInterruptionDurationInS: number; - baseUrl: string; - apiKey: string; - apiSecret: string; - useProxy: boolean; -} - export type AdaptiveInterruptionDetectorOptions = Partial; export class AdaptiveInterruptionDetector extends (EventEmitter as new () => TypedEventEmitter) { diff --git a/agents/src/inference/interruption/InterruptionCacheEntry.ts b/agents/src/inference/interruption/InterruptionCacheEntry.ts new file mode 100644 index 000000000..4f2f0a20b --- /dev/null +++ b/agents/src/inference/interruption/InterruptionCacheEntry.ts @@ -0,0 +1,44 @@ +import { estimateProbability } from './utils.js'; + +/** + * Typed cache entry for interruption inference results. 
+ * Mutable to support setOrUpdate pattern from Python's _BoundedCache. + */ +export class InterruptionCacheEntry { + createdAt: number; + totalDurationInS: number; + predictionDurationInS: number; + detectionDelayInS: number; + speechInput?: Int16Array; + probabilities?: number[]; + isInterruption?: boolean; + + constructor(params: { + createdAt: number; + speechInput?: Int16Array; + totalDurationInS?: number; + predictionDurationInS?: number; + detectionDelayInS?: number; + probabilities?: number[]; + isInterruption?: boolean; + }) { + this.createdAt = params.createdAt; + this.totalDurationInS = params.totalDurationInS ?? 0; + this.predictionDurationInS = params.predictionDurationInS ?? 0; + this.detectionDelayInS = params.detectionDelayInS ?? 0; + this.speechInput = params.speechInput; + this.probabilities = params.probabilities; + this.isInterruption = params.isInterruption; + } + + /** + * The conservative estimated probability of the interruption event. + */ + get probability(): number { + return this.probabilities ? estimateProbability(this.probabilities) : 0; + } + + static default(): InterruptionCacheEntry { + return new InterruptionCacheEntry({ createdAt: 0 }); + } +} diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index 7d0bd8142..d2206b787 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -4,49 +4,36 @@ import { type ReadableStream, TransformStream } from 'stream/web'; import { log } from '../../log.js'; import { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js'; import { traceTypes } from '../../telemetry/index.js'; -import type { - AdaptiveInterruptionDetector, - InterruptionOptions, -} from './AdaptiveInterruptionDetector.js'; +import type { AdaptiveInterruptionDetector } from './AdaptiveInterruptionDetector.js'; +import { InterruptionCacheEntry } from './InterruptionCacheEntry.js'; import { FRAMES_PER_SECOND, apiConnectDefaults } from './defaults.js'; +import type { InterruptionDetectionError } from './errors.js'; import { createHttpTransport } from './http_transport.js'; import { - BoundedCache, - InterruptionCacheEntry, - type InterruptionDetectionError, + type AgentSpeechEnded, + type AgentSpeechStarted, + type ApiConnectOptions, + type Flush, type InterruptionEvent, InterruptionEventType, -} from './interruption.js'; + type InterruptionOptions, + type InterruptionSentinel, + type OverlapSpeechEnded, + type OverlapSpeechStarted, +} from './types.js'; +import { BoundedCache } from './utils.js'; import { createWsTransport } from './ws_transport.js'; -export interface AgentSpeechStarted { - type: 'agent-speech-started'; -} - -export interface AgentSpeechEnded { - type: 'agent-speech-ended'; -} - -export interface OverlapSpeechStarted { - type: 'overlap-speech-started'; - speechDurationInS: number; - userSpeakingSpan: Span; -} - -export interface OverlapSpeechEnded { - type: 'overlap-speech-ended'; -} - -export interface Flush { - type: 'flush'; -} - -export type InterruptionSentinel = - | AgentSpeechStarted - | AgentSpeechEnded - | OverlapSpeechStarted - | OverlapSpeechEnded - | Flush; +// Re-export sentinel types for backwards compatibility +export type { + AgentSpeechEnded, + AgentSpeechStarted, + ApiConnectOptions, + Flush, + InterruptionSentinel, + OverlapSpeechEnded, + OverlapSpeechStarted, +}; export class InterruptionStreamSentinel { static speechStarted(): AgentSpeechStarted { 
@@ -73,12 +60,6 @@ export class InterruptionStreamSentinel { } } -export interface ApiConnectOptions { - maxRetries: number; - retryInterval: number; - timeout: number; -} - function updateUserSpeakingSpan(span: Span, entry: InterruptionCacheEntry) { span.setAttribute( traceTypes.ATTR_IS_INTERRUPTION, diff --git a/agents/src/inference/interruption/defaults.ts b/agents/src/inference/interruption/defaults.ts index 1a2beeb08..9dccf40c2 100644 --- a/agents/src/inference/interruption/defaults.ts +++ b/agents/src/inference/interruption/defaults.ts @@ -1,5 +1,5 @@ -import type { InterruptionOptions } from './AdaptiveInterruptionDetector.js'; import type { ApiConnectOptions } from './InterruptionStream.js'; +import type { InterruptionOptions } from './types.js'; export const MIN_INTERRUPTION_DURATION_IN_S = 0.025 * 2; // 25ms per frame, 2 consecutive frames export const THRESHOLD = 0.65; diff --git a/agents/src/inference/interruption/errors.ts b/agents/src/inference/interruption/errors.ts index e69de29bb..ba95a9674 100644 --- a/agents/src/inference/interruption/errors.ts +++ b/agents/src/inference/interruption/errors.ts @@ -0,0 +1,22 @@ +/** + * Error thrown during interruption detection. + */ +export class InterruptionDetectionError extends Error { + readonly type = 'InterruptionDetectionError'; + + readonly timestamp: number; + readonly label: string; + readonly recoverable: boolean; + + constructor(message: string, timestamp: number, label: string, recoverable: boolean) { + super(message); + this.name = 'InterruptionDetectionError'; + this.timestamp = timestamp; + this.label = label; + this.recoverable = recoverable; + } + + toString(): string { + return `${this.name}: ${this.message} (label=${this.label}, timestamp=${this.timestamp}, recoverable=${this.recoverable})`; + } +} diff --git a/agents/src/inference/interruption/http_transport.ts b/agents/src/inference/interruption/http_transport.ts index b2419a514..96dfc03eb 100644 --- a/agents/src/inference/interruption/http_transport.ts +++ b/agents/src/inference/interruption/http_transport.ts @@ -2,13 +2,10 @@ import { ofetch } from 'ofetch'; import { TransformStream } from 'stream/web'; import { log } from '../../log.js'; import { createAccessToken } from '../utils.js'; +import { InterruptionCacheEntry } from './InterruptionCacheEntry.js'; import { intervalForRetry } from './defaults.js'; -import { - BoundedCache, - InterruptionCacheEntry, - type InterruptionEvent, - InterruptionEventType, -} from './interruption.js'; +import { type InterruptionEvent, InterruptionEventType } from './types.js'; +import type { BoundedCache } from './utils.js'; export interface PostOptions { baseUrl: string; diff --git a/agents/src/inference/interruption/index.ts b/agents/src/inference/interruption/index.ts deleted file mode 100644 index 0d0bc4c4a..000000000 --- a/agents/src/inference/interruption/index.ts +++ /dev/null @@ -1,4 +0,0 @@ -export * from './AdaptiveInterruptionDetector.js'; -export * from './interruption.js'; -export { InterruptionStreamSentinel } from './InterruptionStream.js'; -export type { InterruptionSentinel } from './InterruptionStream.js'; diff --git a/agents/src/inference/interruption/types.ts b/agents/src/inference/interruption/types.ts new file mode 100644 index 000000000..cf13d4d2d --- /dev/null +++ b/agents/src/inference/interruption/types.ts @@ -0,0 +1,86 @@ +import type { Span } from '@opentelemetry/api'; + +/** + * Event types for interruption detection. 
+ */ +export enum InterruptionEventType { + INTERRUPTION = 'interruption', + OVERLAP_SPEECH_ENDED = 'overlap_speech_ended', +} + +/** + * Event emitted when an interruption is detected or overlap speech ends. + */ +export interface InterruptionEvent { + type: InterruptionEventType; + timestamp: number; + isInterruption: boolean; + totalDurationInS: number; + predictionDurationInS: number; + detectionDelayInS: number; + overlapSpeechStartedAt?: number; + speechInput?: Int16Array; + probabilities?: number[]; + probability: number; +} + +/** + * Configuration options for interruption detection. + */ +export interface InterruptionOptions { + sampleRate: number; + threshold: number; + minFrames: number; + maxAudioDurationInS: number; + audioPrefixDurationInS: number; + detectionIntervalInS: number; + inferenceTimeout: number; + minInterruptionDurationInS: number; + baseUrl: string; + apiKey: string; + apiSecret: string; + useProxy: boolean; +} + +/** + * API connection options for transport layers. + */ +export interface ApiConnectOptions { + maxRetries: number; + retryInterval: number; + timeout: number; +} + +// Sentinel types for stream control signals + +export interface AgentSpeechStarted { + type: 'agent-speech-started'; +} + +export interface AgentSpeechEnded { + type: 'agent-speech-ended'; +} + +export interface OverlapSpeechStarted { + type: 'overlap-speech-started'; + speechDurationInS: number; + userSpeakingSpan: Span; +} + +export interface OverlapSpeechEnded { + type: 'overlap-speech-ended'; +} + +export interface Flush { + type: 'flush'; +} + +/** + * Union type for all stream control signals. + */ +export type InterruptionSentinel = + | AgentSpeechStarted + | AgentSpeechEnded + | OverlapSpeechStarted + | OverlapSpeechEnded + | Flush; diff --git a/agents/src/inference/utils.test.ts b/agents/src/inference/interruption/utils.test.ts similarity index 100% rename from agents/src/inference/utils.test.ts rename to agents/src/inference/interruption/utils.test.ts diff --git a/agents/src/inference/interruption/interruption.ts b/agents/src/inference/interruption/utils.ts similarity index 55% rename from agents/src/inference/interruption/interruption.ts rename to agents/src/inference/interruption/utils.ts index 5c17ff534..161e08bb1 100644 --- a/agents/src/inference/interruption/interruption.ts +++ b/agents/src/inference/interruption/utils.ts @@ -1,4 +1,3 @@ -import { slidingWindowMinMax } from '../utils.js'; import { FRAME_DURATION_IN_S, MIN_INTERRUPTION_DURATION_IN_S } from './defaults.js'; /** @@ -106,44 +105,11 @@ export class BoundedCache { } } -export enum InterruptionEventType { - INTERRUPTION = 'interruption', - OVERLAP_SPEECH_ENDED = 'overlap_speech_ended', -} -export interface InterruptionEvent { - type: InterruptionEventType; - timestamp: number; - isInterruption: boolean; - totalDurationInS: number; - predictionDurationInS: number; - detectionDelayInS: number; - overlapSpeechStartedAt?: number; - speechInput?: Int16Array; - probabilities?: number[]; - probability: number; -} - -export class InterruptionDetectionError extends Error { - readonly type = 'InterruptionDetectionError'; - - readonly timestamp: number; - readonly label: string; - readonly recoverable: boolean; - - constructor(message: string, timestamp: number, label: string, recoverable: boolean) { - super(message); - this.name = 'InterruptionDetectionError'; - this.timestamp = timestamp; - this.label = label; - this.recoverable = recoverable; - } - - toString(): string { - return `${this.name}: ${this.message} 
(label=${this.label}, timestamp=${this.timestamp}, recoverable=${this.recoverable})`; - } -} - -function estimateProbability( +/** + * Estimate probability using sliding window min-max algorithm. + * Returns a conservative estimate based on the minimum window size. + */ +export function estimateProbability( probabilities: number[], windowSizeInS: number = MIN_INTERRUPTION_DURATION_IN_S, ): number { @@ -155,45 +121,17 @@ function estimateProbability( return slidingWindowMinMax(probabilities, minWindow); } -/** - * Typed cache entry for interruption inference results. - * Mutable to support setOrUpdate pattern from Python's _BoundedCache. - */ -export class InterruptionCacheEntry { - createdAt: number; - totalDurationInS: number; - predictionDurationInS: number; - detectionDelayInS: number; - speechInput?: Int16Array; - probabilities?: number[]; - isInterruption?: boolean; - - constructor(params: { - createdAt: number; - speechInput?: Int16Array; - totalDurationInS?: number; - predictionDurationInS?: number; - detectionDelayInS?: number; - probabilities?: number[]; - isInterruption?: boolean; - }) { - this.createdAt = params.createdAt; - this.totalDurationInS = params.totalDurationInS ?? 0; - this.predictionDurationInS = params.predictionDurationInS ?? 0; - this.detectionDelayInS = params.detectionDelayInS ?? 0; - this.speechInput = params.speechInput; - this.probabilities = params.probabilities; - this.isInterruption = params.isInterruption; +export function slidingWindowMinMax(probabilities: number[], minWindow: number): number { + if (probabilities.length < minWindow) { + return -Infinity; } - /** - * The conservative estimated probability of the interruption event. - */ - get probability(): number { - return this.probabilities ? estimateProbability(this.probabilities) : 0; - } + let maxOfMins = -Infinity; - static default(): InterruptionCacheEntry { - return new InterruptionCacheEntry({ createdAt: 0 }); + for (let i = 0; i <= probabilities.length - minWindow; i++) { + const windowMin = Math.min(...probabilities.slice(i, i + minWindow)); + maxOfMins = Math.max(maxOfMins, windowMin); } + + return maxOfMins; } diff --git a/agents/src/inference/interruption/ws_transport.ts b/agents/src/inference/interruption/ws_transport.ts index 3d76ff205..8f6409f02 100644 --- a/agents/src/inference/interruption/ws_transport.ts +++ b/agents/src/inference/interruption/ws_transport.ts @@ -3,13 +3,10 @@ import { TransformStream } from 'stream/web'; import WebSocket, { createWebSocketStream } from 'ws'; import { log } from '../../log.js'; import { createAccessToken } from '../utils.js'; +import { InterruptionCacheEntry } from './InterruptionCacheEntry.js'; import { intervalForRetry } from './defaults.js'; -import { - type BoundedCache, - InterruptionCacheEntry, - type InterruptionEvent, - InterruptionEventType, -} from './interruption.js'; +import { type InterruptionEvent, InterruptionEventType } from './types.js'; +import type { BoundedCache } from './utils.js'; // WebSocket message types const MSG_SESSION_CREATE = 'session.create'; diff --git a/agents/src/inference/utils.ts b/agents/src/inference/utils.ts index e898d4de1..b3b772ef6 100644 --- a/agents/src/inference/utils.ts +++ b/agents/src/inference/utils.ts @@ -64,18 +64,3 @@ export async function connectWs( socket.once('close', onClose); }); } - -export function slidingWindowMinMax(probabilities: number[], minWindow: number): number { - if (probabilities.length < minWindow) { - return -Infinity; - } - - let maxOfMins = -Infinity; - - for (let i = 0; i <= 
probabilities.length - minWindow; i++) { - const windowMin = Math.min(...probabilities.slice(i, i + minWindow)); - maxOfMins = Math.max(maxOfMins, windowMin); - } - - return maxOfMins; -} From 776cb300b70698d7f43bebef2d2ded01ba34723b Mon Sep 17 00:00:00 2001 From: lukasIO Date: Thu, 22 Jan 2026 10:46:06 +0100 Subject: [PATCH 18/25] revert test changes --- .changeset/config.json | 1 + .changeset/shiny-eels-throw.md | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) delete mode 100644 .changeset/shiny-eels-throw.md diff --git a/.changeset/config.json b/.changeset/config.json index 6e26590ab..29b38eb85 100644 --- a/.changeset/config.json +++ b/.changeset/config.json @@ -8,6 +8,7 @@ ], "commit": false, "ignore": ["livekit-agents-examples"], + "fixed": [["@livekit/agents", "@livekit/agents-plugin-*", "@livekit/agents-plugins-test"]], "access": "public", "baseBranch": "main", "updateInternalDependencies": "patch", diff --git a/.changeset/shiny-eels-throw.md b/.changeset/shiny-eels-throw.md deleted file mode 100644 index df3e21f67..000000000 --- a/.changeset/shiny-eels-throw.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -'@livekit/agents': patch ---- - -barge in From a580d7e020e4ca75a1e364865589bed30790c4aa Mon Sep 17 00:00:00 2001 From: lukasIO Date: Thu, 22 Jan 2026 10:48:34 +0100 Subject: [PATCH 19/25] remove broken example --- examples/src/adaptive_interruption.ts | 109 -------------------------- 1 file changed, 109 deletions(-) delete mode 100644 examples/src/adaptive_interruption.ts diff --git a/examples/src/adaptive_interruption.ts b/examples/src/adaptive_interruption.ts deleted file mode 100644 index 6e6700f58..000000000 --- a/examples/src/adaptive_interruption.ts +++ /dev/null @@ -1,109 +0,0 @@ -// SPDX-FileCopyrightText: 2025 LiveKit, Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -/** - * This example demonstrates how to use the AdaptiveInterruptionDetector - * for detecting user interruptions during agent speech. - * - * The detector analyzes overlapping speech (when user speaks while agent is speaking) - * and determines whether the user intends to interrupt or is just providing backchannel - * feedback (like "uh-huh", "okay", etc). - * - * The interruption detection is integrated into AudioRecognition and works automatically - * when the detector is provided along with VAD. It: - * 1. Forwards audio frames to the detector when the agent is speaking - * 2. Triggers overlap detection when VAD detects user speech during agent speech - * 3. 
Emits interruption events that can be handled to stop/pause agent speech - */ -import { - AdaptiveInterruptionDetector, - type JobContext, - type JobProcess, - WorkerOptions, - cli, - defineAgent, - log, - voice, -} from '@livekit/agents'; -import * as silero from '@livekit/agents-plugin-silero'; -import { fileURLToPath } from 'node:url'; - -export default defineAgent({ - prewarm: async (proc: JobProcess) => { - proc.userData.vad = await silero.VAD.load(); - }, - entry: async (ctx: JobContext) => { - const logger = log(); - const vad = ctx.proc.userData.vad as silero.VAD; - - await ctx.connect(); - - // Create the adaptive interruption detector with custom options - const interruptionDetector = new AdaptiveInterruptionDetector({ - // Threshold for interruption classification (0-1) - // Higher = less sensitive, lower = more sensitive - threshold: 0.65, - // Minimum duration of overlap speech to consider as potential interruption - minInterruptionDuration: 0.05, - // Maximum audio duration to analyze (including prefix) - maxAudioDuration: 3.0, - // Audio context to include before overlap started - audioPrefixDuration: 0.5, - // How often to run inference during overlap - detectionInterval: 0.1, - }); - - // Listen for interruption events on the detector (optional - for logging/metrics) - interruptionDetector.on('interruptionDetected', () => { - logger.info('Interruption detected via detector event'); - }); - - interruptionDetector.on('overlapSpeechDetected', () => { - logger.info('Overlap speech ended without interruption (backchannel)'); - }); - - // Create the agent - const agent = new voice.Agent({ - instructions: `You are a helpful assistant that demonstrates interruption detection. - Speak naturally and respond to the user. When you are interrupted, - you will stop speaking and listen to the user.`, - }); - - // Create the session with interruption detection enabled - // The detector is passed to AgentSession which wires it through to AudioRecognition - const session = new voice.AgentSession({ - llm: 'openai/gpt-4.1-mini', - stt: 'deepgram/nova-3', - tts: 'cartesia/sonic-2:c45bc5ec-dc68-4feb-8829-6e6b2748095d', - vad, - // Pass the interruption detector - interruptionDetector, - voiceOptions: { - allowInterruptions: false, - }, - }); - - // Start the session - await session.start({ - agent, - room: ctx.room, - }); - - // // Example: Dynamically adjust threshold based on context - // // This could be useful to adapt to different conversation styles - // setTimeout(() => { - // logger.info('Adjusting interruption threshold for more sensitive detection'); - // interruptionDetector.updateOptions({ - // threshold: 0.5, // More sensitive to interruptions - // minInterruptionDuration: 0.03, // Detect shorter interruptions - // }); - // }, 30000); - - session.say( - 'Hello! I can detect when you want to interrupt me versus when you are just saying things like uh-huh or okay. 
Try talking while I am speaking to see how it works!', - ); - }, -}); - -cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) })); From dd0c98ac460ab4924453a0a85a99841521a91fe0 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Thu, 22 Jan 2026 11:03:32 +0100 Subject: [PATCH 20/25] fix mutable transport options --- .../AdaptiveInterruptionDetector.ts | 2 +- .../interruption/InterruptionStream.ts | 32 +++++++++++++++---- .../inference/interruption/http_transport.ts | 8 ++++- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts index aa685dbe1..a473ec985 100644 --- a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts +++ b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts @@ -18,7 +18,7 @@ type InterruptionCallbacks = { error: (error: InterruptionDetectionError) => void; }; -export type AdaptiveInterruptionDetectorOptions = Partial; +export type AdaptiveInterruptionDetectorOptions = Omit, 'useProxy'>; export class AdaptiveInterruptionDetector extends (EventEmitter as new () => TypedEventEmitter) { options: InterruptionOptions; diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index d2206b787..75d18729a 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -93,6 +93,12 @@ export class InterruptionStreamBase { // Store reconnect function for WebSocket transport private wsReconnect?: () => Promise; + // Mutable transport options that can be updated via updateOptions() + private transportOptions: { + threshold: number; + minFrames: number; + }; + constructor(model: AdaptiveInterruptionDetector, apiOptions: Partial) { this.inputStream = createStreamChannel< InterruptionSentinel | AudioFrame, @@ -103,6 +109,12 @@ export class InterruptionStreamBase { this.options = { ...model.options }; this.apiOptions = { ...apiConnectDefaults, ...apiOptions }; + // Initialize mutable transport options + this.transportOptions = { + threshold: this.options.threshold, + minFrames: this.options.minFrames, + }; + this.eventStream = this.setupTransform(); } @@ -115,10 +127,12 @@ export class InterruptionStreamBase { }): Promise { if (options.threshold !== undefined) { this.options.threshold = options.threshold; + this.transportOptions.threshold = options.threshold; } if (options.minInterruptionDurationInS !== undefined) { this.options.minInterruptionDurationInS = options.minInterruptionDurationInS; this.options.minFrames = Math.ceil(options.minInterruptionDurationInS * FRAMES_PER_SECOND); + this.transportOptions.minFrames = this.options.minFrames; } // Trigger WebSocket reconnection if using proxy (WebSocket transport) if (this.options.useProxy && this.wsReconnect) { @@ -259,24 +273,30 @@ export class InterruptionStreamBase { ); // Second transform: transport layer (HTTP or WebSocket based on useProxy) - const transportOptions = { + // Use a getter for threshold/minFrames so HTTP transport picks up updated values + const getTransportOptions = () => ({ baseUrl: this.options.baseUrl, apiKey: this.options.apiKey, apiSecret: this.options.apiSecret, sampleRate: this.options.sampleRate, - threshold: this.options.threshold, - minFrames: this.options.minFrames, + threshold: this.transportOptions.threshold, + minFrames: this.transportOptions.minFrames, timeout: this.options.inferenceTimeout, 
maxRetries: this.apiOptions.maxRetries, - }; + }); let transport: TransformStream; if (this.options.useProxy) { - const wsResult = createWsTransport(transportOptions, getState, setState, handleSpanUpdate); + const wsResult = createWsTransport( + getTransportOptions(), + getState, + setState, + handleSpanUpdate, + ); transport = wsResult.transport; this.wsReconnect = wsResult.reconnect; } else { - transport = createHttpTransport(transportOptions, getState, setState, handleSpanUpdate); + transport = createHttpTransport(getTransportOptions, getState, setState, handleSpanUpdate); } // Pipeline: input -> audioTransformer -> transport -> eventStream diff --git a/agents/src/inference/interruption/http_transport.ts b/agents/src/inference/interruption/http_transport.ts index 96dfc03eb..41832878d 100644 --- a/agents/src/inference/interruption/http_transport.ts +++ b/agents/src/inference/interruption/http_transport.ts @@ -94,9 +94,12 @@ export interface HttpTransportState { * * This transport receives Int16Array audio slices and outputs InterruptionEvents. * Each audio slice triggers an HTTP POST request. + * + * @param getOptions - Getter function that returns current transport options. + * This allows options like threshold/minFrames to be updated dynamically. */ export function createHttpTransport( - options: HttpTransportOptions, + getOptions: () => HttpTransportOptions, getState: () => HttpTransportState, setState: (partial: Partial) => void, updateUserSpeakingSpan?: (entry: InterruptionCacheEntry) => void, @@ -115,6 +118,9 @@ export function createHttpTransport( const state = getState(); if (!state.overlapSpeechStartedAt) return; + // Get current options on each request to pick up any updates + const options = getOptions(); + try { const resp = await predictHTTP( chunk, From 00c16ffde82cf35baa0e958023ab4b3373ce7986 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Thu, 22 Jan 2026 11:05:42 +0100 Subject: [PATCH 21/25] async fixes --- .../interruption/InterruptionStream.ts | 35 +++++++++---------- .../inference/interruption/http_transport.ts | 9 ++--- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index 75d18729a..7be72ef5f 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -95,8 +95,14 @@ export class InterruptionStreamBase { // Mutable transport options that can be updated via updateOptions() private transportOptions: { + baseUrl: string; + apiKey: string; + apiSecret: string; + sampleRate: number; threshold: number; minFrames: number; + timeout: number; + maxRetries: number; }; constructor(model: AdaptiveInterruptionDetector, apiOptions: Partial) { @@ -111,8 +117,14 @@ export class InterruptionStreamBase { // Initialize mutable transport options this.transportOptions = { + baseUrl: this.options.baseUrl, + apiKey: this.options.apiKey, + apiSecret: this.options.apiSecret, + sampleRate: this.options.sampleRate, threshold: this.options.threshold, minFrames: this.options.minFrames, + timeout: this.options.inferenceTimeout, + maxRetries: this.apiOptions.maxRetries, }; this.eventStream = this.setupTransform(); @@ -273,30 +285,15 @@ export class InterruptionStreamBase { ); // Second transform: transport layer (HTTP or WebSocket based on useProxy) - // Use a getter for threshold/minFrames so HTTP transport picks up updated values - const getTransportOptions = () => ({ - baseUrl: 
this.options.baseUrl, - apiKey: this.options.apiKey, - apiSecret: this.options.apiSecret, - sampleRate: this.options.sampleRate, - threshold: this.transportOptions.threshold, - minFrames: this.transportOptions.minFrames, - timeout: this.options.inferenceTimeout, - maxRetries: this.apiOptions.maxRetries, - }); + const transportOptions = this.transportOptions; let transport: TransformStream; if (this.options.useProxy) { - const wsResult = createWsTransport( - getTransportOptions(), - getState, - setState, - handleSpanUpdate, - ); + const wsResult = createWsTransport(transportOptions, getState, setState, handleSpanUpdate); transport = wsResult.transport; this.wsReconnect = wsResult.reconnect; } else { - transport = createHttpTransport(getTransportOptions, getState, setState, handleSpanUpdate); + transport = createHttpTransport(transportOptions, getState, setState, handleSpanUpdate); } // Pipeline: input -> audioTransformer -> transport -> eventStream @@ -346,7 +343,7 @@ export class InterruptionStreamBase { async flush(): Promise { this.ensureStreamsNotEnded(); - this.inputStream.write(InterruptionStreamSentinel.flush()); + await this.inputStream.write(InterruptionStreamSentinel.flush()); } async endInput(): Promise { diff --git a/agents/src/inference/interruption/http_transport.ts b/agents/src/inference/interruption/http_transport.ts index 41832878d..c26107689 100644 --- a/agents/src/inference/interruption/http_transport.ts +++ b/agents/src/inference/interruption/http_transport.ts @@ -95,11 +95,11 @@ export interface HttpTransportState { * This transport receives Int16Array audio slices and outputs InterruptionEvents. * Each audio slice triggers an HTTP POST request. * - * @param getOptions - Getter function that returns current transport options. - * This allows options like threshold/minFrames to be updated dynamically. + * @param options - Transport options object. This is read on each request, so mutations + * to threshold/minFrames will be picked up dynamically. 
*/ export function createHttpTransport( - getOptions: () => HttpTransportOptions, + options: HttpTransportOptions, getState: () => HttpTransportState, setState: (partial: Partial) => void, updateUserSpeakingSpan?: (entry: InterruptionCacheEntry) => void, @@ -118,9 +118,6 @@ export function createHttpTransport( const state = getState(); if (!state.overlapSpeechStartedAt) return; - // Get current options on each request to pick up any updates - const options = getOptions(); - try { const resp = await predictHTTP( chunk, From 7dd5bbed15e93e24298d812c04c2fd33928bb5e6 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Thu, 22 Jan 2026 11:08:12 +0100 Subject: [PATCH 22/25] terminate ws on timeout --- agents/src/inference/interruption/ws_transport.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/agents/src/inference/interruption/ws_transport.ts b/agents/src/inference/interruption/ws_transport.ts index 8f6409f02..93d7c568b 100644 --- a/agents/src/inference/interruption/ws_transport.ts +++ b/agents/src/inference/interruption/ws_transport.ts @@ -75,16 +75,17 @@ async function connectWebSocket(options: WsTransportOptions): Promise<{ const { readable, writable } = webSocketToStream(ws); await new Promise((resolve, reject) => { - const timeout = setTimeout( - () => reject(new Error('WebSocket connection timeout')), - options.timeout, - ); + const timeout = setTimeout(() => { + ws.terminate(); + reject(new Error('WebSocket connection timeout')); + }, options.timeout); ws.once('open', () => { clearTimeout(timeout); resolve(); }); ws.once('error', (err) => { clearTimeout(timeout); + ws.terminate(); reject(err); }); }); From 543742db029af52f135b891b14de78e80efa3fcd Mon Sep 17 00:00:00 2001 From: lukasIO Date: Thu, 22 Jan 2026 11:11:51 +0100 Subject: [PATCH 23/25] more fixes --- agents/src/inference/interruption/ws_transport.ts | 8 +------- agents/src/stream/stream_channel.ts | 1 + 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/agents/src/inference/interruption/ws_transport.ts b/agents/src/inference/interruption/ws_transport.ts index 93d7c568b..663a9b08e 100644 --- a/agents/src/inference/interruption/ws_transport.ts +++ b/agents/src/inference/interruption/ws_transport.ts @@ -183,13 +183,7 @@ export function createWsTransport( const message: WsMessage = JSON.parse(line); handleMessage(message); } catch { - // Try parsing the whole buffer as a single message - try { - const message: WsMessage = JSON.parse(line); - handleMessage(message); - } catch { - logger.warn({ line }, 'Failed to parse WebSocket message'); - } + logger.warn({ line }, 'Failed to parse WebSocket message'); } } } diff --git a/agents/src/stream/stream_channel.ts b/agents/src/stream/stream_channel.ts index 546cf93ff..75fcfd6c7 100644 --- a/agents/src/stream/stream_channel.ts +++ b/agents/src/stream/stream_channel.ts @@ -21,6 +21,7 @@ export function createStreamChannel(): StreamChannel write: (chunk: T) => writer.write(chunk), stream: () => transform.readable, abort: (error: E) => { + isClosed = true; return writer.abort(error); }, close: async () => { From 9f6132628153a05fdb9c395916e38c8f3625e9b3 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Thu, 22 Jan 2026 11:19:52 +0100 Subject: [PATCH 24/25] add license headers --- .../src/inference/interruption/AdaptiveInterruptionDetector.ts | 3 +++ agents/src/inference/interruption/InterruptionCacheEntry.ts | 3 +++ agents/src/inference/interruption/InterruptionStream.ts | 3 +++ agents/src/inference/interruption/defaults.ts | 3 +++ 
agents/src/inference/interruption/errors.ts | 3 +++ agents/src/inference/interruption/http_transport.ts | 3 +++ agents/src/inference/interruption/types.ts | 3 +++ agents/src/inference/interruption/utils.test.ts | 2 +- agents/src/inference/interruption/utils.ts | 3 +++ 9 files changed, 25 insertions(+), 1 deletion(-) diff --git a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts index a473ec985..eb27a2482 100644 --- a/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts +++ b/agents/src/inference/interruption/AdaptiveInterruptionDetector.ts @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 import type { TypedEventEmitter } from '@livekit/typed-emitter'; import EventEmitter from 'events'; import { log } from '../../log.js'; diff --git a/agents/src/inference/interruption/InterruptionCacheEntry.ts b/agents/src/inference/interruption/InterruptionCacheEntry.ts index 4f2f0a20b..e6da964d8 100644 --- a/agents/src/inference/interruption/InterruptionCacheEntry.ts +++ b/agents/src/inference/interruption/InterruptionCacheEntry.ts @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: 2024 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 import { estimateProbability } from './utils.js'; /** diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index 7be72ef5f..95565d446 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: 2024 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 import { AudioFrame, AudioResampler } from '@livekit/rtc-node'; import type { Span } from '@opentelemetry/api'; import { type ReadableStream, TransformStream } from 'stream/web'; diff --git a/agents/src/inference/interruption/defaults.ts b/agents/src/inference/interruption/defaults.ts index 9dccf40c2..cd7988f6a 100644 --- a/agents/src/inference/interruption/defaults.ts +++ b/agents/src/inference/interruption/defaults.ts @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: 2024 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 import type { ApiConnectOptions } from './InterruptionStream.js'; import type { InterruptionOptions } from './types.js'; diff --git a/agents/src/inference/interruption/errors.ts b/agents/src/inference/interruption/errors.ts index ba95a9674..a346b7d28 100644 --- a/agents/src/inference/interruption/errors.ts +++ b/agents/src/inference/interruption/errors.ts @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: 2024 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 /** * Error thrown during interruption detection. */ diff --git a/agents/src/inference/interruption/http_transport.ts b/agents/src/inference/interruption/http_transport.ts index c26107689..25f8b7c25 100644 --- a/agents/src/inference/interruption/http_transport.ts +++ b/agents/src/inference/interruption/http_transport.ts @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 import { ofetch } from 'ofetch'; import { TransformStream } from 'stream/web'; import { log } from '../../log.js'; diff --git a/agents/src/inference/interruption/types.ts b/agents/src/inference/interruption/types.ts index cf13d4d2d..f6f083f38 100644 --- a/agents/src/inference/interruption/types.ts +++ b/agents/src/inference/interruption/types.ts @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: 2024 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 import type { Span } from '@opentelemetry/api'; /** diff --git a/agents/src/inference/interruption/utils.test.ts b/agents/src/inference/interruption/utils.test.ts index bcd2fe9a8..762bc5ea3 100644 --- a/agents/src/inference/interruption/utils.test.ts +++ b/agents/src/inference/interruption/utils.test.ts @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2025 LiveKit, Inc. +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. // // SPDX-License-Identifier: Apache-2.0 import { describe, expect, it } from 'vitest'; diff --git a/agents/src/inference/interruption/utils.ts b/agents/src/inference/interruption/utils.ts index 161e08bb1..0c5a4bf40 100644 --- a/agents/src/inference/interruption/utils.ts +++ b/agents/src/inference/interruption/utils.ts @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: 2024 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 import { FRAME_DURATION_IN_S, MIN_INTERRUPTION_DURATION_IN_S } from './defaults.js'; /** From d32c2029593f7a7467761aaddafe43c4c8e81654 Mon Sep 17 00:00:00 2001 From: lukasIO Date: Thu, 22 Jan 2026 11:40:34 +0100 Subject: [PATCH 25/25] emit events on detector --- .../interruption/InterruptionStream.ts | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/agents/src/inference/interruption/InterruptionStream.ts b/agents/src/inference/interruption/InterruptionStream.ts index 95565d446..bdd9b178c 100644 --- a/agents/src/inference/interruption/InterruptionStream.ts +++ b/agents/src/inference/interruption/InterruptionStream.ts @@ -299,8 +299,23 @@ export class InterruptionStreamBase { transport = createHttpTransport(transportOptions, getState, setState, handleSpanUpdate); } + const eventEmitter = new TransformStream({ + transform: (chunk, controller) => { + if (chunk.type === InterruptionEventType.INTERRUPTION) { + this.model.emit('userInterruptionDetected', chunk); + } else if (chunk.type === InterruptionEventType.OVERLAP_SPEECH_ENDED) { + this.model.emit('overlapSpeechEnded', chunk); + } + controller.enqueue(chunk); + }, + }); + // Pipeline: input -> audioTransformer -> transport -> eventStream - return this.inputStream.stream().pipeThrough(audioTransformer).pipeThrough(transport); + return this.inputStream + .stream() + .pipeThrough(audioTransformer) + .pipeThrough(transport) + .pipeThrough(eventEmitter); } private ensureInputNotEnded() {
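
With this last patch the interruption events produced by the stream are mirrored onto the detector itself, so callers can react to interruptions without consuming the event stream directly. The sketch below shows how the pieces introduced across these patches could be wired together by hand. It is illustrative only: the deep import paths are assumptions (the interruption/ index barrel was removed in PATCH 17), it assumes the detector still exposes createStream() as in the pre-reorganize AudioRecognition wiring, the event names follow the emits added in PATCH 25, and the tracer name, speech duration, and frame source are placeholders — this is not the wiring AgentSession uses.

// Sketch only: assumed import paths and placeholder values, see note above.
import { trace } from '@opentelemetry/api';
import type { AudioFrame } from '@livekit/rtc-node';
import { AdaptiveInterruptionDetector } from './inference/interruption/AdaptiveInterruptionDetector.js';
import { InterruptionStreamSentinel } from './inference/interruption/InterruptionStream.js';

async function runInterruptionDetection(overlapFrames: AsyncIterable<AudioFrame>) {
  const detector = new AdaptiveInterruptionDetector({
    threshold: 0.65, // classification threshold (0-1); higher = less sensitive
    minInterruptionDurationInS: 0.05, // minimum overlap duration to count as an interruption
  });

  // Events re-emitted on the detector by InterruptionStreamBase.setupTransform() (PATCH 25).
  detector.on('userInterruptionDetected', (ev) => {
    console.log('interruption detected', ev.probability, ev.detectionDelayInS);
  });
  detector.on('overlapSpeechEnded', (ev) => {
    console.log('overlap ended without interruption', ev.totalDurationInS);
  });

  const stream = detector.createStream();

  // Agent begins speaking: arms detection, but no inference runs yet.
  await stream.pushFrame(InterruptionStreamSentinel.speechStarted());

  // VAD reports the user speaking over the agent: the overlap sentinel is what
  // actually starts interruption inference (in the agent this is driven from
  // AudioRecognition's VAD events together with the user-turn span).
  const span = trace.getTracer('example').startSpan('user_speaking');
  await stream.pushFrame(InterruptionStreamSentinel.overlapSpeechStarted(0.1, span));

  for await (const frame of overlapFrames) {
    await stream.pushFrame(frame); // user audio captured while both sides are speaking
  }

  // User stopped speaking during the overlap, then the agent finished its turn.
  await stream.pushFrame(InterruptionStreamSentinel.overlapSpeechEnded());
  await stream.pushFrame(InterruptionStreamSentinel.speechEnded());
  span.end();
  await stream.close();
}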