Enable Gemini Realtime Model to Produce Error Log #1016

chatgpt-codex-connector · 2026-02-03T08:44:08Z

Re-check overlap state after HTTP await

Because state is captured before the await predictHTTP(...), an overlap that ends while the request is in flight will still have state.overlapSpeechStarted === true here, which can emit an interruption event after overlap speech has already ended. This shows up when overlap ends quickly or the HTTP call is slow, producing false-positive interruptions. Consider re-reading getState() after the await (or checking a monotonic overlap token) before emitting.

Useful? React with 👍 / 👎.

-Original file line number
+Diff line change
@@ Expand Up / @@ -8,13 +8,7 @@ @@
       ],
       "commit": false,
       "ignore": ["livekit-agents-examples"],
-      "fixed": [
-        [
-          "@livekit/agents",
-          "@livekit/agents-plugin-*",
-          "@livekit/agents-plugins-test"
-        ]
-      ],
+      "fixed": [["@livekit/agents", "@livekit/agents-plugin-*", "@livekit/agents-plugins-test"]],
       "access": "public",
       "baseBranch": "main",
       "updateInternalDependencies": "patch",
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -46,11 +46,11 @@ jobs: @@
           - name: Test agents
             if: steps.filter.outputs.agents-or-tests == 'true' || github.event_name == 'push'
             run: pnpm test agents
-          - name: Test examples
-            if: (steps.filter.outputs.examples == 'true' || github.event_name == 'push') && secrets.OPENAI_API_KEY != ''
-            env:
-              OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-            run: pnpm test:examples
+          # - name: Test examples
+          #   if: (steps.filter.outputs.examples == 'true' || github.event_name == 'push')
+          #   env:
+          #     OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          #   run: pnpm test:examples
           # TODO (AJS-83) Re-enable once plugins are refactored with abort controllers
           # - name: Test all plugins
           #   if: steps.filter.outputs.agents-or-tests == 'true' || github.event_name != 'pull_request'
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -69,6 +69,7 @@ @@
         "heap-js": "^2.6.0",
         "json-schema": "^0.4.0",
         "livekit-server-sdk": "^2.14.1",
+        "ofetch": "^1.5.1",
         "openai": "^6.8.1",
         "pidusage": "^4.0.1",
         "pino": "^8.19.0",
@@ Expand Down @@

-Original file line number
+Diff line change
@@ -0,0 +1,82 @@
+    // SPDX-FileCopyrightText: 2026 LiveKit, Inc.
+    //
+    // SPDX-License-Identifier: Apache-2.0
+    import type { ApiConnectOptions } from './interruption_stream.js';
+    import type { InterruptionOptions } from './types.js';
+    /** Duration of a single frame in seconds (25ms per frame). */
+    export const FRAME_DURATION_IN_S = 0.025;
+    /** Number of frames per second of audio. */
+    export const FRAMES_PER_SECOND = 40;
+    /** Minimum interruption duration in seconds: 2 consecutive frames. */
+    export const MIN_INTERRUPTION_DURATION_IN_S = FRAME_DURATION_IN_S * 2;
+    /** Default detection threshold. */
+    export const THRESHOLD = 0.65;
+    /** Default maximum audio duration, in seconds. */
+    export const MAX_AUDIO_DURATION_IN_S = 3.0;
+    /** Default audio prefix duration, in seconds. */
+    export const AUDIO_PREFIX_DURATION_IN_S = 0.5;
+    /** Default interval between detections, in seconds. */
+    export const DETECTION_INTERVAL_IN_S = 0.1;
+    /** Default timeout for remote inference, in seconds. */
+    export const REMOTE_INFERENCE_TIMEOUT_IN_S = 1.0;
+    /** Default audio sample rate (presumably in Hz). */
+    export const SAMPLE_RATE = 16000;
+    /** Default production inference URL */
+    export const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';
+    /** Staging inference URL */
+    export const STAGING_BASE_URL = 'https://agent-gateway-staging.livekit.cloud/v1';
+    /**
+     * Resolve the inference gateway URL from the process environment.
+     *
+     * Resolution order:
+     * 1. A non-empty LIVEKIT_INFERENCE_URL is returned as-is.
+     * 2. If LIVEKIT_URL contains '.staging.livekit.cloud', the staging gateway is returned.
+     * 3. In every other case the production gateway is returned.
+     *
+     * @returns The inference base URL to use.
+     */
+    export function getDefaultInferenceUrl(): string {
+      const { LIVEKIT_INFERENCE_URL: explicitUrl, LIVEKIT_URL: serverUrl } = process.env;
+      // An explicit override always wins.
+      if (explicitUrl) return explicitUrl;
+      // Otherwise pick the gateway based on whether the server URL points at staging.
+      const isStaging = (serverUrl || '').includes('.staging.livekit.cloud');
+      return isStaging ? STAGING_BASE_URL : DEFAULT_BASE_URL;
+    }
+    /**
+     * Default API connection options: retry count, base retry interval and timeout.
+     * `retryInterval` is the default base interval used by `intervalForRetry`.
+     * NOTE(review): interval and timeout are presumably in milliseconds — confirm against callers.
+     */
+    export const apiConnectDefaults: ApiConnectOptions = {
+      maxRetries: 3,
+      retryInterval: 2_000,
+      timeout: 10_000,
+    } as const;
+    /**
+     * Compute the delay before a retry using exponential backoff with random jitter.
+     * Intended to mirror the Python implementation's _interval_for_retry.
+     *
+     * @param attempt - Retry attempt number; the base interval is scaled by 2^attempt.
+     * @param baseInterval - Base delay; defaults to `apiConnectDefaults.retryInterval`.
+     * @returns The backoff delay plus up to 25% jitter, in the same unit as `baseInterval`.
+     */
+    export function intervalForRetry(
+      attempt: number,
+      baseInterval: number = apiConnectDefaults.retryInterval,
+    ): number {
+      const backoff = baseInterval * 2 ** attempt;
+      // Random jitter in the range [0, 25%) of the backoff delay.
+      return backoff + backoff * Math.random() * 0.25;
+    }
+    // Default interruption options. baseUrl and useProxy are omitted here because they are
+    // resolved dynamically in the constructor to respect the LIVEKIT_REMOTE_EOT_URL
+    // environment variable.
+    // NOTE(review): getDefaultInferenceUrl() in this file reads LIVEKIT_INFERENCE_URL, not
+    // LIVEKIT_REMOTE_EOT_URL — confirm which variable the constructor actually honors.
+    export const interruptionOptionDefaults: Omit<InterruptionOptions, 'baseUrl' | 'useProxy'> = {
+      sampleRate: SAMPLE_RATE,
+      threshold: THRESHOLD,
+      // Minimum duration expressed as a whole number of frames (rounded up).
+      minFrames: Math.ceil(MIN_INTERRUPTION_DURATION_IN_S * FRAMES_PER_SECOND),
+      maxAudioDurationInS: MAX_AUDIO_DURATION_IN_S,
+      audioPrefixDurationInS: AUDIO_PREFIX_DURATION_IN_S,
+      detectionIntervalInS: DETECTION_INTERVAL_IN_S,
+      // The timeout constant is in seconds; scaled by 1000 here (presumably to milliseconds).
+      inferenceTimeout: REMOTE_INFERENCE_TIMEOUT_IN_S * 1_000,
+      // Credentials are read from the environment once, when this module is evaluated.
+      apiKey: process.env.LIVEKIT_API_KEY || '',
+      apiSecret: process.env.LIVEKIT_API_SECRET || '',
+      minInterruptionDurationInS: MIN_INTERRUPTION_DURATION_IN_S,
+    } as const;

-Original file line number
+Diff line change
@@ -0,0 +1,25 @@
+    // SPDX-FileCopyrightText: 2026 LiveKit, Inc.
+    //
+    // SPDX-License-Identifier: Apache-2.0
+    /**
+     * Error raised while running interruption detection.
+     *
+     * Besides the message it carries a timestamp, a label and a flag telling whether
+     * the failure is recoverable.
+     */
+    export class InterruptionDetectionError extends Error {
+      readonly type = 'InterruptionDetectionError';
+      readonly timestamp: number;
+      readonly label: string;
+      readonly recoverable: boolean;
+
+      /**
+       * @param message - Human-readable description of the failure.
+       * @param timestamp - Timestamp associated with the error.
+       * @param label - Label associated with the error.
+       * @param recoverable - Whether the error is considered recoverable.
+       */
+      constructor(message: string, timestamp: number, label: string, recoverable: boolean) {
+        super(message);
+        this.name = 'InterruptionDetectionError';
+        this.timestamp = timestamp;
+        this.label = label;
+        this.recoverable = recoverable;
+      }
+
+      /** Render the error name and message followed by its label, timestamp and recoverable flag. */
+      toString(): string {
+        const details = [
+          `label=${this.label}`,
+          `timestamp=${this.timestamp}`,
+          `recoverable=${this.recoverable}`,
+        ].join(', ');
+        return `${this.name}: ${this.message} (${details})`;
+      }
+    }

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Enable Gemini Realtime Model to Produce Error Log #1016

Uh oh!

Diff view

Diff view

There are no files selected for viewing

chatgpt-codex-connector bot Feb 3, 2026

Uh oh!

Uh oh!

-Original file line number
+Diff line change
@@ -0,0 +1,183 @@
+    // SPDX-FileCopyrightText: 2026 LiveKit, Inc.
+    //
+    // SPDX-License-Identifier: Apache-2.0
+    import { ofetch } from 'ofetch';
+    import { TransformStream } from 'stream/web';
+    import { z } from 'zod';
+    import { log } from '../../log.js';
+    import { createAccessToken } from '../utils.js';
+    import { intervalForRetry } from './defaults.js';
+    import { InterruptionCacheEntry } from './interruption_cache_entry.js';
+    import { type InterruptionEvent, InterruptionEventType } from './types.js';
+    import type { BoundedCache } from './utils.js';
+    /** Request-level options for a single call to `predictHTTP`. */
+    export interface PostOptions {
+      /** Base URL the prediction endpoint is resolved against. */
+      baseUrl: string;
+      /** Token sent in the `Authorization: Bearer` header. */
+      token: string;
+      /** Optional abort signal forwarded to the HTTP client. */
+      signal?: AbortSignal;
+      /** Optional request timeout forwarded to the HTTP client (presumably milliseconds). */
+      timeout?: number;
+      /** Number of retries; `predictHTTP` falls back to 3 when omitted. */
+      maxRetries?: number;
+    }
+    /** Prediction parameters sent as the `threshold` and `min_frames` query parameters. */
+    export interface PredictOptions {
+      threshold: number;
+      minFrames: number;
+    }
+    /** Schema used to validate the raw (snake_case) response of the prediction endpoint. */
+    export const predictEndpointResponseSchema = z.object({
+      created_at: z.number(),
+      is_bargein: z.boolean(),
+      probabilities: z.array(z.number()),
+    });
+    /** Raw response type inferred from `predictEndpointResponseSchema`. */
+    export type PredictEndpointResponse = z.infer<typeof predictEndpointResponseSchema>;
+    /** Result returned by `predictHTTP`: the validated response in camelCase plus timing. */
+    export interface PredictResponse {
+      /** `created_at` value from the endpoint response. */
+      createdAt: number;
+      /** `is_bargein` value from the endpoint response. */
+      isBargein: boolean;
+      /** `probabilities` array from the endpoint response. */
+      probabilities: number[];
+      /** Time spent in `predictHTTP`, measured locally, in seconds. */
+      predictionDurationInS: number;
+    }
+    /**
+     * POST one audio slice to the `bargein` prediction endpoint and return the validated result.
+     *
+     * The threshold, minimum frame count and a client-side creation timestamp are sent as
+     * query parameters; the audio is sent as an `application/octet-stream` body. Failed
+     * requests are retried with the delay computed by `intervalForRetry`.
+     *
+     * @param data - Audio samples to classify.
+     * @param predictOptions - Threshold and minimum frame count sent with the request.
+     * @param options - Base URL, bearer token and optional signal/timeout/retry settings.
+     * @returns The validated response plus the locally measured request duration in seconds.
+     * @throws If the request ultimately fails, or the response does not match
+     *         `predictEndpointResponseSchema`.
+     */
+    export async function predictHTTP(
+      data: Int16Array,
+      predictOptions: PredictOptions,
+      options: PostOptions,
+    ): Promise<PredictResponse> {
+      const createdAt = performance.now();
+      // Resolve `bargein` relative to the base URL. An absolute-path reference ("/bargein")
+      // would replace the base URL's whole path and silently drop a prefix such as "/v1",
+      // so use a relative reference against a base that is guaranteed to end with "/".
+      const base = options.baseUrl.endsWith('/') ? options.baseUrl : `${options.baseUrl}/`;
+      const url = new URL('bargein', base);
+      url.searchParams.append('threshold', predictOptions.threshold.toString());
+      url.searchParams.append('min_frames', predictOptions.minFrames.toFixed());
+      url.searchParams.append('created_at', createdAt.toFixed());
+      // Counts retries so each one backs off further than the previous one.
+      let retryCount = 0;
+      const response = await ofetch(url.toString(), {
+        retry: options.maxRetries ?? 3,
+        retryDelay: () => {
+          const delay = intervalForRetry(retryCount);
+          retryCount++;
+          return delay;
+        },
+        headers: {
+          'Content-Type': 'application/octet-stream',
+          Authorization: `Bearer ${options.token}`,
+        },
+        signal: options.signal,
+        timeout: options.timeout,
+        method: 'POST',
+        body: data,
+      });
+      // Validate the payload before trusting any of its fields.
+      const { created_at, is_bargein, probabilities } = predictEndpointResponseSchema.parse(response);
+      return {
+        createdAt: created_at,
+        isBargein: is_bargein,
+        probabilities,
+        predictionDurationInS: (performance.now() - createdAt) / 1000,
+      };
+    }
+    /** Options consumed by `createHttpTransport`; read again for every audio slice. */
+    export interface HttpTransportOptions {
+      /** Base URL forwarded to `predictHTTP`. */
+      baseUrl: string;
+      /** API key passed to `createAccessToken` for each request. */
+      apiKey: string;
+      /** API secret passed to `createAccessToken` for each request. */
+      apiSecret: string;
+      /** Threshold forwarded to `predictHTTP`. */
+      threshold: number;
+      /** Minimum frame count forwarded to `predictHTTP`. */
+      minFrames: number;
+      /** Request timeout forwarded to `predictHTTP` (presumably milliseconds). */
+      timeout: number;
+      /** Optional retry count forwarded to `predictHTTP`. */
+      maxRetries?: number;
+    }
+    /** Shared state the HTTP transport reads through `getState` and updates through `setState`. */
+    export interface HttpTransportState {
+      /** Interruption events are only emitted while this is true; reset to false after emitting. */
+      overlapSpeechStarted: boolean;
+      /**
+       * When overlap speech started; audio slices are dropped while this is unset.
+       * Compared against `Date.now()`, so presumably epoch milliseconds.
+       */
+      overlapSpeechStartedAt: number | undefined;
+      /** Cache of inference results, keyed by the response's `createdAt` value. */
+      cache: BoundedCache<number, InterruptionCacheEntry>;
+    }
+    /**
+     * Creates an HTTP transport TransformStream for interruption detection.
+     *
+     * This transport receives Int16Array audio slices and outputs InterruptionEvents.
+     * Each audio slice triggers an HTTP POST request; InterruptionEvents arriving on the
+     * input side are passed through unchanged. Request failures are logged and swallowed
+     * so that a single failed prediction does not tear down the stream.
+     *
+     * @param options - Transport options object. This is read on each request, so mutations
+     *                  to threshold/minFrames will be picked up dynamically.
+     * @param getState - Returns the current transport state. It is called before the request
+     *                   and again after it completes, because the overlap may have ended
+     *                   while the request was in flight.
+     * @param setState - Applies a partial state update; used to clear `overlapSpeechStarted`
+     *                   once an interruption has been emitted.
+     * @param updateUserSpeakingSpan - Optional hook invoked with the cache entry right before
+     *                                 an interruption event is emitted.
+     * @returns A TransformStream turning audio slices into interruption events.
+     */
+    export function createHttpTransport(
+      options: HttpTransportOptions,
+      getState: () => HttpTransportState,
+      setState: (partial: Partial<HttpTransportState>) => void,
+      updateUserSpeakingSpan?: (entry: InterruptionCacheEntry) => void,
+    ): TransformStream<Int16Array | InterruptionEvent, InterruptionEvent> {
+      const logger = log();
+      return new TransformStream<Int16Array | InterruptionEvent, InterruptionEvent>(
+        {
+          async transform(chunk, controller) {
+            // Pass through InterruptionEvents unchanged
+            if (!(chunk instanceof Int16Array)) {
+              controller.enqueue(chunk);
+              return;
+            }
+            // Remember which overlap this slice belongs to; skip audio outside of an overlap.
+            const overlapSpeechStartedAt = getState().overlapSpeechStartedAt;
+            if (!overlapSpeechStartedAt) return;
+            try {
+              const resp = await predictHTTP(
+                chunk,
+                { threshold: options.threshold, minFrames: options.minFrames },
+                {
+                  baseUrl: options.baseUrl,
+                  timeout: options.timeout,
+                  maxRetries: options.maxRetries,
+                  token: await createAccessToken(options.apiKey, options.apiSecret),
+                },
+              );
+              const { createdAt, isBargein, probabilities, predictionDurationInS } = resp;
+              const entry = new InterruptionCacheEntry({
+                createdAt,
+                probabilities,
+                isInterruption: isBargein,
+                speechInput: chunk,
+                totalDurationInS: (performance.now() - createdAt) / 1000,
+                detectionDelayInS: (Date.now() - overlapSpeechStartedAt) / 1000,
+                predictionDurationInS,
+              });
+              // Re-read the state after the await: the overlap may have ended while the
+              // request was in flight, and a pre-await snapshot would then report a
+              // false-positive interruption.
+              const current = getState();
+              current.cache.set(createdAt, entry);
+              if (current.overlapSpeechStarted && entry.isInterruption) {
+                if (updateUserSpeakingSpan) {
+                  updateUserSpeakingSpan(entry);
+                }
+                const event: InterruptionEvent = {
+                  type: InterruptionEventType.INTERRUPTION,
+                  timestamp: Date.now(),
+                  overlapSpeechStartedAt,
+                  isInterruption: entry.isInterruption,
+                  speechInput: entry.speechInput,
+                  probabilities: entry.probabilities,
+                  totalDurationInS: entry.totalDurationInS,
+                  predictionDurationInS: entry.predictionDurationInS,
+                  detectionDelayInS: entry.detectionDelayInS,
+                  probability: entry.probability,
+                };
+                logger.debug(
+                  {
+                    detectionDelayInS: entry.detectionDelayInS,
+                    totalDurationInS: entry.totalDurationInS,
+                  },
+                  'interruption detected',
+                );
+                // Clear the flag first so later slices of the same overlap do not emit again.
+                setState({ overlapSpeechStarted: false });
+                controller.enqueue(event);
+              }
+            } catch (err) {
+              logger.error({ err }, 'Failed to send audio data over HTTP');
+            }
+          },
+        },
+        { highWaterMark: 2 },
+        { highWaterMark: 2 },
+      );
+    }

-Original file line number
+Diff line change
@@ -0,0 +1,47 @@
+    // SPDX-FileCopyrightText: 2026 LiveKit, Inc.
+    //
+    // SPDX-License-Identifier: Apache-2.0
+    import { estimateProbability } from './utils.js';
+    /**
+     * Cache record holding the outcome of one interruption inference.
+     * Fields are mutable to support the setOrUpdate pattern from Python's _BoundedCache.
+     */
+    export class InterruptionCacheEntry {
+      createdAt: number;
+      totalDurationInS: number;
+      predictionDurationInS: number;
+      detectionDelayInS: number;
+      speechInput?: Int16Array;
+      probabilities?: number[];
+      isInterruption?: boolean;
+
+      /**
+       * @param params - Initial values; only `createdAt` is required. Omitted durations
+       *                 default to 0 and the remaining optional fields stay undefined.
+       */
+      constructor(params: {
+        createdAt: number;
+        speechInput?: Int16Array;
+        totalDurationInS?: number;
+        predictionDurationInS?: number;
+        detectionDelayInS?: number;
+        probabilities?: number[];
+        isInterruption?: boolean;
+      }) {
+        const {
+          createdAt,
+          totalDurationInS,
+          predictionDurationInS,
+          detectionDelayInS,
+          speechInput,
+          probabilities,
+          isInterruption,
+        } = params;
+        this.createdAt = createdAt;
+        // Durations fall back to 0 when not supplied.
+        this.totalDurationInS = totalDurationInS ?? 0;
+        this.predictionDurationInS = predictionDurationInS ?? 0;
+        this.detectionDelayInS = detectionDelayInS ?? 0;
+        this.speechInput = speechInput;
+        this.probabilities = probabilities;
+        this.isInterruption = isInterruption;
+      }
+
+      /**
+       * The conservative estimated probability of the interruption event,
+       * or 0 when no probabilities have been recorded.
+       */
+      get probability(): number {
+        if (!this.probabilities) {
+          return 0;
+        }
+        return estimateProbability(this.probabilities);
+      }
+
+      /** Build an entry with `createdAt` set to 0 and every other field at its default. */
+      static default(): InterruptionCacheEntry {
+        return new InterruptionCacheEntry({ createdAt: 0 });
+      }
+    }

Enable Gemini Realtime Model to Produce Error Log #1016

Uh oh!

Enable Gemini Realtime Model to Produce Error Log #1016

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

chatgpt-codex-connector bot Feb 3, 2026

Choose a reason for hiding this comment

Uh oh!

Uh oh!