braintrustdata · Abhijeet Prasad (AbhiPrasad) · Mar 23, 2026 · Mar 23, 2026
diff --git a/.agents/skills/e2e-tests/SKILL.md b/.agents/skills/e2e-tests/SKILL.md
@@ -14,6 +14,7 @@ pnpm run build                        # Build SDK (required if source changed)
 cd e2e && npx vitest run scenarios/<name>/scenario.test.ts          # Run one scenario
 cd e2e && npx vitest run --reporter=verbose scenarios/<name>/scenario.test.ts  # Verbose
 cd e2e && npx vitest run --update scenarios/<name>/scenario.test.ts # Update snapshots
+cd e2e && npx vitest run -t "<exact test name>"                     # Isolate one test when file args over-match
 pnpm run test:e2e                     # Run all (from repo root)
 pnpm run test:e2e:hermetic            # Run hermetic-only e2e tests
 pnpm run test:e2e:external            # Run external-api-only e2e tests
@@ -67,7 +68,7 @@ test(
 );
 ```
 
-Key harness methods: `runScenarioDir()`, `runNodeScenarioDir()`, `testRunEvents()`, `events()`, `payloads()`, `requestsAfter(cursor)`, `testRunId`.
+Key harness methods: `runScenarioDir()`, `runNodeScenarioDir()`, `runDenoScenarioDir()`, `testRunEvents()`, `events()`, `payloads()`, `requestsAfter(cursor)`, `testRunId`.
 
 For wrapper scenarios use `events()` (not `testRunEvents()`) and scope payloads via `payloadRowsForRootSpan()`.
 
@@ -123,10 +124,18 @@ import { runMyImpl } from "./scenario.impl";
 
 Test loops over versions with `for (const s of scenarios) { test(...) }`. See `wrap-ai-sdk-generation-traces` or `ai-sdk-otel-export`.
 
-### Runner-wrapper (vitest/node:test)
+### Runner-wrapper (vitest/node:test/deno)
 
 When the wrapper runs inside a nested test runner, `scenario.ts` spawns a second process via `runNodeSubprocess`. The nested runner file must NOT be named `*.test.ts`. Tag all data with `metadata.testRunId` and use `payloadRowsForTestRunId()`. See `wrap-vitest-suite-traces`.
 
+Use:
+
+- `runNodeScenarioDir()` for plain Node nested runners
+- `runDenoScenarioDir()` for Deno nested runners
+- `runner.case.ts` as the entry file for nested Deno runners (the default `entry` of `runDenoScenarioDir()`)
+
+Deno scenarios can have intentionally different runtime contracts from Node. Assert the actual Deno/browser behavior rather than copying Node parent-child expectations blindly. See `e2e/scenarios/deno-browser/`.
+
 ### OTEL export
 
 Set up `BraintrustExporter`/`BraintrustSpanProcessor` pointed at the mock server, register globally, then assert on `/otel/v1/traces` requests via `requestsAfter()` + `extractOtelSpans()`. See `ai-sdk-otel-export` or `otel-span-processor-export`.
@@ -145,6 +154,8 @@ Set up `BraintrustExporter`/`BraintrustSpanProcessor` pointed at the mock server
 
 Scenarios run from `e2e/.bt-tmp/run-<id>/scenarios/<name>/`. Node walks up to `e2e/node_modules/` for workspace deps (`braintrust`, `@braintrust/otel`, etc.). Scenario-local deps are in the scenario's own `node_modules/`. Helper imports (`../../helpers/...`) work because `prepareScenarioDir` copies `e2e/helpers/` into the temp dir.
 
+Deno nested runners use `runDenoScenarioDir()`, which invokes `deno test --no-check` with the harness env vars and the prepared temp scenario path.
+
 ## Debugging
 
 - **Subprocess error**: Read the `STDERR` section in the error message.

diff --git a/e2e/README.md b/e2e/README.md
@@ -60,6 +60,7 @@ The main utilities you'll use in test files:
 - `resolveScenarioDir(import.meta.url)` - Resolves the folder that contains the current test.
 - `installScenarioDependencies({ scenarioDir })` - Installs optional scenario-local dependencies.
 - `runScenarioDir({ scenarioDir, entry?, timeoutMs? })` - Runs a TypeScript scenario with `tsx`.
+- `runDenoScenarioDir({ scenarioDir, entry?, args?, timeoutMs? })` - Runs nested Deno scenarios with `deno test`; `entry` defaults to `runner.case.ts`.
 - `runNodeScenarioDir({ scenarioDir, entry?, nodeArgs?, timeoutMs? })` - Runs plain Node scenarios, used for `--import braintrust/hook.mjs`.
 - `testRunEvents()` - Returns parsed events tagged with the current test run id.
 - `events()`, `payloads()`, `requestCursor()`, `requestsAfter()` - Lower-level access for ingestion payloads and HTTP request flow assertions.
@@ -108,6 +109,8 @@ Some wrappers execute inside a nested test runner rather than a single SDK call.
 - Tag every traced test/eval with `metadata.testRunId` so the outer assertions can isolate rows across multiple trace roots with `payloadRowsForTestRunId(...)`.
 - If a nested runner needs its own test discovery rules, keep that config local to the scenario folder so the shared e2e config stays unchanged.
 
+The Deno scenarios follow the same pattern, except the harness invokes `deno test` via `runDenoScenarioDir(...)` and the nested runner entrypoint lives in `runner.case.ts`.
+
 ### Environment variables
 
 `externalApi` scenarios require provider credentials in addition to the mock Braintrust server config supplied by the harness:

diff --git a/e2e/helpers/deno-test-helpers.ts b/e2e/helpers/deno-test-helpers.ts
@@ -0,0 +1,192 @@
+type BraintrustModule = Record<string, unknown>;
+
+/**
+ * Minimal assertion helper for the Deno-side e2e checks.
+ *
+ * Declared as `asserts condition` (rather than `asserts condition is true`) so
+ * that truthiness checks and type-guard calls passed as the argument narrow
+ * the caller's variables whenever this file is type-checked.
+ *
+ * @param condition - Value that must be truthy.
+ * @param message - Message used for the thrown error.
+ * @throws Error with `message` when `condition` is falsy.
+ */
+function assert(condition: unknown, message: string): asserts condition {
+  if (!condition) {
+    throw new Error(message);
+  }
+}
+
+/**
+ * Type guard for object-like values: accepts anything whose `typeof` is
+ * `"object"`, except `null` and arrays.
+ */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  if (typeof value !== "object" || value === null) {
+    return false;
+  }
+  return !Array.isArray(value);
+}
+
+/**
+ * Reads the e2e run id from the `BRAINTRUST_E2E_RUN_ID` environment variable.
+ *
+ * @returns The run id string.
+ * @throws Error when the variable is unset or empty.
+ */
+export function getTestRunId(): string {
+  const runId = Deno.env.get("BRAINTRUST_E2E_RUN_ID");
+  assert(runId, "BRAINTRUST_E2E_RUN_ID must be set");
+  return runId;
+}
+
+/**
+ * Builds a run-scoped name: `base`, a hyphen, then the current test run id
+ * lowercased with every character outside `[a-z0-9-]` replaced by `-`.
+ *
+ * @param base - Prefix for the generated name.
+ * @returns The combined `<base>-<sanitized run id>` string.
+ */
+export function scopedName(base: string): string {
+  const sanitizedRunId = getTestRunId()
+    .toLowerCase()
+    .replace(/[^a-z0-9-]/g, "-");
+  return `${base}-${sanitizedRunId}`;
+}
+
+/**
+ * Asserts that every name in `exportNames` is exported by `module`.
+ *
+ * An export counts as present when its value is not `undefined`, so falsy but
+ * defined exports (`0`, `""`, `false`, `null`) are accepted instead of being
+ * misreported as missing.
+ *
+ * @param module - Imported module namespace to inspect.
+ * @param exportNames - Export names that must be present.
+ * @throws Error naming the first export that is missing.
+ */
+export function expectNamedExports(
+  module: BraintrustModule,
+  exportNames: string[],
+): void {
+  for (const exportName of exportNames) {
+    assert(
+      module[exportName] !== undefined,
+      `Expected export "${exportName}" to exist`,
+    );
+  }
+}
+
+/**
+ * Asserts that `module._exportsForTestingOnly.isomorph.buildType` equals
+ * `expectedBuildType`.
+ *
+ * @param module - Imported module namespace to inspect.
+ * @param expectedBuildType - Build type string the module must report.
+ * @throws Error when `_exportsForTestingOnly` or its `isomorph` member is not
+ *   an object, or when the reported build type differs.
+ */
+export function expectBuildType(
+  module: BraintrustModule,
+  expectedBuildType: string,
+): void {
+  const internals = module._exportsForTestingOnly;
+  assert(isRecord(internals), "_exportsForTestingOnly must exist");
+
+  const isomorph = internals.isomorph;
+  assert(isRecord(isomorph), "_exportsForTestingOnly.isomorph must exist");
+
+  const actualBuildType = isomorph.buildType;
+  const buildTypeMatches = actualBuildType === expectedBuildType;
+  assert(
+    buildTypeMatches,
+    `Expected build type "${expectedBuildType}" but got "${String(actualBuildType)}"`,
+  );
+}
+
+/**
+ * Asserts that the module's `Prompt` class renders a Mustache template.
+ *
+ * Constructs a chat prompt whose single user message is `Hello, {{name}}!`,
+ * builds it with `{ name: "World" }` and `templateFormat: "mustache"`, and
+ * checks that the first message's content is `Hello, World!`.
+ *
+ * The second and third constructor arguments (`{}` and `false`) are passed
+ * as-is; their meaning is defined by the SDK's `Prompt` constructor.
+ *
+ * @param module - Imported module namespace that should export `Prompt`.
+ * @throws Error when `Prompt` is missing or the rendered content differs.
+ */
+export function expectMustacheTemplate(module: BraintrustModule): void {
+  type BuiltPrompt = { messages?: Array<{ content?: string }> };
+  type PromptConstructor = new (...args: unknown[]) => {
+    build: (
+      args: Record<string, unknown>,
+      options: { templateFormat: string },
+    ) => BuiltPrompt;
+  };
+
+  const Prompt = module.Prompt as PromptConstructor | undefined;
+  assert(Prompt, "Prompt export must exist");
+
+  const promptData = {
+    prompt: {
+      type: "chat",
+      messages: [{ role: "user", content: "Hello, {{name}}!" }],
+    },
+    options: { model: "gpt-4" },
+  };
+  const prompt = new Prompt(
+    { name: "mustache-test", slug: "mustache-test", prompt_data: promptData },
+    {},
+    false,
+  );
+
+  const rendered = prompt.build(
+    { name: "World" },
+    { templateFormat: "mustache" },
+  );
+  const firstContent = rendered.messages?.[0]?.content;
+  assert(
+    firstContent === "Hello, World!",
+    "Mustache template rendering failed",
+  );
+}
+
+/**
+ * Asserts that building a prompt with `templateFormat: "nunjucks"` fails with
+ * an error whose message contains `requires @braintrust/template-nunjucks`.
+ *
+ * The prompt's user message uses Nunjucks loop syntax and is built with a
+ * three-item `items` list; the `build` call is expected to throw.
+ *
+ * @param module - Imported module namespace that should export `Prompt`.
+ * @throws Error when `Prompt` is missing, when `build` does not throw, or when
+ *   the thrown message does not mention the missing package.
+ */
+export function expectNunjucksTemplateUnavailable(
+  module: BraintrustModule,
+): void {
+  type PromptConstructor = new (...args: unknown[]) => {
+    build: (
+      args: Record<string, unknown>,
+      options: { templateFormat: string },
+    ) => unknown;
+  };
+
+  const Prompt = module.Prompt as PromptConstructor | undefined;
+  assert(Prompt, "Prompt export must exist");
+
+  const loopTemplate =
+    "Items: {% for item in items %}{{ item.name }}{% if not loop.last %}, {% endif %}{% endfor %}";
+  const prompt = new Prompt(
+    {
+      name: "nunjucks-test",
+      slug: "nunjucks-test",
+      prompt_data: {
+        prompt: {
+          type: "chat",
+          messages: [{ role: "user", content: loopTemplate }],
+        },
+        options: { model: "gpt-4" },
+      },
+    },
+    {},
+    false,
+  );
+
+  // Yields the message of whatever `build` throws, or undefined on success.
+  const captureBuildError = (): string | undefined => {
+    try {
+      prompt.build(
+        { items: ["apple", "banana", "cherry"].map((name) => ({ name })) },
+        { templateFormat: "nunjucks" },
+      );
+      return undefined;
+    } catch (error) {
+      return error instanceof Error ? error.message : String(error);
+    }
+  };
+
+  const errorMessage = captureBuildError();
+  assert(
+    errorMessage?.includes("requires @braintrust/template-nunjucks"),
+    `Expected missing nunjucks package error, got: ${errorMessage ?? "no error"}`,
+  );
+}
+
+/**
+ * Runs a small local eval through the module's `Eval` export and asserts on
+ * its outcome.
+ *
+ * Three rows are evaluated with a task that prefixes the input with `Hi ` and
+ * an `exact_match` scorer. The eval is invoked with `noSendLogs: true` and
+ * `returnResults: true`. The result must contain one entry per row and a
+ * summary whose `scores.exact_match.score` is exactly `1`.
+ *
+ * @param module - Imported module namespace that should export `Eval`.
+ * @throws Error when `Eval` is missing or any of the checks above fails.
+ */
+export async function expectEvalWorks(module: BraintrustModule): Promise<void> {
+  type EvalFunction = (
+    name: string,
+    definition: Record<string, unknown>,
+    options: Record<string, unknown>,
+  ) => Promise<Record<string, unknown>>;
+
+  const Eval = module.Eval as EvalFunction | undefined;
+  assert(Eval, "Eval export must exist");
+
+  const rows = ["Alice", "Bob", "Charlie"].map((name) => ({
+    input: name,
+    expected: `Hi ${name}`,
+  }));
+
+  const { summary, results } = await Eval(
+    "deno-local-eval",
+    {
+      data: rows,
+      task: async (input: string) => `Hi ${input}`,
+      scores: [
+        ({ expected, output }: { expected: string; output: string }) => ({
+          name: "exact_match",
+          score: output === expected ? 1 : 0,
+        }),
+      ],
+    },
+    { noSendLogs: true, returnResults: true },
+  );
+
+  // Check the per-row results first, then drill into the summary.
+  assert(Array.isArray(results), "Eval results must be an array");
+  assert(results.length === rows.length, "Eval returned the wrong row count");
+  assert(isRecord(summary), "Eval summary must exist");
+
+  const scoreSummaries = summary.scores;
+  assert(isRecord(scoreSummaries), "Eval summary scores must exist");
+
+  const exactMatch = scoreSummaries.exact_match;
+  assert(isRecord(exactMatch), "Eval exact_match summary must exist");
+  assert(exactMatch.score === 1, "Eval exact_match summary must be 1");
+}
diff --git a/e2e/helpers/scenario-harness.ts b/e2e/helpers/scenario-harness.ts
@@ -29,6 +29,7 @@ interface ScenarioResult {
 }
 
 const tsxCliPath = createRequire(import.meta.url).resolve("tsx/cli");
+const DENO_COMMAND = process.platform === "win32" ? "deno.exe" : "deno";
 const DEFAULT_SCENARIO_TIMEOUT_MS = 15_000;
 const HELPERS_DIR = path.dirname(fileURLToPath(import.meta.url));
 const REPO_ROOT = path.resolve(HELPERS_DIR, "../..");
@@ -77,13 +78,14 @@ function getTestServerEnv(
 }
 
 async function runProcess(
+  command: string,
   args: string[],
   cwd: string,
   env: Record<string, string>,
   timeoutMs: number,
 ): Promise<ScenarioResult> {
   return await new Promise<ScenarioResult>((resolve, reject) => {
-    const child = spawn(process.execPath, args, {
+    const child = spawn(command, args, {
       cwd,
       env: {
         ...process.env,
@@ -94,7 +96,9 @@ async function runProcess(
     const timeout = setTimeout(() => {
       child.kill("SIGTERM");
       reject(
-        new Error(`Process ${args.join(" ")} timed out after ${timeoutMs}ms`),
+        new Error(
+          `Process ${command} ${args.join(" ")} timed out after ${timeoutMs}ms`,
+        ),
       );
     }, timeoutMs);
 
@@ -146,6 +150,7 @@ async function runScenarioDirOrThrow(
       ? [...(options.nodeArgs ?? []), scenarioPath]
       : [tsxCliPath, scenarioPath];
   const result = await runProcess(
+    process.execPath,
     args,
     scenarioDir,
     env,
@@ -192,6 +197,39 @@ export async function runNodeScenarioDir(options: {
   });
 }
 
+/**
+ * Runs a nested Deno scenario with `deno test` and returns the process result.
+ *
+ * The command is `deno test --no-check --allow-env --allow-net --allow-read`,
+ * followed by any caller-supplied `args` and then the entry path produced by
+ * `resolveEntryPath`. The process runs with `scenarioDir` as its working
+ * directory.
+ *
+ * @param options.args - Extra `deno test` arguments placed before the entry.
+ * @param options.entry - Entry file name; defaults to `runner.case.ts`.
+ * @param options.env - Environment variables handed to `runProcess`.
+ * @param options.scenarioDir - Directory that contains the scenario.
+ * @param options.timeoutMs - Timeout; defaults to `DEFAULT_SCENARIO_TIMEOUT_MS`.
+ * @returns The result of the finished process when it exits with code 0.
+ * @throws Error including captured STDOUT/STDERR on a non-zero exit code.
+ */
+export async function runDenoScenarioDir(options: {
+  args?: string[];
+  entry?: string;
+  env?: Record<string, string>;
+  scenarioDir: string;
+  timeoutMs?: number;
+}): Promise<ScenarioResult> {
+  const { scenarioDir } = options;
+  const entry = options.entry ?? "runner.case.ts";
+  const extraArgs = options.args ?? [];
+  const timeoutMs = options.timeoutMs ?? DEFAULT_SCENARIO_TIMEOUT_MS;
+
+  const denoArgs = [
+    "test",
+    "--no-check",
+    "--allow-env",
+    "--allow-net",
+    "--allow-read",
+    ...extraArgs,
+    resolveEntryPath(scenarioDir, entry),
+  ];
+
+  const result = await runProcess(
+    DENO_COMMAND,
+    denoArgs,
+    scenarioDir,
+    options.env ?? {},
+    timeoutMs,
+  );
+
+  if (result.exitCode === 0) {
+    return result;
+  }
+
+  throw new Error(
+    `Scenario ${path.join(scenarioDir, entry)} failed with exit code ${result.exitCode}\nSTDOUT:\n${result.stdout}\nSTDERR:\n${result.stderr}`,
+  );
+}
+
 interface ScenarioHarness {
   events: (predicate?: EventPredicate) => CapturedLogEvent[];
   payloads: (predicate?: PayloadPredicate) => CapturedLogPayload[];
@@ -200,6 +238,13 @@ interface ScenarioHarness {
     after: number,
     predicate?: RequestPredicate,
   ) => CapturedRequest[];
+  runDenoScenarioDir: (options: {
+    args?: string[];
+    entry?: string;
+    env?: Record<string, string>;
+    scenarioDir: string;
+    timeoutMs?: number;
+  }) => Promise<ScenarioResult>;
   runNodeScenarioDir: (options: {
     entry?: string;
     env?: Record<string, string>;
@@ -231,6 +276,14 @@ export async function withScenarioHarness(
       requestCursor: () => server.requests.length,
       requestsAfter: (after, predicate) =>
         filterItems(server.requests.slice(after), predicate),
+      runDenoScenarioDir: (options) =>
+        runDenoScenarioDir({
+          ...options,
+          env: {
+            ...testEnv,
+            ...(options.env ?? {}),
+          },
+        }),
       runNodeScenarioDir: (options) =>
         runNodeScenarioDir({
           ...options,