diff --git a/.gitignore b/.gitignore
index e69de29..bf3579a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,154 @@
+daop-illustration/docs
+.vscode
+
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+lerna-debug.log*
+
+# Diagnostic reports (https://nodejs.org/api/report.html)
+report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
+
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+*.lcov
+
+# nyc test coverage
+.nyc_output
+
+# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+
+# Bower dependency directory (https://bower.io/)
+bower_components
+
+# node-waf configuration
+.lock-wscript
+
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directories
+node_modules/
+jspm_packages/
+
+# Snowpack dependency directory (https://snowpack.dev/)
+web_modules/
+
+# TypeScript cache
+*.tsbuildinfo
+
+# Optional npm cache directory
+.npm
+
+# Optional eslint cache
+.eslintcache
+
+# Optional stylelint cache
+.stylelintcache
+
+# Optional REPL history
+.node_repl_history
+
+# Output of 'npm pack'
+*.tgz
+
+# Yarn Integrity file
+.yarn-integrity
+
+# dotenv environment variable files
+.env
+.env.*
+!.env.example
+
+# parcel-bundler cache (https://parceljs.org/)
+.cache
+.parcel-cache
+
+# Next.js build output
+.next
+out
+
+# Nuxt.js build / generate output
+.nuxt
+dist
+
+# Gatsby files
+.cache/
+# Comment in the public line in if your project uses Gatsby and not Next.js
+# https://nextjs.org/blog/next-9-1#public-directory-support
+# public
+
+# vuepress build output
+.vuepress/dist
+
+# vuepress v2.x temp and cache directory
+.temp
+.cache
+
+# Sveltekit cache directory
+.svelte-kit/
+
+# vitepress build output
+**/.vitepress/dist
+
+# vitepress cache directory
+**/.vitepress/cache
+
+# Docusaurus cache and generated files
+.docusaurus
+
+# Serverless directories
+.serverless/
+
+# FuseBox cache
+.fusebox/
+
+# DynamoDB Local files
+.dynamodb/
+
+# Firebase cache directory
+.firebase/
+
+# TernJS port file
+.tern-port
+
+# Stores VSCode versions used for testing VSCode extensions
+.vscode-test
+
+# yarn v3
+.pnp.*
+.yarn/*
+!.yarn/patches
+!.yarn/plugins
+!.yarn/releases
+!.yarn/sdks
+!.yarn/versions
+
+# Vite logs files
+vite.config.js.timestamp-*
+vite.config.ts.timestamp-*
+
+# AI coding agents
+AGENTS.md
+.cursor/
+.cursorrules
+.github/copilot-instructions.md
+.copilot/
+.aider*
+.cline/
+.windsurf/
+.augment/
+
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..d63efea
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,10 @@
+All Reports in this Repository are licensed by Contributors
+under the
+[W3C Software and Document License](https://www.w3.org/copyright/software-license/).
+
+Contributions to Specifications are made under the
+[W3C CLA](https://www.w3.org/community/about/agreements/cla/).
+
+Contributions to Test Suites are made under the
+[W3C 3-clause BSD License](https://www.w3.org/copyright/3-clause-bsd-license-2008/).
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3cfd954
--- /dev/null
+++ b/README.md
@@ -0,0 +1,422 @@
+# Dynamic AI Offloading Protocol (DAOP) — Explainer
+
+> 📺 **[Live Demo](daop-illustration/)** — Browser-based illustration of `estimateQoS()` with interactive micro-benchmarks
+
+## Table of Contents
+- [Authors](#authors)
+- [Participate](#participate)
+- [Introduction](#introduction)
+- [User-Facing Problem](#user-facing-problem)
+ - [Goals](#goals)
+ - [Non-goals](#non-goals)
+- [User research](#user-research)
+- [Use Cases](#use-cases)
+ - [Adaptive Video Conferencing Background Blur](#adaptive-video-conferencing-background-blur)
+ - [Privacy-Preserving Photo Enhancement](#privacy-preserving-photo-enhancement)
+- [Proposed Approach: Model-Centric Evaluation (Callee Responsible)](#proposed-approach-model-centric-evaluation-callee-responsible)
+ - [Standardized Specification Requirements](#standardized-specification-requirements)
+ - [The `estimateQoS()` API](#the-estimateqos-api)
+ - [The "Weightless" Requirement and WebNN Spec Extensions](#the-weightless-requirement-and-webnn-spec-extensions)
+ - [Performance Tiers](#performance-tiers)
+- [Implementation Considerations (AI Stack Internals)](#implementation-considerations-ai-stack-internals)
+ - [Example Code: Adaptive Background Blur](#example-code-adaptive-background-blur)
+- [Discussion: Potential API Enhancements](#discussion-potential-api-enhancements)
+ - [1. Boolean Requirement API](#1-boolean-requirement-api)
+ - [2. QoS Change Events](#2-qos-change-events)
+- [Alternatives considered](#alternatives-considered)
+ - [Device-Centric Approach (Caller Responsible)](#device-centric-approach-caller-responsible)
+- [Accessibility, Internationalization, Privacy, and Security Considerations](#accessibility-internationalization-privacy-and-security-considerations)
+ - [Privacy](#privacy)
+ - [Security](#security)
+- [Stakeholder Feedback / Opposition](#stakeholder-feedback--opposition)
+- [References & acknowledgements](#references--acknowledgements)
+
+## Authors
+
+- Jonathan Ding (Intel)
+
+## Participate
+
+- [Issue tracker - Dynamic AI Offloading Protocol (DAOP)](https://github.com/webmachinelearning/proposals/issues/15)
+
+## Introduction
+
+This proposal addresses the challenge of efficiently offloading AI inference tasks from cloud
+servers to client devices while maintaining Quality of Service (QoS). This protocol provides a more
+effective mechanism for applications to evaluate whether a specific AI inference request is suitable
+for execution on the client side. It moves beyond static hardware specifications by enabling
+dynamic, privacy-preserving assessment of device capabilities, helping applications make informed
+offloading decisions. Throughout this document, the **Application (App)** represents the
+decision-making logic, which may reside on the client device (e.g., in a web browser) or on a cloud
+server.
+
+## User-Facing Problem
+
+Modern web applications increasingly rely on AI, but running these models solely in the cloud can be
+expensive and introduce latency. Conversely, running them on client devices is difficult because
+developers cannot easily determine if a target device—given its specific CPU, GPU, and NPU
+capabilities—can host a specific AI model without compromising performance or user privacy.
+
+### Goals
+
+- Standardize a mechanism for identifying device performance buckets for AI tasks.
+- Enable efficient offloading of AI inference from cloud to client devices.
+- Maintain high Quality of Service (QoS) during offloading.
+- Protect user privacy by avoiding detailed hardware fingerprinting.
+- Provide a future-proof abstraction that works across varying hardware (CPU, GPU, NPU).
+- Define a protocol that works regardless of whether the decision logic resides in the App's cloud
+ backend or client frontend.
+
+### Non-goals
+
+- Defining the specific wire protocol for model transmission (this proposal focuses on
+  negotiation/estimation).
+- Mandatory implementation of any specific inference engine.
+- Solving all AI workload types in version 1 (e.g., extremely large LLMs with dynamic shapes).
+
+## User research
+
+[Placeholder for user research findings. Initial feedback from ISVs and web developers indicates a
+strong need for predictable client-side AI performance metrics.]
+
+## Use Cases
+
+### Adaptive Video Conferencing Background Blur
+
+A video conferencing application wants to offload background blur processing to the user's laptop to
+save server costs and improve privacy, but only if the device can maintain a stable 30fps.
+
+1. **Inquiry**: The application builds a weightless graph of its blur model and calls
+ `context.estimateQoS()`.
+2. **Estimation**: The device evaluates its capability by integrating a wide range of local
+ intelligence: the AI stack software (including specialized drivers and runtimes), the specific
+ hardware accelerators, current system state (thermal state, battery level, power mode), and
+ environmental configurations that might affect performance.
+3. **Decision**:
+ - If the `performanceTier` meets the application's requirements (e.g., "excellent", "good", or
+ "fair" for real-time video), the application logic decides to download the full weights, bind
+ them, and run locally.
+ - Otherwise (e.g., "slow", "very-slow", "poor"), it falls back to cloud-based processing.
+
+### Privacy-Preserving Photo Enhancement
+
+A photo editing web app wants to run complex enhancement filters using the user's mobile NPU to
+reduce latency and maintain privacy.
+
+1. **Inquiry**: The application provides a weightless description of the enhancement model to
+ `context.estimateQoS()`, including specific target resolutions.
+2. **Estimation**: The device evaluates its capability by considering the current hardware and
+ software environment, including AI stack optimizations, hardware accelerators (such as NPU), and
+ overall system state (e.g., battery level, power mode, thermal conditions).
+3. **Decision**: The application enables the "High Quality" filter locally if the performance tier
+ meets the requirements.
+
+## Proposed Approach: Model-Centric Evaluation (Callee Responsible)
+
+The preferred approach is **Model-Centric**, where the device (the callee, i.e., the responder to
+the AI request) is responsible for evaluating its own ability to handle the requested AI workload.
+In this model, the **Application** (the caller) sends a **Model Description Inquiry**—a weightless
+description of the AI model and input characteristics—to the device. The device, as the callee, uses
+its local knowledge of hardware, current system state, software environment, and implementation
+details to estimate the expected Quality of Service (QoS) for the given task.
+
+```mermaid
+sequenceDiagram
+ participant App as App
+ participant Device as Device
+ participant Cloud as Cloud LLM
+ App->>Device: Weightless Model Description & Input Metadata
+ Note over Device: UA/AI Stack runs Local Estimation (Internal: Static / Dry Run / Black Box)
+ Device-->>App: Return QoS Bucket (Performance Tier)
+ Note over App: App makes Decision (Compare QoS vs Requirement)
+ alt App Decides: Offload
+ App->>Device: Bind Weights & Run Locally
+ else App Decides: Cloud
+ App->>Cloud: Execute on Cloud
+ end
+```
+
+This "callee responsible" design ensures that sensitive device details remain private, as only broad
+performance tiers are reported back to the application. It also allows the device to make the most
+accurate estimation possible, considering real-time factors like thermal state, background load, and
+hardware-specific optimizations that are not visible to the caller (whether the caller logic is in
+the cloud or on the client). By shifting responsibility for QoS evaluation to the callee, the
+protocol achieves both privacy protection and more reliable offloading decisions.
+
+### Standardized Specification Requirements
+
+To enable consistent cross-vendor estimation, the protocol requires standardization of the following
+inputs and outputs:
+
+1. **Weightless Model Description**:
+ - Based on the **WebNN Graph topology**.
+ - Includes **Lazy Bind Constants**: Placeholders for weights (via descriptors and labels) that
+ enable "weightless" graph construction and estimation without downloading large parameter
+ files.
+ - **Dynamic vs. Static Graph Expression**: This proposal currently uses the dynamic WebNN
+ `MLGraphBuilder` API to construct the weightless graph at runtime. An alternative approach is
+ to express the graph topology statically using a declarative format. The
+ [webnn-graph][ref-webnn-graph] project defines a WebNN-oriented graph DSL (`.webnn` format)
+ that separates the graph definition (structure only, no tensor data) from a weights manifest
+ and binary weights file. This static representation is human-readable,
+ diffable, and enables tooling such as ONNX-to-WebNN conversion and graph visualization. A
+ future version of DAOP could accept either a dynamically built `MLGraph` or a statically
+ defined `.webnn` graph description as input to `estimateQoS()`.
+2. **Model Metadata (Optional)**:
+ - Information about the weights that can significantly impact performance, such as **sparsity**
+ or specific quantization schemes.
+3. **Input Characterization**:
+ - The **shape** and **size** of the input data (e.g., image resolution, sequence length).
+4. **QoS Output**:
+   - Unified **Performance Tiers** (e.g., "excellent", "good", "fair", "moderate", "slow",
+     "very-slow", "poor") to ensure hardware abstraction and prevent privacy leaks through precise
+     latency metrics.
+
+### The `estimateQoS()` API
+
+We propose a core API for performance negotiation:
+
+```webidl
+dictionary MLQoSReport {
+ MLPerformanceTier performanceTier;
+};
+
+partial interface MLContext {
+  Promise<MLQoSReport> estimateQoS(MLGraph graph, optional MLQoSOptions options);
+};
+
+dictionary MLQoSOptions {
+ // Input characteristics
+  record<DOMString, MLOperandDescriptor> inputDescriptors;
+
+ // Weights characteristics (Optional)
+ boolean weightsSparsity = false;
+};
+```
+
+### The "Weightless" Requirement and WebNN Spec Extensions
+
+To maximize the benefits of DAOP, the underlying WebNN specification should support a **weightless
+build mode**. Currently, WebNN's `constant()` API typically requires an `ArrayBufferView`, which
+implies the weights must already be present in memory.
+
+We propose that WebNN builders be extended to allow:
+
+1. **Weightless Constants**: Defining constants using only their descriptor (shape, type) and a
+ `label` for late-binding.
+2. **Lazy / Explicit Binding**: Separating the graph topology definition from the binding of heavy
+ weight data. By using an explicit `bindConstants()` (or similar) method, we achieve **lazy
+ binding** where weights are only provided and processed after the offloading decision is made.
+ This design aligns with the proposal in
+ [webnn#901][ref-webnn-901], which addresses the same
+ fundamental problem from a memory-efficiency perspective. That proposal allows
+ `builder.constant()` to accept just an `MLOperandDescriptor` (shape and type, no
+ `ArrayBufferView`), producing a "hollow constant" handle. After `builder.build()`, weights are
+ streamed to device memory one at a time via `graph.setConstantData(constantOperand, dataBuffer)`,
+ reducing peak CPU memory from ~3× model size to ~1×. Our `bindConstants()` API could be
+ integrated with or replaced by this `setConstantData()` mechanism in a future version of the
+ spec, combining the benefits of weightless QoS estimation with memory-efficient weight loading.
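+
+The sketch below shows how weightless estimation and streamed weight binding could compose. It
+assumes the hollow-constant and `setConstantData()` shape described in [webnn#901][ref-webnn-901]
+(neither is part of the current WebNN specification), and the weight file name is hypothetical.
+
+```js
+const context = await navigator.ml.createContext({ deviceType: "npu" });
+const builder = new MLGraphBuilder(context);
+
+// Hollow constant: descriptor only, no ArrayBufferView (per webnn#901).
+const weights = builder.constant({ dataType: "float32", shape: [3, 3, 64, 64] });
+const input = builder.input("input", { shape: [1, 3, 224, 224], dataType: "float32" });
+const graph = await builder.build({ output: builder.conv2d(input, weights) });
+
+const qos = await context.estimateQoS(graph);
+if (qos.performanceTier !== "poor") {
+  // Stream each weight buffer to device memory individually (peak CPU memory stays near 1×).
+  const buf = await fetch("conv1-weights.bin").then((r) => r.arrayBuffer());
+  graph.setConstantData(weights, buf);
+}
+```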
+
+### Performance Tiers
+
+The `estimateQoS()` API returns a `performanceTier` string that represents the device's estimated
+ability to execute the given graph. The tiers are designed to be broad enough to prevent hardware
+fingerprinting while still enabling meaningful offloading decisions:
+
+| Tier | Indicative Latency | Interpretation |
+| ------------- | ------------------ | -------------------------------------- |
+| `"excellent"` | < 16 ms | Real-time (60 fps frame budget) |
+| `"good"` | < 100 ms | Interactive responsiveness |
+| `"fair"` | < 1 s | Responsive for non-real-time tasks |
+| `"moderate"` | < 10 s | Tolerable for batch or one-shot tasks |
+| `"slow"` | < 30 s | Noticeable wait |
+| `"very-slow"` | < 60 s | Long wait |
+| `"poor"` | ≥ 60 s | Likely unacceptable for most use cases |
+
+The exact tier boundaries are **implementation-defined** and may be adjusted. The key requirement is
+that tiers remain coarse enough to avoid fingerprinting while fine enough for applications to make
+useful offloading decisions.
+
+Applications choose their own acceptance threshold based on use-case requirements. For example, a
+video conferencing blur might require "good" or better, while a one-shot photo enhancement might
+accept "moderate".
+
+## Implementation Considerations (AI Stack Internals)
+
+The underlying system (e.g., User Agent or WebNN implementation) can use several strategies to
+estimate performance for the weightless graph. **These strategies are internal implementation
+details of the AI stack and are transparent to the application developer.** It is important to note
+that these strategies are **not part of the DAOP specification or proposal**; they are discussed
+here only to illustrate possible implementation choices and feasibility. Common techniques include:
+
+1. Static Cost Model: Analytical formulas (e.g., Roofline model) or lookup tables to predict
+ operator costs based on descriptors.
+2. Dry Run: Fast simulation of the inference engine's execution path without heavy computation or
+ weights.
+3. Black Box Profiling: Running the actual model topology using dummy/zero weights to measure
+ timing.
+
+For a concrete demonstration of these techniques, see the [daop-illustration](./daop-illustration)
+project and its [implementation details](./daop-illustration/IMPLEMENTATION.md). It showcases a
+**Static Cost Model** strategy that employs **log-log polynomial interpolation** of
+measured operator latencies derived from per-operator micro-benchmarks. By fitting degree-1
+polynomials (power-law curves) to latency data across multiple tensor sizes in logarithmic space,
+with a left-side clamp to handle small-size noise, the implementation
+captures performance characteristics common in GPU-accelerated workloads. This
+illustration uses a simplified approach for demonstration purposes; production implementations could
+employ other strategies such as Roofline models, learned cost models,
+hardware-specific operator libraries, or ML-based performance predictors. These metrics
+(regression coefficients, estimated throughput) are **internal implementation details** of the AI
+stack and are never exposed directly to the web application.
+
+To prevent hardware fingerprinting, the raw estimation results are normalized into broad
+**Performance Tiers** before being returned to the web application. The application logic remains
+decoupled from the hardware-specific details.
+
+### Example Code: Adaptive Background Blur
+
+The following example shows how an application might use the API to decide whether to offload.
+
+```js
+// 1. Initialize WebNN context
+const context = await navigator.ml.createContext({ deviceType: "npu" });
+const builder = new MLGraphBuilder(context);
+
+// 2. Build a WEIGHTLESS graph
+const weights = builder.constant({
+ shape: [3, 3, 64, 64],
+ dataType: "float32",
+  label: "modelWeights", // Identifier used to bind the real weights later
+});
+
+const input = builder.input("input", { shape: [1, 3, 224, 224], dataType: "float32" });
+const output = builder.conv2d(input, weights);
+const graph = await builder.build({ output });
+
+// 3. DAOP Estimation: Providing input characteristics
+const qos = await context.estimateQoS(graph, {
+ inputDescriptors: {
+ input: { shape: [1, 3, 720, 1280], dataType: "float32" },
+ },
+});
+
+// Check if the performance tier meets our requirements
+const acceptable = ["excellent", "good", "fair", "moderate"];
+if (acceptable.includes(qos.performanceTier)) {
+ const realWeights = await fetch("model-weights.bin").then((r) => r.arrayBuffer());
+
+ // 4. Bind real data (using the label) explicitly.
+ await context.bindConstants(graph, {
+ modelWeights: realWeights,
+ });
+
+ // 5. Subsequent compute calls only need dynamic inputs
+ const results = await context.compute(graph, {
+ input: cameraFrame,
+ });
+} else {
+ runCloudInference();
+}
+```
+
+## Discussion: Potential API Enhancements
+
+We are considering several additions to the API to better support adaptive applications:
+
+### 1. Boolean Requirement API
+
+Instead of returning a bucket, the application could provide its specific requirements (e.g.,
+minimum FPS or maximum latency) and receive a simple boolean "can meet requirement" response.
+
+```webidl
+partial interface MLContext {
+  Promise<boolean> meetsRequirement(MLGraph graph, MLPerformanceTier requiredTier, optional MLQoSOptions options);
+};
+```
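+
+A possible usage of this variant (sketch only; the option shape mirrors the `estimateQoS()`
+example above, and `runCloudInference()` stands for an application-defined cloud fallback):
+
+```js
+const canOffload = await context.meetsRequirement(graph, "good", {
+  inputDescriptors: { input: { shape: [1, 3, 720, 1280], dataType: "float32" } },
+});
+if (!canOffload) runCloudInference();
+```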
+
+### 2. QoS Change Events
+
+AI performance can change dynamically due to thermal throttling, battery state, or background system
+load. An event-driven mechanism would allow applications to react when the device's ability to meet
+a specific QoS requirement changes.
+
+```webidl
+interface MLQoSChangeEvent : Event {
+ readonly attribute boolean meetsRequirement;
+};
+```
+
+```js
+// Application listens for changes in offload capability
+const monitor = context.createQoSMonitor(graph, "excellent");
+monitor.onchange = (e) => {
+ if (!e.meetsRequirement) {
+ console.log("Performance dropped, switching back to cloud.");
+ switchToCloud();
+ } else {
+ console.log("Performance restored, offloading to local.");
+ switchToLocal();
+ }
+};
+```
+
+## Alternatives considered
+
+### Device-Centric Approach (Caller Responsible)
+
+In this alternative, the Application acts as the central intelligence. It collects raw hardware
+specifications and telemetry from the device and makes the offloading decision.
+
+```mermaid
+sequenceDiagram
+ participant App as App
+ participant Device as Device
+ participant Cloud as Cloud LLM
+ App->>Device: Request Device Description
+ Device-->>App: Return Spec (CPU, GPU, NPU, Mem, Microbenchmarks...)
+ Note over App: App estimates QoS (Mapping H/W Spec -> AI Performance)
+ Note over App: App makes Decision (Compare QoS vs Requirement)
+ alt App Decides: Offload
+ App->>Device: Execute locally
+ else App Decides: Cloud
+ App->>Cloud: Execute on Cloud
+ end
+```
+
+- **Process**: Device returns specific hardware details (CPU model, GPU frequency, NPU TOPs,
+ micro-benchmark results) -> Application estimates QoS -> Application decides to offload.
+- **Why rejected**:
+ - **Privacy Risks**: Exposes detailed hardware fingerprints and potentially sensitive system
+ telemetry to remote servers.
+ - **Estimation Complexity**: It is extremely difficult for a remote server to accurately map raw
+ hardware specs to actual inference performance across a fragmented device ecosystem (ignoring
+ local drivers, thermal state, and OS-level optimizations).
+ - **Scalability**: Requires maintaining and constantly updating an impractical global database
+ mapping every possible device configuration to AI performance profiles.
+
+## Accessibility, Internationalization, Privacy, and Security Considerations
+
+### Privacy
+
+The Model-Centric approach significantly enhances privacy by:
+
+- Avoiding hardware fingerprinting.
+- Returning broad **Performance Tiers** rather than exact hardware identifiers or precise latency
+ metrics.
+- Enabling local processing of sensitive user data (like photos or video) that would otherwise need
+ to be sent to the cloud.
+
+### Security
+
+- Weightless model descriptions should be validated to prevent malicious topologies from causing
+ resource exhaustion (DoS) during the estimation phase.
+
+## Stakeholder Feedback / Opposition
+
+- [Implementors/ISVs]: Initial interest from several ISVs, to be documented.
+
+## References & acknowledgements
+
+Many thanks for valuable feedback and advice from contributors to the WebNN specification and the
+Web Machine Learning Working Group.
+
+[ref-webnn-graph]: https://github.com/rustnn/webnn-graph
+[ref-webnn-901]: https://github.com/webmachinelearning/webnn/issues/901
diff --git a/daop-illustration/IMPLEMENTATION.md b/daop-illustration/IMPLEMENTATION.md
new file mode 100644
index 0000000..5169397
--- /dev/null
+++ b/daop-illustration/IMPLEMENTATION.md
@@ -0,0 +1,122 @@
+# DAOP Illustration: Reference Implementation
+
+> **Note on Illustration Purposes**: This implementation is provided for **illustration purposes** to demonstrate the feasibility of the `estimateQoS()` API. It uses a simplified log-log polynomial interpolation approach. A production implementation could employ other strategies — such as Roofline models, learned cost models, hardware-specific operator libraries, or ML-based performance predictors — depending on the target platform and accuracy requirements.
+
+## 1. Overview
+This document describes the implementation strategy for the `estimateQoS()` API in the DAOP (Dynamic AI Offloading Protocol) illustration. The estimation strategy uses **log-log polynomial interpolation** based on operator-level micro-benchmarks.
+
+The internals of these estimations are entirely opaque to the application. The application receives only a high-level performance tier (e.g., "excellent" or "fair"), allowing for hardware-agnostic offloading decisions without exposing raw timing data or device-specific characteristics.
+
+## 2. Performance Tiers
+The implementation categorizes the estimated graph latency into one of seven performance tiers. These tiers correspond to typical user experience expectations:
+
+| Tier | Latency Threshold | Description |
+|------|-------------------|-------------|
+| excellent | < 16ms | Real-time (60fps) performance |
+| good | < 100ms | Interactive / seamless UI |
+| fair | < 1s | Responsive but noticeable |
+| moderate | < 10s | Tolerable for background tasks |
+| slow | < 30s | Significant wait time |
+| very-slow | < 60s | Approaching timeout limits |
+| poor | ≥ 60s | Unacceptable performance |
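+
+A minimal sketch of the tier mapping implied by this table (thresholds in milliseconds; the function name is illustrative):
+
+```js
+function latencyToTier(ms) {
+  if (ms < 16) return "excellent";
+  if (ms < 100) return "good";
+  if (ms < 1_000) return "fair";
+  if (ms < 10_000) return "moderate";
+  if (ms < 30_000) return "slow";
+  if (ms < 60_000) return "very-slow";
+  return "poor";
+}
+```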
+
+## 3. Estimation Strategy: Log-Log Polynomial Interpolation
+Empirical observations show that operator execution time on modern hardware often follows a power-law relationship with the total number of processed elements:
+`time ≈ a · (totalElements)^b`
+
+By taking the logarithm of both sides, this relationship becomes linear in log-log space:
+`log(time) = log(a) + b · log(totalElements)`
+
+This implementation fits a **degree-1 polynomial** (linear) in log-log space:
+`log(time) = c0 + c1 · log(n)`
+
+The coefficients (`c0`, `c1`) are found using least-squares normal equations, solved via **Gaussian elimination with partial pivoting**.
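+
+For the degree-1 case the normal equations have a closed-form solution, so an equivalent fit can be sketched compactly (hypothetical names; the actual `poly-fit.js` solves the general normal equations):
+
+```js
+// points: [{ totalElements, medianMs }, ...] collected by the micro-benchmarks
+function fitLogLogLinear(points) {
+  const xs = points.map((p) => Math.log(p.totalElements));
+  const ys = points.map((p) => Math.log(p.medianMs));
+  const n = xs.length;
+  const sumX = xs.reduce((a, b) => a + b, 0);
+  const sumY = ys.reduce((a, b) => a + b, 0);
+  const sumXX = xs.reduce((a, x) => a + x * x, 0);
+  const sumXY = xs.reduce((a, x, i) => a + x * ys[i], 0);
+  const c1 = (n * sumXY - sumX * sumY) / (n * sumXX - sumX * sumX);
+  const c0 = (sumY - c1 * sumX) / n;
+  return [c0, c1]; // log(time) ≈ c0 + c1 · log(n)
+}
+```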
+
+### Small-Size Noise Handling (Clamp)
+
+At small input sizes, GPU dispatch overhead can dominate actual computation time, producing a U-shaped curve in log-log space — small inputs appear slower than medium ones. Left unchecked, a polynomial fit on this data extrapolates catastrophically for very small inputs.
+
+To address this, the fitter applies a **left-side clamp**:
+1. Find the measured point with the **minimum medianMs** (the "valley" of the U).
+2. **Clamp** all points to the left of it to that minimum value.
+3. **Fit** the degree-1 polynomial using only points from the minimum onward — clamped points are excluded from the fit.
+4. At **prediction time**, any input size at or below the minimum-point's size returns the flat clamp value instead of polynomial extrapolation.
+
+This ensures monotonic (non-decreasing) predictions: small inputs never produce absurdly high time estimates.
+
+**Prediction Process:**
+1. Calculate the natural log of the input element count: `ln_n = log(totalElements)`.
+2. If `ln_n` is at or below the clamp boundary, return the clamped floor value directly.
+3. Otherwise, evaluate the fitted polynomial: `ln_time = polyEval(coeffs, ln_n)`.
+4. Revert to time domain: `estimatedTime = exp(ln_time)`.
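+
+A compact sketch of prediction steps 1–4 (field names hypothetical):
+
+```js
+function predictMs(model, totalElements) {
+  // model: { coeffs: [c0, c1], clampLnN, clampMs } produced by the fitting step
+  const lnN = Math.log(totalElements);
+  if (lnN <= model.clampLnN) return model.clampMs; // left-side clamp floor
+  return Math.exp(model.coeffs[0] + model.coeffs[1] * lnN); // back to the time domain
+}
+```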
+
+If a polynomial has not yet been fitted for an operator (e.g., during the first calibration run), the system falls back to piecewise linear interpolation on the raw benchmarked data points.
+
+## 4. Shape-Aware Micro-Benchmarks
+To populate the estimation models, the implementation runs a suite of micro-benchmarks for supported operators across six size variants.
+
+### Size Variants
+Benchmarking across multiple sizes captures the "utilization curve" where small tensors may not fully saturate compute units.
+
+| Variant | Representative Shape | Total Elements |
+|---------|----------------------|----------------|
+| xs | [1, 16, 16, 8] | 2,048 |
+| small | [1, 32, 32, 16] | 16,384 |
+| medium | [1, 64, 64, 24] | 98,304 |
+| large | [1, 128, 128, 32] | 524,288 |
+| xl | [1, 256, 256, 32] | 2,097,152 |
+| xxl | [1, 512, 512, 32] | 8,388,608 |
+
+### Benchmark Methodology
+The system benchmarks 7 operator types: `conv2d`, `add`, `mul`, `relu`, `sigmoid`, `clamp`, and `averagePool2d`.
+
+1. **Baseline Overhead Subtraction**: Before benchmarking real operators, the runner measures the dispatch + readTensor overhead using a trivial (reshape) graph at a small fixed size. To improve accuracy, this measurement is repeated across 3 independent rounds (each with 50 batched iterations), and the median-of-medians is used. This baseline is subtracted from each operator's measured time to isolate pure compute cost.
+2. **Amortized Readback**: The runner dispatches 10 operations (batchSize=10) before a single `readTensor` call, further reducing per-dispatch synchronization overhead.
+3. **Iterations**: Each benchmark performs 5 warmup runs followed by 30 timed iterations to find the median latency.
+4. **Storage**: Raw data points `{ totalElements, medianMs }` are stored in the `TimeModelDatabase`.
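+
+The timing loop behind each data point might look like the following sketch (warmup and iteration counts from item 3; `runOnce` is a hypothetical callback that dispatches one batched execution and reads the result back):
+
+```js
+async function measureMedianMs(runOnce, warmup = 5, iterations = 30) {
+  for (let i = 0; i < warmup; i++) await runOnce();
+  const samples = [];
+  for (let i = 0; i < iterations; i++) {
+    const t0 = performance.now();
+    await runOnce();
+    samples.push(performance.now() - t0);
+  }
+  samples.sort((a, b) => a - b);
+  return samples[Math.floor(samples.length / 2)]; // medianMs
+}
+```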
+
+## 5. End-to-End Estimation Flow
+
+### Benchmark Phase (Offline/Calibration)
+1. Measure baseline dispatch + readTensor overhead using a trivial graph.
+2. Iterate through supported operator types and size variants.
+3. Execute benchmarks, subtract baseline overhead, and record median latencies.
+4. Store results in `TimeModelDatabase` and fit log-log polynomials.
+5. Persist models to `localStorage` under the key `"daop_time_models"`.
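+
+A sketch of the persistence step (storage key from step 5; `db.toJSON()` / `db.fromJSON()` are hypothetical helpers on `TimeModelDatabase`):
+
+```js
+// After calibration: persist the fitted models.
+localStorage.setItem("daop_time_models", JSON.stringify(db.toJSON()));
+
+// On the next page load: restore them and skip re-benchmarking.
+const saved = localStorage.getItem("daop_time_models");
+if (saved) db.fromJSON(JSON.parse(saved));
+```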
+
+### Estimation Phase (Online)
+1. **Traverse Graph**: Walk the IR of the weightless graph.
+2. **Sum Node Latencies**: For each node, look up the operator in `TimeModelDatabase`.
+ - Call `predict(opType, inputElements)` to get the estimated time.
+3. **Add Overhead**: Add a graph dispatch overhead of `0.5 + numNodes * 0.005` ms.
+4. **Assign Tier**: Map the total estimated latency to a performance tier string.
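+
+Put together, the online phase reduces to a walk like this sketch (IR field names hypothetical; `db.predict()` is the lookup from Section 3 and `latencyToTier()` a mapping like the one sketched in Section 2):
+
+```js
+function estimateGraphTier(irGraph, db) {
+  let totalMs = 0;
+  for (const node of irGraph.nodes) {
+    const elements = node.inputs[0].shape.reduce((a, d) => a * d, 1);
+    totalMs += db.predict(node.opType, elements); // per-node estimate
+  }
+  totalMs += 0.5 + irGraph.nodes.length * 0.005; // graph dispatch overhead (ms)
+  return latencyToTier(totalMs);
+}
+```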
+
+## 6. Project Structure
+
+```
+daop-illustration/
+ src/
+ index.js # Public API entry point
+ polyfill.js # WebNN feature detection and DAOP initialization
+ daop-context.js # Wraps native MLContext; delegates to interpolation estimator
+ daop-graph-builder.js # IR graph builder supporting weightless constants
+ daop-graph.js # IR graph representation and Mermaid export
+ ir/
+ graph-ir.js # Core IR definitions (TensorDesc, IRNode)
+ shape-inference.js # Shape inference logic for operators
+ qos/
+ estimate-qos-interp.js # Interpolation-based QoS estimator
+ interpolation/
+ poly-fit.js # Polynomial fitting (Normal Equations, Gaussian)
+ time-model.js # TimeModelDatabase (stores points, fits, predicts)
+ microbench/
+ bench-runner.js # Hardware-specific benchmark execution engine
+ op-benchmarks.js # Operator configurations (xs, small, medium, large, xl, xxl)
+ examples/
+ background-blur/
+ background-blur-demo.html # Interactive two-column demo page
+ selfie-model.js # Model graph definition + weight loader
+ blur-renderer.js # Image processing + blur compositing
+ meeting.jpg # Sample input image
+```
+
diff --git a/daop-illustration/LICENSE b/daop-illustration/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/daop-illustration/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/daop-illustration/README.md b/daop-illustration/README.md
new file mode 100644
index 0000000..3c9ae3f
--- /dev/null
+++ b/daop-illustration/README.md
@@ -0,0 +1,78 @@
+# DAOP Illustration
+
+A working JavaScript illustration of the
+[Dynamic AI Offloading Protocol (DAOP)](../README.md) using the WebNN API.
+
+This project demonstrates how the proposed `estimateQoS()` API can work in practice:
+an application builds a **weightless** computation graph, estimates performance via
+operator micro-benchmarks and polynomial interpolation, decides whether to run locally
+or offload to the cloud, and — if local — downloads weights and executes inference.
+
+## Prerequisites
+
+- A WebNN-capable browser (e.g., Chrome Canary with WebNN flags enabled).
+ See [installation guide](https://webnn.io/en/learn/get-started/installation).
+- Node.js (for the static file server).
+
+## Running the Demo
+
+```bash
+npm install
+npm start # starts http://localhost:8080
+```
+
+Open `http://localhost:8080/examples/background-blur/background-blur-demo.html` in the WebNN-capable
+browser.
+
+### Background Blur Demo
+
+The demo applies AI-powered background blur to a meeting photo using the MediaPipe Selfie
+Segmentation model. The workflow follows the DAOP protocol:
+
+1. **Build weightless graph** — the model topology is recorded without downloading weights.
+2. **Estimate QoS** — per-operator micro-benchmarks and interpolation produce a performance tier.
+3. **Offloading decision** — if the tier is acceptable (< 10 s), run locally; otherwise
+ offload to cloud.
+4. **Execute** — download weights, compile the native WebNN graph, run inference, apply blur.
+
+The right column of the demo exposes DAOP internals: operator benchmarks, estimation
+curves, computation graph visualization, and a timing comparison between estimated and
+actual latency.
+
+## Implementation Details
+
+See [IMPLEMENTATION.md](./IMPLEMENTATION.md) for a detailed description of the estimation
+strategy, including shape-aware micro-benchmarks, polynomial interpolation, and the 7-tier
+performance classification.
+
+## Project Structure
+
+```
+src/ # DAOP library (reusable)
+ index.js # Public API
+ polyfill.js # WebNN detection + DAOP initialization
+ daop-context.js # Wraps native MLContext with estimateQoS / bindConstants / compute
+ daop-graph-builder.js # IR graph builder (weightless constants)
+ daop-graph.js # IR graph + Mermaid visualization
+ ir/
+ graph-ir.js # TensorDesc, IROperand, IRNode, IRGraph
+ shape-inference.js # Shape inference for conv2d, pool, resample, etc.
+ qos/
+ estimate-qos-interp.js # Interpolation-based QoS estimation + tier assignment
+ interpolation/
+ poly-fit.js # Polynomial fitting (Normal Equations, Gaussian)
+ time-model.js # TimeModelDatabase (stores points, fits, predicts)
+ microbench/
+ bench-runner.js # Multi-size benchmark runner
+ op-benchmarks.js # Per-op benchmark configurations (xs–xxl)
+examples/
+ background-blur/ # Background blur demo (self-contained)
+ background-blur-demo.html # Interactive two-column demo page
+ selfie-model.js # Model graph definition + weight loader
+ blur-renderer.js # Image processing + blur compositing
+ meeting.jpg # Sample input image
+```
+
+## License
+
+Apache 2.0
diff --git a/daop-illustration/demo-server.js b/daop-illustration/demo-server.js
new file mode 100644
index 0000000..34f57bc
--- /dev/null
+++ b/daop-illustration/demo-server.js
@@ -0,0 +1,63 @@
+import http from "http";
+import fs from "fs";
+import path from "path";
+import { fileURLToPath } from "url";
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+const PORT = 8080;
+
+const MIME_TYPES = {
+  ".html": "text/html",
+  ".js": "text/javascript",
+  ".css": "text/css",
+  ".json": "application/json",
+  ".png": "image/png",
+  ".jpg": "image/jpeg",
+ ".jpeg": "image/jpeg",
+ ".gif": "image/gif",
+ ".svg": "image/svg+xml",
+ ".webp": "image/webp",
+ ".wasm": "application/wasm",
+};
+
+const server = http.createServer((req, res) => {
+ console.log(`${req.method} ${req.url}`);
+
+ // Default to the background blur demo
+ let filePath = req.url === "/" ? "/examples/background-blur/background-blur-demo.html" : req.url;
+
+ // Remove query strings or hashes if present
+ filePath = filePath.split("?")[0].split("#")[0];
+
+ // Ensure we don't try to access files outside the directory
+ // Remove leading slash for path.join to behave consistently
+ const safePath = path.normalize(filePath).replace(/^[\/\\]+/, "");
+ let fullPath = path.join(__dirname, safePath);
+
+ console.log(`Serving: ${fullPath}`);
+
+ const extname = path.extname(fullPath);
+ let contentType = MIME_TYPES[extname] || "application/octet-stream";
+
+ fs.readFile(fullPath, (error, content) => {
+ if (error) {
+ if (error.code === "ENOENT") {
+ res.writeHead(404);
+ res.end("File not found");
+ } else {
+ res.writeHead(500);
+ res.end(`Server error: ${error.code}`);
+ }
+ } else {
+ res.writeHead(200, { "Content-Type": contentType });
+ res.end(content, "utf-8");
+ }
+ });
+});
+
+server.listen(PORT, () => {
+ console.log(`Server running at http://localhost:${PORT}/`);
+ console.log(`Demo page: http://localhost:${PORT}/examples/background-blur/background-blur-demo.html`);
+});
diff --git a/daop-illustration/examples/background-blur/background-blur-demo.html b/daop-illustration/examples/background-blur/background-blur-demo.html
new file mode 100644
index 0000000..06497a4
--- /dev/null
+++ b/daop-illustration/examples/background-blur/background-blur-demo.html
@@ -0,0 +1,641 @@
+DAOP: Background Blur Demo
+
+Web Application
+
+Background Blur with WebNN
+This demo applies AI-powered background blur using the WebNN API. The application only sees a high-level performance tier — all estimation internals are opaque.
+
+Under the Hood — DAOP Internals
+One possible implementation strategy (for illustration only)
+
+⚙️ Operator Micro-benchmarks
+
+1. Computation Graph (Weightless)
+The graph structure is built without weights to allow instant analysis.