diff --git a/.gitignore b/.gitignore
index e69de29..bf3579a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,154 @@
+daop-illustration/docs
+.vscode
+
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+lerna-debug.log*
+
+# Diagnostic reports (https://nodejs.org/api/report.html)
+report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
+
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+*.lcov
+
+# nyc test coverage
+.nyc_output
+
+# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+
+# Bower dependency directory (https://bower.io/)
+bower_components
+
+# node-waf configuration
+.lock-wscript
+
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directories
+node_modules/
+jspm_packages/
+
+# Snowpack dependency directory (https://snowpack.dev/)
+web_modules/
+
+# TypeScript cache
+*.tsbuildinfo
+
+# Optional npm cache directory
+.npm
+
+# Optional eslint cache
+.eslintcache
+
+# Optional stylelint cache
+.stylelintcache
+
+# Optional REPL history
+.node_repl_history
+
+# Output of 'npm pack'
+*.tgz
+
+# Yarn Integrity file
+.yarn-integrity
+
+# dotenv environment variable files
+.env
+.env.*
+!.env.example
+
+# parcel-bundler cache (https://parceljs.org/)
+.cache
+.parcel-cache
+
+# Next.js build output
+.next
+out
+
+# Nuxt.js build / generate output
+.nuxt
+dist
+
+# Gatsby files
+.cache/
+# Comment in the public line in if your project uses Gatsby and not Next.js
+# https://nextjs.org/blog/next-9-1#public-directory-support
+# public
+
+# vuepress build output
+.vuepress/dist
+
+# vuepress v2.x temp and cache directory
+.temp
+.cache
+
+# Sveltekit cache directory
+.svelte-kit/
+
+# vitepress build output
+**/.vitepress/dist
+
+# vitepress cache directory
+**/.vitepress/cache
+
+# Docusaurus cache and generated files
+.docusaurus
+
+# Serverless directories
+.serverless/
+
+# FuseBox cache
+.fusebox/
+
+# DynamoDB Local files
+.dynamodb/
+
+# Firebase cache directory
+.firebase/
+
+# TernJS port file
+.tern-port
+
+# Stores VSCode versions used for testing VSCode extensions
+.vscode-test
+
+# yarn v3
+.pnp.*
+.yarn/*
+!.yarn/patches
+!.yarn/plugins
+!.yarn/releases
+!.yarn/sdks
+!.yarn/versions
+
+# Vite logs files
+vite.config.js.timestamp-*
+vite.config.ts.timestamp-*
+
+# AI coding agents
+AGENTS.md
+.cursor/
+.cursorrules
+.github/copilot-instructions.md
+.copilot/
+.aider*
+.cline/
+.windsurf/
+.augment/
+
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..d63efea
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,10 @@
+All Reports in this Repository are licensed by Contributors
+under the
+[W3C Software and Document License](https://www.w3.org/copyright/software-license/).
+
+Contributions to Specifications are made under the
+[W3C CLA](https://www.w3.org/community/about/agreements/cla/).
+
+Contributions to Test Suites are made under the
+[W3C 3-clause BSD License](https://www.w3.org/copyright/3-clause-bsd-license-2008/).
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3cfd954
--- /dev/null
+++ b/README.md
@@ -0,0 +1,422 @@
+# Dynamic AI Offloading Protocol (DAOP) — Explainer
+
+> 📺 **[Live Demo](daop-illustration/)** — Browser-based illustration of `estimateQoS()` with interactive micro-benchmarks
+
+## Table of Contents
+- [Authors](#authors)
+- [Participate](#participate)
+- [Introduction](#introduction)
+- [User-Facing Problem](#user-facing-problem)
+ - [Goals](#goals)
+ - [Non-goals](#non-goals)
+- [User research](#user-research)
+- [Use Cases](#use-cases)
+ - [Adaptive Video Conferencing Background Blur](#adaptive-video-conferencing-background-blur)
+ - [Privacy-Preserving Photo Enhancement](#privacy-preserving-photo-enhancement)
+- [Proposed Approach: Model-Centric Evaluation (Callee Responsible)](#proposed-approach-model-centric-evaluation-callee-responsible)
+ - [Standardized Specification Requirements](#standardized-specification-requirements)
+ - [The `estimateQoS()` API](#the-estimateqos-api)
+ - [The "Weightless" Requirement and WebNN Spec Extensions](#the-weightless-requirement-and-webnn-spec-extensions)
+ - [Performance Tiers](#performance-tiers)
+- [Implementation Considerations (AI Stack Internals)](#implementation-considerations-ai-stack-internals)
+ - [Example Code: Adaptive Background Blur](#example-code-adaptive-background-blur)
+- [Discussion: Potential API Enhancements](#discussion-potential-api-enhancements)
+ - [1. Boolean Requirement API](#1-boolean-requirement-api)
+ - [2. QoS Change Events](#2-qos-change-events)
+- [Alternatives considered](#alternatives-considered)
+ - [Device-Centric Approach (Caller Responsible)](#device-centric-approach-caller-responsible)
+- [Accessibility, Internationalization, Privacy, and Security Considerations](#accessibility-internationalization-privacy-and-security-considerations)
+ - [Privacy](#privacy)
+ - [Security](#security)
+- [Stakeholder Feedback / Opposition](#stakeholder-feedback--opposition)
+- [References & acknowledgements](#references--acknowledgements)
+
+## Authors
+
+- Jonathan Ding (Intel)
+
+## Participate
+
+- [Issue tracker - Dynamic AI Offloading Protocol (DAOP)](https://github.com/webmachinelearning/proposals/issues/15)
+
+## Introduction
+
+This proposal addresses the challenge of efficiently offloading AI inference tasks from cloud
+servers to client devices while maintaining Quality of Service (QoS). This protocol provides a more
+effective mechanism for applications to evaluate whether a specific AI inference request is suitable
+for execution on the client side. It moves beyond static hardware specifications by enabling
+dynamic, privacy-preserving assessment of device capabilities, helping applications make informed
+offloading decisions. Throughout this document, the **Application (App)** represents the
+decision-making logic, which may reside on the client device (e.g., in a web browser) or on a cloud
+server.
+
+## User-Facing Problem
+
+Modern web applications increasingly rely on AI, but running these models solely in the cloud can be
+expensive and introduce latency. Conversely, running them on client devices is difficult because
+developers cannot easily determine if a target device—given its specific CPU, GPU, and NPU
+capabilities—can host a specific AI model without compromising performance or user privacy.
+
+### Goals
+
+- Standardize a mechanism for identifying device performance buckets for AI tasks.
+- Enable efficient offloading of AI inference from cloud to client devices.
+- Maintain high Quality of Service (QoS) during offloading.
+- Protect user privacy by avoiding detailed hardware fingerprinting.
+- Provide a future-proof abstraction that works across varying hardware (CPU, GPU, NPU).
+- Define a protocol that works regardless of whether the decision logic resides in the App's cloud
+ backend or client frontend.
+
+### Non-goals
+
+- Defining the specific wire protocol for model transmission (this proposal focuses on
+  negotiation/estimation).
+- Mandatory implementation of any specific inference engine.
+- Solving all AI workload types in version 1 (e.g., extremely large LLMs with dynamic shapes).
+
+## User research
+
+[Placeholder for user research findings. Initial feedback from ISVs and web developers indicates a
+strong need for predictable client-side AI performance metrics.]
+
+## Use Cases
+
+### Adaptive Video Conferencing Background Blur
+
+A video conferencing application wants to offload background blur processing to the user's laptop to
+save server costs and improve privacy, but only if the device can maintain a stable 30fps.
+
+1. **Inquiry**: The application builds a weightless graph of its blur model and calls
+ `context.estimateQoS()`.
+2. **Estimation**: The device evaluates its capability by integrating a wide range of local
+ intelligence: the AI stack software (including specialized drivers and runtimes), the specific
+ hardware accelerators, current system state (thermal state, battery level, power mode), and
+ environmental configurations that might affect performance.
+3. **Decision**:
+ - If the `performanceTier` meets the application's requirements (e.g., "excellent", "good", or
+ "fair" for real-time video), the application logic decides to download the full weights, bind
+ them, and run locally.
+ - Otherwise (e.g., "slow", "very-slow", "poor"), it falls back to cloud-based processing.
+
+### Privacy-Preserving Photo Enhancement
+
+A photo editing web app wants to run complex enhancement filters using the user's mobile NPU to
+reduce latency and maintain privacy.
+
+1. **Inquiry**: The application provides a weightless description of the enhancement model to
+ `context.estimateQoS()`, including specific target resolutions.
+2. **Estimation**: The device evaluates its capability by considering the current hardware and
+ software environment, including AI stack optimizations, hardware accelerators (such as NPU), and
+ overall system state (e.g., battery level, power mode, thermal conditions).
+3. **Decision**: The application enables the "High Quality" filter locally if the performance tier
+ meets the requirements.
+
+## Proposed Approach: Model-Centric Evaluation (Callee Responsible)
+
+The preferred approach is **Model-Centric**, where the device (the callee, i.e., the responder to
+the AI request) is responsible for evaluating its own ability to handle the requested AI workload.
+In this model, the **Application** (the caller) sends a **Model Description Inquiry**—a weightless
+description of the AI model and input characteristics—to the device. The device, as the callee, uses
+its local knowledge of hardware, current system state, software environment, and implementation
+details to estimate the expected Quality of Service (QoS) for the given task.
+
+```mermaid
+sequenceDiagram
+ participant App as App
+ participant Device as Device
+ participant Cloud as Cloud LLM
+ App->>Device: Weightless Model Description & Input Metadata
+ Note over Device: UA/AI Stack runs Local Estimation (Internal: Static / Dry Run / Black Box)
+ Device-->>App: Return QoS Bucket (Performance Tier)
+ Note over App: App makes Decision (Compare QoS vs Requirement)
+ alt App Decides: Offload
+ App->>Device: Bind Weights & Run Locally
+ else App Decides: Cloud
+ App->>Cloud: Execute on Cloud
+ end
+```
+
+This "callee responsible" design ensures that sensitive device details remain private, as only broad
+performance tiers are reported back to the application. It also allows the device to make the most
+accurate estimation possible, considering real-time factors like thermal state, background load, and
+hardware-specific optimizations that are not visible to the caller (whether the caller logic is in
+the cloud or on the client). By shifting responsibility for QoS evaluation to the callee, the
+protocol achieves both privacy protection and more reliable offloading decisions.
+
+### Standardized Specification Requirements
+
+To enable consistent cross-vendor estimation, the protocol requires standardization of the following
+inputs and outputs:
+
+1. **Weightless Model Description**:
+ - Based on the **WebNN Graph topology**.
+ - Includes **Lazy Bind Constants**: Placeholders for weights (via descriptors and labels) that
+ enable "weightless" graph construction and estimation without downloading large parameter
+ files.
+ - **Dynamic vs. Static Graph Expression**: This proposal currently uses the dynamic WebNN
+ `MLGraphBuilder` API to construct the weightless graph at runtime. An alternative approach is
+ to express the graph topology statically using a declarative format. The
+ [webnn-graph][ref-webnn-graph] project defines a WebNN-oriented graph DSL (`.webnn` format)
+ that separates the graph definition (structure only, no tensor data) from a weights manifest
+ and binary weights file. This static representation is human-readable,
+ diffable, and enables tooling such as ONNX-to-WebNN conversion and graph visualization. A
+ future version of DAOP could accept either a dynamically built `MLGraph` or a statically
+ defined `.webnn` graph description as input to `estimateQoS()`.
+2. **Model Metadata (Optional)**:
+ - Information about the weights that can significantly impact performance, such as **sparsity**
+ or specific quantization schemes.
+3. **Input Characterization**:
+ - The **shape** and **size** of the input data (e.g., image resolution, sequence length).
+4. **QoS Output**:
+   - Unified **Performance Tiers** (e.g., "excellent", "good", "fair", "moderate", "slow",
+     "very-slow", "poor") to ensure hardware abstraction and prevent privacy leaks through precise
+     latency metrics.
+
+### The `estimateQoS()` API
+
+We propose a core API for performance negotiation:
+
+```webidl
+dictionary MLQoSReport {
+ MLPerformanceTier performanceTier;
+};
+
+partial interface MLContext {
+  Promise<MLQoSReport> estimateQoS(MLGraph graph, optional MLQoSOptions options);
+};
+
+dictionary MLQoSOptions {
+ // Input characteristics
+  record<DOMString, MLOperandDescriptor> inputDescriptors;
+
+ // Weights characteristics (Optional)
+ boolean weightsSparsity = false;
+};
+```
+
+### The "Weightless" Requirement and WebNN Spec Extensions
+
+To maximize the benefits of DAOP, the underlying WebNN specification should support a **weightless
+build mode**. Currently, WebNN's `constant()` API typically requires an `ArrayBufferView`, which
+implies the weights must already be present in memory.
+
+We propose that WebNN builders be extended to allow:
+
+1. **Weightless Constants**: Defining constants using only their descriptor (shape, type) and a
+ `label` for late-binding.
+2. **Lazy / Explicit Binding**: Separating the graph topology definition from the binding of heavy
+ weight data. By using an explicit `bindConstants()` (or similar) method, we achieve **lazy
+ binding** where weights are only provided and processed after the offloading decision is made.
+ This design aligns with the proposal in
+ [webnn#901][ref-webnn-901], which addresses the same
+ fundamental problem from a memory-efficiency perspective. That proposal allows
+ `builder.constant()` to accept just an `MLOperandDescriptor` (shape and type, no
+ `ArrayBufferView`), producing a "hollow constant" handle. After `builder.build()`, weights are
+ streamed to device memory one at a time via `graph.setConstantData(constantOperand, dataBuffer)`,
+ reducing peak CPU memory from ~3× model size to ~1×. Our `bindConstants()` API could be
+ integrated with or replaced by this `setConstantData()` mechanism in a future version of the
+ spec, combining the benefits of weightless QoS estimation with memory-efficient weight loading.
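+
+The sketch below shows how weightless estimation and streamed weight binding could compose. It
+assumes the hollow-constant and `setConstantData()` shape described in [webnn#901][ref-webnn-901]
+(neither is part of the current WebNN specification), and the weight file name is hypothetical.
+
+```js
+const context = await navigator.ml.createContext({ deviceType: "npu" });
+const builder = new MLGraphBuilder(context);
+
+// Hollow constant: descriptor only, no ArrayBufferView (per webnn#901).
+const weights = builder.constant({ dataType: "float32", shape: [3, 3, 64, 64] });
+const input = builder.input("input", { shape: [1, 3, 224, 224], dataType: "float32" });
+const graph = await builder.build({ output: builder.conv2d(input, weights) });
+
+const qos = await context.estimateQoS(graph);
+if (qos.performanceTier !== "poor") {
+  // Stream each weight buffer to device memory individually (peak CPU memory stays near 1×).
+  const buf = await fetch("conv1-weights.bin").then((r) => r.arrayBuffer());
+  graph.setConstantData(weights, buf);
+}
+```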
+
+### Performance Tiers
+
+The `estimateQoS()` API returns a `performanceTier` string that represents the device's estimated
+ability to execute the given graph. The tiers are designed to be broad enough to prevent hardware
+fingerprinting while still enabling meaningful offloading decisions:
+
+| Tier | Indicative Latency | Interpretation |
+| ------------- | ------------------ | -------------------------------------- |
+| `"excellent"` | < 16 ms | Real-time (60 fps frame budget) |
+| `"good"` | < 100 ms | Interactive responsiveness |
+| `"fair"` | < 1 s | Responsive for non-real-time tasks |
+| `"moderate"` | < 10 s | Tolerable for batch or one-shot tasks |
+| `"slow"` | < 30 s | Noticeable wait |
+| `"very-slow"` | < 60 s | Long wait |
+| `"poor"` | ≥ 60 s | Likely unacceptable for most use cases |
+
+The exact tier boundaries are **implementation-defined** and may be adjusted. The key requirement is
+that tiers remain coarse enough to avoid fingerprinting while fine enough for applications to make
+useful offloading decisions.
+
+Applications choose their own acceptance threshold based on use-case requirements. For example, a
+video conferencing blur might require "good" or better, while a one-shot photo enhancement might
+accept "moderate".
+
+## Implementation Considerations (AI Stack Internals)
+
+The underlying system (e.g., User Agent or WebNN implementation) can use several strategies to
+estimate performance for the weightless graph. **These strategies are internal implementation
+details of the AI stack and are transparent to the application developer.** It is important to note
+that these strategies are **not part of the DAOP specification or proposal**; they are discussed
+here only to illustrate possible implementation choices and feasibility. Common techniques include:
+
+1. Static Cost Model: Analytical formulas (e.g., Roofline model) or lookup tables to predict
+ operator costs based on descriptors.
+2. Dry Run: Fast simulation of the inference engine's execution path without heavy computation or
+ weights.
+3. Black Box Profiling: Running the actual model topology using dummy/zero weights to measure
+ timing.
+
+For a concrete demonstration of these techniques, see the [daop-illustration](./daop-illustration)
+project and its [implementation details](./daop-illustration/IMPLEMENTATION.md). It showcases a
+**Static Cost Model** strategy that employs **log-log polynomial interpolation** of
+measured operator latencies derived from per-operator micro-benchmarks. By fitting degree-1
+polynomials (power-law curves) to latency data across multiple tensor sizes in logarithmic space,
+with a left-side clamp to handle small-size noise, the implementation
+captures performance characteristics common in GPU-accelerated workloads. This
+illustration uses a simplified approach for demonstration purposes; production implementations could
+employ other strategies such as Roofline models, learned cost models,
+hardware-specific operator libraries, or ML-based performance predictors. These metrics
+(regression coefficients, estimated throughput) are **internal implementation details** of the AI
+stack and are never exposed directly to the web application.
+
+To prevent hardware fingerprinting, the raw estimation results are normalized into broad
+**Performance Tiers** before being returned to the web application. The application logic remains
+decoupled from the hardware-specific details.
+
+### Example Code: Adaptive Background Blur
+
+The following example shows how an application might use the API to decide whether to offload.
+
+```js
+// 1. Initialize WebNN context
+const context = await navigator.ml.createContext({ deviceType: "npu" });
+const builder = new MLGraphBuilder(context);
+
+// 2. Build a WEIGHTLESS graph
+const weights = builder.constant({
+ shape: [3, 3, 64, 64],
+ dataType: "float32",
+  label: "modelWeights", // Identifier used to bind the real weights later
+});
+
+const input = builder.input("input", { shape: [1, 3, 224, 224], dataType: "float32" });
+const output = builder.conv2d(input, weights);
+const graph = await builder.build({ output });
+
+// 3. DAOP Estimation: Providing input characteristics
+const qos = await context.estimateQoS(graph, {
+ inputDescriptors: {
+ input: { shape: [1, 3, 720, 1280], dataType: "float32" },
+ },
+});
+
+// Check if the performance tier meets our requirements
+const acceptable = ["excellent", "good", "fair", "moderate"];
+if (acceptable.includes(qos.performanceTier)) {
+ const realWeights = await fetch("model-weights.bin").then((r) => r.arrayBuffer());
+
+ // 4. Bind real data (using the label) explicitly.
+ await context.bindConstants(graph, {
+ modelWeights: realWeights,
+ });
+
+ // 5. Subsequent compute calls only need dynamic inputs
+ const results = await context.compute(graph, {
+ input: cameraFrame,
+ });
+} else {
+ runCloudInference();
+}
+```
+
+## Discussion: Potential API Enhancements
+
+We are considering several additions to the API to better support adaptive applications:
+
+### 1. Boolean Requirement API
+
+Instead of returning a bucket, the application could provide its specific requirements (e.g.,
+minimum FPS or maximum latency) and receive a simple boolean "can meet requirement" response.
+
+```webidl
+partial interface MLContext {
+  Promise<boolean> meetsRequirement(MLGraph graph, MLPerformanceTier requiredTier, optional MLQoSOptions options);
+};
+```
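+
+A possible usage of this variant (sketch only; the option shape mirrors the `estimateQoS()`
+example above, and `runCloudInference()` stands for an application-defined cloud fallback):
+
+```js
+const canOffload = await context.meetsRequirement(graph, "good", {
+  inputDescriptors: { input: { shape: [1, 3, 720, 1280], dataType: "float32" } },
+});
+if (!canOffload) runCloudInference();
+```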
+
+### 2. QoS Change Events
+
+AI performance can change dynamically due to thermal throttling, battery state, or background system
+load. An event-driven mechanism would allow applications to react when the device's ability to meet
+a specific QoS requirement changes.
+
+```webidl
+interface MLQoSChangeEvent : Event {
+ readonly attribute boolean meetsRequirement;
+};
+```
+
+```js
+// Application listens for changes in offload capability
+const monitor = context.createQoSMonitor(graph, "excellent");
+monitor.onchange = (e) => {
+ if (!e.meetsRequirement) {
+ console.log("Performance dropped, switching back to cloud.");
+ switchToCloud();
+ } else {
+ console.log("Performance restored, offloading to local.");
+ switchToLocal();
+ }
+};
+```
+
+## Alternatives considered
+
+### Device-Centric Approach (Caller Responsible)
+
+In this alternative, the Application acts as the central intelligence. It collects raw hardware
+specifications and telemetry from the device and makes the offloading decision.
+
+```mermaid
+sequenceDiagram
+ participant App as App
+ participant Device as Device
+ participant Cloud as Cloud LLM
+ App->>Device: Request Device Description
+ Device-->>App: Return Spec (CPU, GPU, NPU, Mem, Microbenchmarks...)
+ Note over App: App estimates QoS (Mapping H/W Spec -> AI Performance)
+ Note over App: App makes Decision (Compare QoS vs Requirement)
+ alt App Decides: Offload
+ App->>Device: Execute locally
+ else App Decides: Cloud
+ App->>Cloud: Execute on Cloud
+ end
+```
+
+- **Process**: Device returns specific hardware details (CPU model, GPU frequency, NPU TOPs,
+ micro-benchmark results) -> Application estimates QoS -> Application decides to offload.
+- **Why rejected**:
+ - **Privacy Risks**: Exposes detailed hardware fingerprints and potentially sensitive system
+ telemetry to remote servers.
+ - **Estimation Complexity**: It is extremely difficult for a remote server to accurately map raw
+ hardware specs to actual inference performance across a fragmented device ecosystem (ignoring
+ local drivers, thermal state, and OS-level optimizations).
+ - **Scalability**: Requires maintaining and constantly updating an impractical global database
+ mapping every possible device configuration to AI performance profiles.
+
+## Accessibility, Internationalization, Privacy, and Security Considerations
+
+### Privacy
+
+The Model-Centric approach significantly enhances privacy by:
+
+- Avoiding hardware fingerprinting.
+- Returning broad **Performance Tiers** rather than exact hardware identifiers or precise latency
+ metrics.
+- Enabling local processing of sensitive user data (like photos or video) that would otherwise need
+ to be sent to the cloud.
+
+### Security
+
+- Weightless model descriptions should be validated to prevent malicious topologies from causing
+ resource exhaustion (DoS) during the estimation phase.
+
+## Stakeholder Feedback / Opposition
+
+- [Implementors/ISVs]: Initial interest from several ISVs, to be documented.
+
+## References & acknowledgements
+
+Many thanks for valuable feedback and advice from contributors to the WebNN specification and the
+Web Machine Learning Working Group.
+
+[ref-webnn-graph]: https://github.com/rustnn/webnn-graph
+[ref-webnn-901]: https://github.com/webmachinelearning/webnn/issues/901
diff --git a/daop-illustration/IMPLEMENTATION.md b/daop-illustration/IMPLEMENTATION.md
new file mode 100644
index 0000000..5169397
--- /dev/null
+++ b/daop-illustration/IMPLEMENTATION.md
@@ -0,0 +1,122 @@
+# DAOP Illustration: Reference Implementation
+
+> **Note on Illustration Purposes**: This implementation is provided for **illustration purposes** to demonstrate the feasibility of the `estimateQoS()` API. It uses a simplified log-log polynomial interpolation approach. A production implementation could employ other strategies — such as Roofline models, learned cost models, hardware-specific operator libraries, or ML-based performance predictors — depending on the target platform and accuracy requirements.
+
+## 1. Overview
+This document describes the implementation strategy for the `estimateQoS()` API in the DAOP (Dynamic AI Offloading Protocol) illustration. The estimation strategy uses **log-log polynomial interpolation** based on operator-level micro-benchmarks.
+
+The internals of these estimations are entirely opaque to the application. The application receives only a high-level performance tier (e.g., "excellent" or "fair"), allowing for hardware-agnostic offloading decisions without exposing raw timing data or device-specific characteristics.
+
+## 2. Performance Tiers
+The implementation categorizes the estimated graph latency into one of seven performance tiers. These tiers correspond to typical user experience expectations:
+
+| Tier | Latency Threshold | Description |
+|------|-------------------|-------------|
+| excellent | < 16ms | Real-time (60fps) performance |
+| good | < 100ms | Interactive / seamless UI |
+| fair | < 1s | Responsive but noticeable |
+| moderate | < 10s | Tolerable for background tasks |
+| slow | < 30s | Significant wait time |
+| very-slow | < 60s | Approaching timeout limits |
+| poor | ≥ 60s | Unacceptable performance |
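+
+A minimal sketch of the tier mapping implied by this table (thresholds in milliseconds; the function name is illustrative):
+
+```js
+function latencyToTier(ms) {
+  if (ms < 16) return "excellent";
+  if (ms < 100) return "good";
+  if (ms < 1_000) return "fair";
+  if (ms < 10_000) return "moderate";
+  if (ms < 30_000) return "slow";
+  if (ms < 60_000) return "very-slow";
+  return "poor";
+}
+```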
+
+## 3. Estimation Strategy: Log-Log Polynomial Interpolation
+Empirical observations show that operator execution time on modern hardware often follows a power-law relationship with the total number of processed elements:
+`time ≈ a · (totalElements)^b`
+
+By taking the logarithm of both sides, this relationship becomes linear in log-log space:
+`log(time) = log(a) + b · log(totalElements)`
+
+This implementation fits a **degree-1 polynomial** (linear) in log-log space:
+`log(time) = c0 + c1 · log(n)`
+
+The coefficients (`c0`, `c1`) are found using least-squares normal equations, solved via **Gaussian elimination with partial pivoting**.
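+
+For the degree-1 case the normal equations have a closed-form solution, so an equivalent fit can be sketched compactly (hypothetical names; the actual `poly-fit.js` solves the general normal equations):
+
+```js
+// points: [{ totalElements, medianMs }, ...] collected by the micro-benchmarks
+function fitLogLogLinear(points) {
+  const xs = points.map((p) => Math.log(p.totalElements));
+  const ys = points.map((p) => Math.log(p.medianMs));
+  const n = xs.length;
+  const sumX = xs.reduce((a, b) => a + b, 0);
+  const sumY = ys.reduce((a, b) => a + b, 0);
+  const sumXX = xs.reduce((a, x) => a + x * x, 0);
+  const sumXY = xs.reduce((a, x, i) => a + x * ys[i], 0);
+  const c1 = (n * sumXY - sumX * sumY) / (n * sumXX - sumX * sumX);
+  const c0 = (sumY - c1 * sumX) / n;
+  return [c0, c1]; // log(time) ≈ c0 + c1 · log(n)
+}
+```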
+
+### Small-Size Noise Handling (Clamp)
+
+At small input sizes, GPU dispatch overhead can dominate actual computation time, producing a U-shaped curve in log-log space — small inputs appear slower than medium ones. Left unchecked, a polynomial fit on this data extrapolates catastrophically for very small inputs.
+
+To address this, the fitter applies a **left-side clamp**:
+1. Find the measured point with the **minimum medianMs** (the "valley" of the U).
+2. **Clamp** all points to the left of it to that minimum value.
+3. **Fit** the degree-1 polynomial using only points from the minimum onward — clamped points are excluded from the fit.
+4. At **prediction time**, any input size at or below the minimum-point's size returns the flat clamp value instead of polynomial extrapolation.
+
+This ensures monotonic (non-decreasing) predictions: small inputs never produce absurdly high time estimates.
+
+**Prediction Process:**
+1. Calculate the natural log of the input element count: `ln_n = log(totalElements)`.
+2. If `ln_n` is at or below the clamp boundary, return the clamped floor value directly.
+3. Otherwise, evaluate the fitted polynomial: `ln_time = polyEval(coeffs, ln_n)`.
+4. Revert to time domain: `estimatedTime = exp(ln_time)`.
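+
+A compact sketch of prediction steps 1–4 (field names hypothetical):
+
+```js
+function predictMs(model, totalElements) {
+  // model: { coeffs: [c0, c1], clampLnN, clampMs } produced by the fitting step
+  const lnN = Math.log(totalElements);
+  if (lnN <= model.clampLnN) return model.clampMs; // left-side clamp floor
+  return Math.exp(model.coeffs[0] + model.coeffs[1] * lnN); // back to the time domain
+}
+```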
+
+If a polynomial has not yet been fitted for an operator (e.g., during the first calibration run), the system falls back to piecewise linear interpolation on the raw benchmarked data points.
+
+## 4. Shape-Aware Micro-Benchmarks
+To populate the estimation models, the implementation runs a suite of micro-benchmarks for supported operators across six size variants.
+
+### Size Variants
+Benchmarking across multiple sizes captures the "utilization curve" where small tensors may not fully saturate compute units.
+
+| Variant | Representative Shape | Total Elements |
+|---------|----------------------|----------------|
+| xs | [1, 16, 16, 8] | 2,048 |
+| small | [1, 32, 32, 16] | 16,384 |
+| medium | [1, 64, 64, 24] | 98,304 |
+| large | [1, 128, 128, 32] | 524,288 |
+| xl | [1, 256, 256, 32] | 2,097,152 |
+| xxl | [1, 512, 512, 32] | 8,388,608 |
+
+### Benchmark Methodology
+The system benchmarks 7 operator types: `conv2d`, `add`, `mul`, `relu`, `sigmoid`, `clamp`, and `averagePool2d`.
+
+1. **Baseline Overhead Subtraction**: Before benchmarking real operators, the runner measures the dispatch + readTensor overhead using a trivial (reshape) graph at a small fixed size. To improve accuracy, this measurement is repeated across 3 independent rounds (each with 50 batched iterations), and the median-of-medians is used. This baseline is subtracted from each operator's measured time to isolate pure compute cost.
+2. **Amortized Readback**: The runner dispatches 10 operations (batchSize=10) before a single `readTensor` call, further reducing per-dispatch synchronization overhead.
+3. **Iterations**: Each benchmark performs 5 warmup runs followed by 30 timed iterations to find the median latency.
+4. **Storage**: Raw data points `{ totalElements, medianMs }` are stored in the `TimeModelDatabase`.
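+
+The timing loop behind each data point might look like the following sketch (warmup and iteration counts from item 3; `runOnce` is a hypothetical callback that dispatches one batched execution and reads the result back):
+
+```js
+async function measureMedianMs(runOnce, warmup = 5, iterations = 30) {
+  for (let i = 0; i < warmup; i++) await runOnce();
+  const samples = [];
+  for (let i = 0; i < iterations; i++) {
+    const t0 = performance.now();
+    await runOnce();
+    samples.push(performance.now() - t0);
+  }
+  samples.sort((a, b) => a - b);
+  return samples[Math.floor(samples.length / 2)]; // medianMs
+}
+```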
+
+## 5. End-to-End Estimation Flow
+
+### Benchmark Phase (Offline/Calibration)
+1. Measure baseline dispatch + readTensor overhead using a trivial graph.
+2. Iterate through supported operator types and size variants.
+3. Execute benchmarks, subtract baseline overhead, and record median latencies.
+4. Store results in `TimeModelDatabase` and fit log-log polynomials.
+5. Persist models to `localStorage` under the key `"daop_time_models"`.
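+
+A sketch of the persistence step (storage key from step 5; `db.toJSON()` / `db.fromJSON()` are hypothetical helpers on `TimeModelDatabase`):
+
+```js
+// After calibration: persist the fitted models.
+localStorage.setItem("daop_time_models", JSON.stringify(db.toJSON()));
+
+// On the next page load: restore them and skip re-benchmarking.
+const saved = localStorage.getItem("daop_time_models");
+if (saved) db.fromJSON(JSON.parse(saved));
+```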
+
+### Estimation Phase (Online)
+1. **Traverse Graph**: Walk the IR of the weightless graph.
+2. **Sum Node Latencies**: For each node, look up the operator in `TimeModelDatabase`.
+ - Call `predict(opType, inputElements)` to get the estimated time.
+3. **Add Overhead**: Add a graph dispatch overhead of `0.5 + numNodes * 0.005` ms.
+4. **Assign Tier**: Map the total estimated latency to a performance tier string.
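+
+Put together, the online phase reduces to a walk like this sketch (IR field names hypothetical; `db.predict()` is the lookup from Section 3 and `latencyToTier()` a mapping like the one sketched in Section 2):
+
+```js
+function estimateGraphTier(irGraph, db) {
+  let totalMs = 0;
+  for (const node of irGraph.nodes) {
+    const elements = node.inputs[0].shape.reduce((a, d) => a * d, 1);
+    totalMs += db.predict(node.opType, elements); // per-node estimate
+  }
+  totalMs += 0.5 + irGraph.nodes.length * 0.005; // graph dispatch overhead (ms)
+  return latencyToTier(totalMs);
+}
+```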
+
+## 6. Project Structure
+
+```
+daop-illustration/
+ src/
+ index.js # Public API entry point
+ polyfill.js # WebNN feature detection and DAOP initialization
+ daop-context.js # Wraps native MLContext; delegates to interpolation estimator
+ daop-graph-builder.js # IR graph builder supporting weightless constants
+ daop-graph.js # IR graph representation and Mermaid export
+ ir/
+ graph-ir.js # Core IR definitions (TensorDesc, IRNode)
+ shape-inference.js # Shape inference logic for operators
+ qos/
+ estimate-qos-interp.js # Interpolation-based QoS estimator
+ interpolation/
+ poly-fit.js # Polynomial fitting (Normal Equations, Gaussian)
+ time-model.js # TimeModelDatabase (stores points, fits, predicts)
+ microbench/
+ bench-runner.js # Hardware-specific benchmark execution engine
+ op-benchmarks.js # Operator configurations (xs, small, medium, large, xl, xxl)
+ examples/
+ background-blur/
+ background-blur-demo.html # Interactive two-column demo page
+ selfie-model.js # Model graph definition + weight loader
+ blur-renderer.js # Image processing + blur compositing
+ meeting.jpg # Sample input image
+```
+
diff --git a/daop-illustration/LICENSE b/daop-illustration/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/daop-illustration/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/daop-illustration/README.md b/daop-illustration/README.md
new file mode 100644
index 0000000..3c9ae3f
--- /dev/null
+++ b/daop-illustration/README.md
@@ -0,0 +1,78 @@
+# DAOP Illustration
+
+A working JavaScript illustration of the
+[Dynamic AI Offloading Protocol (DAOP)](../README.md) using the WebNN API.
+
+This project demonstrates how the proposed `estimateQoS()` API can work in practice:
+an application builds a **weightless** computation graph, estimates performance via
+operator micro-benchmarks and polynomial interpolation, decides whether to run locally
+or offload to the cloud, and — if local — downloads weights and executes inference.
+
+## Prerequisites
+
+- A WebNN-capable browser (e.g., Chrome Canary with WebNN flags enabled).
+ See [installation guide](https://webnn.io/en/learn/get-started/installation).
+- Node.js (for the static file server).
+
+## Running the Demo
+
+```bash
+npm install
+npm start # starts http://localhost:8080
+```
+
+Open `http://localhost:8080/examples/background-blur/background-blur-demo.html` in the WebNN-capable
+browser.
+
+### Background Blur Demo
+
+The demo applies AI-powered background blur to a meeting photo using the MediaPipe Selfie
+Segmentation model. The workflow follows the DAOP protocol:
+
+1. **Build weightless graph** — the model topology is recorded without downloading weights.
+2. **Estimate QoS** — per-operator micro-benchmarks and interpolation produce a performance tier.
+3. **Offloading decision** — if the tier is acceptable (< 10 s), run locally; otherwise
+ offload to cloud.
+4. **Execute** — download weights, compile the native WebNN graph, run inference, apply blur.
+
+The right column of the demo exposes DAOP internals: operator benchmarks, estimation
+curves, computation graph visualization, and a timing comparison between estimated and
+actual latency.
+
+## Implementation Details
+
+See [IMPLEMENTATION.md](./IMPLEMENTATION.md) for a detailed description of the estimation
+strategy, including shape-aware micro-benchmarks, polynomial interpolation, and the 7-tier
+performance classification.
+
+## Project Structure
+
+```
+src/ # DAOP library (reusable)
+ index.js # Public API
+ polyfill.js # WebNN detection + DAOP initialization
+ daop-context.js # Wraps native MLContext with estimateQoS / bindConstants / compute
+ daop-graph-builder.js # IR graph builder (weightless constants)
+ daop-graph.js # IR graph + Mermaid visualization
+ ir/
+ graph-ir.js # TensorDesc, IROperand, IRNode, IRGraph
+ shape-inference.js # Shape inference for conv2d, pool, resample, etc.
+ qos/
+ estimate-qos-interp.js # Interpolation-based QoS estimation + tier assignment
+ interpolation/
+ poly-fit.js # Polynomial fitting (Normal Equations, Gaussian)
+ time-model.js # TimeModelDatabase (stores points, fits, predicts)
+ microbench/
+ bench-runner.js # Multi-size benchmark runner
+ op-benchmarks.js # Per-op benchmark configurations (xs–xxl)
+examples/
+ background-blur/ # Background blur demo (self-contained)
+ background-blur-demo.html # Interactive two-column demo page
+ selfie-model.js # Model graph definition + weight loader
+ blur-renderer.js # Image processing + blur compositing
+ meeting.jpg # Sample input image
+```
+
+## License
+
+Apache 2.0
diff --git a/daop-illustration/demo-server.js b/daop-illustration/demo-server.js
new file mode 100644
index 0000000..34f57bc
--- /dev/null
+++ b/daop-illustration/demo-server.js
@@ -0,0 +1,63 @@
+import http from "http";
+import fs from "fs";
+import path from "path";
+import { fileURLToPath } from "url";
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+const PORT = 8080;
+
+const MIME_TYPES = {
+  ".html": "text/html",
+  ".js": "text/javascript",
+  ".css": "text/css",
+  ".json": "application/json",
+  ".png": "image/png",
+  ".jpg": "image/jpeg",
+ ".jpeg": "image/jpeg",
+ ".gif": "image/gif",
+ ".svg": "image/svg+xml",
+ ".webp": "image/webp",
+ ".wasm": "application/wasm",
+};
+
+const server = http.createServer((req, res) => {
+ console.log(`${req.method} ${req.url}`);
+
+ // Default to the background blur demo
+ let filePath = req.url === "/" ? "/examples/background-blur/background-blur-demo.html" : req.url;
+
+ // Remove query strings or hashes if present
+ filePath = filePath.split("?")[0].split("#")[0];
+
+ // Ensure we don't try to access files outside the directory
+ // Remove leading slash for path.join to behave consistently
+ const safePath = path.normalize(filePath).replace(/^[\/\\]+/, "");
+ let fullPath = path.join(__dirname, safePath);
+
+ console.log(`Serving: ${fullPath}`);
+
+ const extname = path.extname(fullPath);
+ let contentType = MIME_TYPES[extname] || "application/octet-stream";
+
+ fs.readFile(fullPath, (error, content) => {
+ if (error) {
+ if (error.code === "ENOENT") {
+ res.writeHead(404);
+ res.end("File not found");
+ } else {
+ res.writeHead(500);
+ res.end(`Server error: ${error.code}`);
+ }
+ } else {
+ res.writeHead(200, { "Content-Type": contentType });
+ res.end(content, "utf-8");
+ }
+ });
+});
+
+server.listen(PORT, () => {
+ console.log(`Server running at http://localhost:${PORT}/`);
+ console.log(`Demo page: http://localhost:${PORT}/examples/background-blur/background-blur-demo.html`);
+});
diff --git a/daop-illustration/examples/background-blur/background-blur-demo.html b/daop-illustration/examples/background-blur/background-blur-demo.html
new file mode 100644
index 0000000..06497a4
--- /dev/null
+++ b/daop-illustration/examples/background-blur/background-blur-demo.html
@@ -0,0 +1,641 @@
+DAOP: Background Blur Demo
+
+Web Application
+
+Background Blur with WebNN
+This demo applies AI-powered background blur using the WebNN API. The application only sees a high-level performance tier — all estimation internals are opaque.
+
+Under the Hood — DAOP Internals
+One possible implementation strategy (for illustration only)
+
+⚙️ Operator Micro-benchmarks
+
+1. Computation Graph (Weightless)
+The graph structure is built without weights to allow instant analysis.