diff --git a/.gitignore b/.gitignore index e69de29..bf3579a 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1,154 @@ +daop-illustration/docs +.vscode + +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +*.lcov + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) +web_modules/ + +# TypeScript cache +*.tsbuildinfo + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional stylelint cache +.stylelintcache + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variable files +.env +.env.* +!.env.example + +# parcel-bundler cache (https://parceljs.org/) +.cache +.parcel-cache + +# Next.js build output +.next +out + +# Nuxt.js build / generate output +.nuxt +dist + +# Gatsby files +.cache/ +# Comment in the public line in if your project uses Gatsby and not Next.js +# https://nextjs.org/blog/next-9-1#public-directory-support +# public + +# vuepress build output +.vuepress/dist + +# vuepress v2.x temp and cache directory +.temp +.cache + +# Sveltekit cache directory +.svelte-kit/ + +# vitepress build output +**/.vitepress/dist + +# vitepress cache directory +**/.vitepress/cache + +# Docusaurus cache and generated files +.docusaurus + +# Serverless directories +.serverless/ + +# FuseBox cache +.fusebox/ + +# DynamoDB Local files +.dynamodb/ + +# Firebase cache directory +.firebase/ + +# TernJS port file +.tern-port + +# Stores VSCode versions used for testing VSCode extensions +.vscode-test + +# yarn v3 +.pnp.* +.yarn/* +!.yarn/patches +!.yarn/plugins +!.yarn/releases +!.yarn/sdks +!.yarn/versions + +# Vite logs files +vite.config.js.timestamp-* +vite.config.ts.timestamp-* + +# AI coding agents +AGENTS.md +.cursor/ +.cursorrules +.github/copilot-instructions.md +.copilot/ +.aider* +.cline/ +.windsurf/ +.augment/ + diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..d63efea --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,10 @@ +All Reports in this Repository are licensed by Contributors +under the +[W3C Software and Document License](https://www.w3.org/copyright/software-license/). + +Contributions to Specifications are made under the +[W3C CLA](https://www.w3.org/community/about/agreements/cla/). 
+ +Contributions to Test Suites are made under the +[W3C 3-clause BSD License](https://www.w3.org/copyright/3-clause-bsd-license-2008/) + diff --git a/README.md b/README.md new file mode 100644 index 0000000..3cfd954 --- /dev/null +++ b/README.md @@ -0,0 +1,422 @@ +# Dynamic AI Offloading Protocol (DAOP) — Explainer + +> 📺 **[Live Demo](daop-illustration/)** — Browser-based illustration of `estimateQoS()` with interactive micro-benchmarks + +## Table of Contents +- [Authors](#authors) +- [Participate](#participate) +- [Introduction](#introduction) +- [User-Facing Problem](#user-facing-problem) + - [Goals](#goals) + - [Non-goals](#non-goals) +- [User research](#user-research) +- [Use Cases](#use-cases) + - [Adaptive Video Conferencing Background Blur](#adaptive-video-conferencing-background-blur) + - [Privacy-Preserving Photo Enhancement](#privacy-preserving-photo-enhancement) +- [Proposed Approach: Model-Centric Evaluation (Callee Responsible)](#proposed-approach-model-centric-evaluation-callee-responsible) + - [Standardized Specification Requirements](#standardized-specification-requirements) + - [The `estimateQoS()` API](#the-estimateqos-api) + - [The "Weightless" Requirement and WebNN Spec Extensions](#the-weightless-requirement-and-webnn-spec-extensions) + - [Performance Tiers](#performance-tiers) +- [Implementation Considerations (AI Stack Internals)](#implementation-considerations-ai-stack-internals) + - [Example Code: Adaptive Background Blur](#example-code-adaptive-background-blur) +- [Discussion: Potential API Enhancements](#discussion-potential-api-enhancements) + - [1. Boolean Requirement API](#1-boolean-requirement-api) + - [2. QoS Change Events](#2-qos-change-events) +- [Alternatives considered](#alternatives-considered) + - [Device-Centric Approach (Caller Responsible)](#device-centric-approach-caller-responsible) +- [Accessibility, Internationalization, Privacy, and Security Considerations](#accessibility-internationalization-privacy-and-security-considerations) + - [Privacy](#privacy) + - [Security](#security) +- [Stakeholder Feedback / Opposition](#stakeholder-feedback--opposition) +- [References & acknowledgements](#references--acknowledgements) + +## Authors + +- Jonathan Ding (Intel) + +## Participate + +- [Issue tracker - Dynamic AI Offloading Protocol (DAOP)](https://github.com/webmachinelearning/proposals/issues/15) + +## Introduction + +This proposal addresses the challenge of efficiently offloading AI inference tasks from cloud +servers to client devices while maintaining Quality of Service (QoS). This protocol provides a more +effective mechanism for applications to evaluate whether a specific AI inference request is suitable +for execution on the client side. It moves beyond static hardware specifications by enabling +dynamic, privacy-preserving assessment of device capabilities, helping applications make informed +offloading decisions. Throughout this document, the **Application (App)** represents the +decision-making logic, which may reside on the client device (e.g., in a web browser) or on a cloud +server. + +## User-Facing Problem + +Modern web applications increasingly rely on AI, but running these models solely in the cloud can be +expensive and introduce latency. Conversely, running them on client devices is difficult because +developers cannot easily determine if a target device—given its specific CPU, GPU, and NPU +capabilities—can host a specific AI model without compromising performance or user privacy. 
+ +### Goals + +- Standardize a mechanism for identifying device performance buckets for AI tasks. +- Enable efficient offloading of AI inference from cloud to client devices. +- Maintain high Quality of Service (QoS) during offloading. +- Protect user privacy by avoiding detailed hardware fingerprinting. +- Provide a future-proof abstraction that works across varying hardware (CPU, GPU, NPU). +- Define a protocol that works regardless of whether the decision logic resides in the App's cloud + backend or client frontend. + +### Non-goals + +- Defining the specific wire protocol for model transmission (this focuses on the + negotiation/estimation). +- Mandatory implementation of any specific inference engine. +- Solving all AI workload types in version 1 (e.g., extremely large LLMs with dynamic shapes). + +## User research + +[Placeholder for user research findings. Initial feedback from ISVs and web developers indicates a +strong need for predictable client-side AI performance metrics.] + +## Use Cases + +### Adaptive Video Conferencing Background Blur + +A video conferencing application wants to offload background blur processing to the user's laptop to +save server costs and improve privacy, but only if the device can maintain a stable 30fps. + +1. **Inquiry**: The application builds a weightless graph of its blur model and calls + `context.estimateQoS()`. +2. **Estimation**: The device evaluates its capability by integrating a wide range of local + intelligence: the AI stack software (including specialized drivers and runtimes), the specific + hardware accelerators, current system state (thermal state, battery level, power mode), and + environmental configurations that might affect performance. +3. **Decision**: + - If the `performanceTier` meets the application's requirements (e.g., "excellent", "good", or + "fair" for real-time video), the application logic decides to download the full weights, bind + them, and run locally. + - Otherwise (e.g., "slow", "very-slow", "poor"), it falls back to cloud-based processing. + +### Privacy-Preserving Photo Enhancement + +A photo editing web app wants to run complex enhancement filters using the user's mobile NPU to +reduce latency and maintain privacy. + +1. **Inquiry**: The application provides a weightless description of the enhancement model to + `context.estimateQoS()`, including specific target resolutions. +2. **Estimation**: The device evaluates its capability by considering the current hardware and + software environment, including AI stack optimizations, hardware accelerators (such as NPU), and + overall system state (e.g., battery level, power mode, thermal conditions). +3. **Decision**: The application enables the "High Quality" filter locally if the performance tier + meets the requirements. + +## Proposed Approach: Model-Centric Evaluation (Callee Responsible) + +The preferred approach is **Model-Centric**, where the device (the callee, i.e., the responder to +the AI request) is responsible for evaluating its own ability to handle the requested AI workload. +In this model, the **Application** (the caller) sends a **Model Description Inquiry**—a weightless +description of the AI model and input characteristics—to the device. The device, as the callee, uses +its local knowledge of hardware, current system state, software environment, and implementation +details to estimate the expected Quality of Service (QoS) for the given task. 
+
+```mermaid
+sequenceDiagram
+    participant App as App
+    participant Device as Device
+    participant Cloud as Cloud LLM
+    App->>Device: Weightless Model Description & Input Metadata
+    Note over Device: UA/AI Stack runs Local Estimation<br/>(Internal: Static / Dry Run / Black Box)
+    Device-->>App: Return QoS Bucket (Performance Tier)
+    Note over App: App makes Decision<br/>(Compare QoS vs Requirement)
+    alt App Decides: Offload
+        App->>Device: Bind Weights & Run Locally
+    else App Decides: Cloud
+        App->>Cloud: Execute on Cloud
+    end
+```
+
+This "callee responsible" design ensures that sensitive device details remain private, as only broad
+performance tiers are reported back to the application. It also allows the device to make the most
+accurate estimation possible, considering real-time factors like thermal state, background load, and
+hardware-specific optimizations that are not visible to the caller (whether the caller logic is in
+the cloud or on the client). By shifting responsibility for QoS evaluation to the callee, the
+protocol achieves both privacy protection and more reliable offloading decisions.
+
+### Standardized Specification Requirements
+
+To enable consistent cross-vendor estimation, the protocol requires standardization of the following
+inputs and outputs:
+
+1. **Weightless Model Description**:
+   - Based on the **WebNN Graph topology**.
+   - Includes **Lazy Bind Constants**: Placeholders for weights (via descriptors and labels) that
+     enable "weightless" graph construction and estimation without downloading large parameter
+     files.
+   - **Dynamic vs. Static Graph Expression**: This proposal currently uses the dynamic WebNN
+     `MLGraphBuilder` API to construct the weightless graph at runtime. An alternative approach is
+     to express the graph topology statically using a declarative format. The
+     [webnn-graph][ref-webnn-graph] project defines a WebNN-oriented graph DSL (`.webnn` format)
+     that separates the graph definition (structure only, no tensor data) from a weights manifest
+     and binary weights file. This static representation is human-readable, diffable, and enables
+     tooling such as ONNX-to-WebNN conversion and graph visualization. A future version of DAOP
+     could accept either a dynamically built `MLGraph` or a statically defined `.webnn` graph
+     description as input to `estimateQoS()`.
+2. **Model Metadata (Optional)**:
+   - Information about the weights that can significantly impact performance, such as **sparsity**
+     or specific quantization schemes.
+3. **Input Characterization**:
+   - The **shape** and **size** of the input data (e.g., image resolution, sequence length).
+4. **QoS Output**:
+   - Unified **Performance Tiers** (e.g., "excellent", "good", "fair", "moderate", "slow",
+     "very-slow", "poor") to ensure hardware abstraction and prevent privacy leakage through
+     precise latency metrics.
+
+### The `estimateQoS()` API
+
+We propose a core API for performance negotiation:
+
+```webidl
+dictionary MLQoSReport {
+  MLPerformanceTier performanceTier;
+};
+
+partial interface MLContext {
+  Promise<MLQoSReport> estimateQoS(MLGraph graph, optional MLQoSOptions options);
+};
+
+dictionary MLQoSOptions {
+  // Input characteristics
+  record<DOMString, MLOperandDescriptor> inputDescriptors;
+
+  // Weights characteristics (Optional)
+  boolean weightsSparsity = false;
+};
+```
+
+### The "Weightless" Requirement and WebNN Spec Extensions
+
+To maximize the benefits of DAOP, the underlying WebNN specification should support a **weightless
+build mode**. Currently, WebNN's `constant()` API typically requires an `ArrayBufferView`, which
+implies the weights must already be present in memory.
+
+We propose that WebNN builders be extended to allow:
+
+1. **Weightless Constants**: Defining constants using only their descriptor (shape, type) and a
+   `label` for late-binding.
+2. **Lazy / Explicit Binding**: Separating the graph topology definition from the binding of heavy
+   weight data. By using an explicit `bindConstants()` (or similar) method, we achieve **lazy
+   binding** where weights are only provided and processed after the offloading decision is made.
+   This design aligns with the proposal in [webnn#901][ref-webnn-901], which addresses the same
+   fundamental problem from a memory-efficiency perspective. That proposal allows
+   `builder.constant()` to accept just an `MLOperandDescriptor` (shape and type, no
+   `ArrayBufferView`), producing a "hollow constant" handle. After `builder.build()`, weights are
+   streamed to device memory one at a time via `graph.setConstantData(constantOperand, dataBuffer)`,
+   reducing peak CPU memory from ~3× model size to ~1×. Our `bindConstants()` API could be
+   integrated with or replaced by this `setConstantData()` mechanism in a future version of the
+   spec, combining the benefits of weightless QoS estimation with memory-efficient weight loading.
+
+### Performance Tiers
+
+The `estimateQoS()` API returns a `performanceTier` string that represents the device's estimated
+ability to execute the given graph. The tiers are designed to be broad enough to prevent hardware
+fingerprinting while still enabling meaningful offloading decisions:
+
+| Tier          | Indicative Latency | Interpretation                         |
+| ------------- | ------------------ | -------------------------------------- |
+| `"excellent"` | < 16 ms            | Real-time (60 fps frame budget)        |
+| `"good"`      | < 100 ms           | Interactive responsiveness             |
+| `"fair"`      | < 1 s              | Responsive for non-real-time tasks     |
+| `"moderate"`  | < 10 s             | Tolerable for batch or one-shot tasks  |
+| `"slow"`      | < 30 s             | Noticeable wait                        |
+| `"very-slow"` | < 60 s             | Long wait                              |
+| `"poor"`      | ≥ 60 s             | Likely unacceptable for most use cases |
+
+The exact tier boundaries are **implementation-defined** and may be adjusted. The key requirement is
+that tiers remain coarse enough to avoid fingerprinting while fine enough for applications to make
+useful offloading decisions.
+
+Applications choose their own acceptance threshold based on use-case requirements. For example, a
+video conferencing blur might require "good" or better, while a one-shot photo enhancement might
+accept "moderate".
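+
+Because tiers are ordered strings rather than numbers, an application needs an explicit ordering to
+compare them. A minimal sketch of application-side code (a hypothetical helper, not part of the
+proposed API):
+
+```js
+// Tier names ordered best-to-worst, as in the table above.
+const TIER_ORDER = ["excellent", "good", "fair", "moderate", "slow", "very-slow", "poor"];
+
+// Returns true if `tier` is at least as good as `required`.
+function tierAtLeast(tier, required) {
+  return TIER_ORDER.indexOf(tier) <= TIER_ORDER.indexOf(required);
+}
+
+// e.g., tierAtLeast("fair", "moderate") === true; tierAtLeast("slow", "good") === false
+```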
+
+## Implementation Considerations (AI Stack Internals)
+
+The underlying system (e.g., User Agent or WebNN implementation) can use several strategies to
+estimate performance for the weightless graph. **These strategies are internal implementation
+details of the AI stack and are transparent to the application developer.** It is important to note
+that these strategies are **not part of the DAOP specification or proposal**; they are discussed
+here only to illustrate possible implementation choices and feasibility. Common techniques include:
+
+1. Static Cost Model: Analytical formulas (e.g., Roofline model) or lookup tables to predict
+   operator costs based on descriptors.
+2. Dry Run: Fast simulation of the inference engine's execution path without heavy computation or
+   weights.
+3. Black Box Profiling: Running the actual model topology using dummy/zero weights to measure
+   timing.
+
+For a concrete demonstration of these techniques, see the [daop-illustration](./daop-illustration)
+project and its [implementation details](./daop-illustration/IMPLEMENTATION.md). It showcases a
+**Static Cost Model** strategy that employs **log-log polynomial interpolation** of measured
+operator latencies derived from per-operator micro-benchmarks. By fitting degree-1 polynomials
+(power-law curves) to latency data across multiple tensor sizes in logarithmic space, with a
+left-side clamp to handle small-size noise, the implementation captures performance characteristics
+common in GPU-accelerated workloads. This illustration uses a simplified approach for demonstration
+purposes; production implementations could employ other strategies such as Roofline models, learned
+cost models, hardware-specific operator libraries, or ML-based performance predictors. These metrics
+(regression coefficients, estimated throughput) are **internal implementation details** of the AI
+stack and are never exposed directly to the web application.
+
+To prevent hardware fingerprinting, the raw estimation results are normalized into broad
+**Performance Tiers** before being returned to the web application. The application logic remains
+decoupled from the hardware-specific details.
+
+### Example Code: Adaptive Background Blur
+
+The following example shows how an application might use the API to decide whether to offload.
+
+```js
+// 1. Initialize WebNN context
+const context = await navigator.ml.createContext({ deviceType: "npu" });
+const builder = new MLGraphBuilder(context);
+
+// 2. Build a WEIGHTLESS graph
+const weights = builder.constant({
+  shape: [3, 3, 64, 64],
+  dataType: "float32",
+  label: "modelWeights", // Identifier for late binding
+});
+
+const input = builder.input("input", { shape: [1, 3, 224, 224], dataType: "float32" });
+const output = builder.conv2d(input, weights);
+const graph = builder.build();
+
+// 3. DAOP Estimation: Providing input characteristics
+const qos = await context.estimateQoS(graph, {
+  inputDescriptors: {
+    input: { shape: [1, 3, 720, 1280], dataType: "float32" },
+  },
+});
+
+// Check if the performance tier meets our requirements
+const acceptable = ["excellent", "good", "fair", "moderate"];
+if (acceptable.includes(qos.performanceTier)) {
+  const realWeights = await fetch("model-weights.bin").then((r) => r.arrayBuffer());
+
+  // 4. Bind real data (using the label) explicitly.
+  await context.bindConstants(graph, {
+    modelWeights: realWeights,
+  });
+
+  // 5. Subsequent compute calls only need dynamic inputs
+  const results = await context.compute(graph, {
+    input: cameraFrame,
+  });
+} else {
+  runCloudInference();
+}
+```
+
+## Discussion: Potential API Enhancements
+
+We are considering several additions to the API to better support adaptive applications:
+
+### 1. Boolean Requirement API
+
+Instead of returning a bucket, the application could provide its specific requirements (e.g.,
+minimum FPS or maximum latency) and receive a simple boolean "can meet requirement" response.
+
+```webidl
+partial interface MLContext {
+  Promise<boolean> meetsRequirement(MLGraph graph, MLPerformanceTier requiredTier, optional MLQoSOptions options);
+};
+```
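+
+A minimal usage sketch of this variant (hypothetical, since the enhancement is still under
+discussion; `graph` and `runCloudInference()` are from the example above):
+
+```js
+// Ask the device directly whether it can hit the "good" tier for this graph.
+const canOffload = await context.meetsRequirement(graph, "good", {
+  inputDescriptors: { input: { shape: [1, 3, 720, 1280], dataType: "float32" } },
+});
+if (!canOffload) runCloudInference();
+```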
+
+### 2. QoS Change Events
+
+AI performance can change dynamically due to thermal throttling, battery state, or background system
+load. An event-driven mechanism would allow applications to react when the device's ability to meet
+a specific QoS requirement changes.
+
+```webidl
+interface MLQoSChangeEvent : Event {
+  readonly attribute boolean meetsRequirement;
+};
+```
+
+```js
+// Application listens for changes in offload capability
+const monitor = context.createQoSMonitor(graph, "excellent");
+monitor.onchange = (e) => {
+  if (!e.meetsRequirement) {
+    console.log("Performance dropped, switching back to cloud.");
+    switchToCloud();
+  } else {
+    console.log("Performance restored, offloading to local.");
+    switchToLocal();
+  }
+};
+```
+
+## Alternatives considered
+
+### Device-Centric Approach (Caller Responsible)
+
+In this alternative, the Application acts as the central intelligence. It collects raw hardware
+specifications and telemetry from the device and makes the offloading decision.
+
+```mermaid
+sequenceDiagram
+    participant App as App
+    participant Device as Device
+    participant Cloud as Cloud LLM
+    App->>Device: Request Device Description
+    Device-->>App: Return Spec (CPU, GPU, NPU, Mem, Microbenchmarks...)
+    Note over App: App estimates QoS<br/>(Mapping H/W Spec -> AI Performance)
+    Note over App: App makes Decision<br/>(Compare QoS vs Requirement)
+    alt App Decides: Offload
+        App->>Device: Execute locally
+    else App Decides: Cloud
+        App->>Cloud: Execute on Cloud
+    end
+```
+
+- **Process**: Device returns specific hardware details (CPU model, GPU frequency, NPU TOPs,
+  micro-benchmark results) -> Application estimates QoS -> Application decides to offload.
+- **Why rejected**:
+  - **Privacy Risks**: Exposes detailed hardware fingerprints and potentially sensitive system
+    telemetry to remote servers.
+  - **Estimation Complexity**: It is extremely difficult for a remote server to accurately map raw
+    hardware specs to actual inference performance across a fragmented device ecosystem (ignoring
+    local drivers, thermal state, and OS-level optimizations).
+  - **Scalability**: Requires maintaining and constantly updating an impractical global database
+    mapping every possible device configuration to AI performance profiles.
+
+## Accessibility, Internationalization, Privacy, and Security Considerations
+
+### Privacy
+
+The Model-Centric approach significantly enhances privacy by:
+
+- Avoiding hardware fingerprinting.
+- Returning broad **Performance Tiers** rather than exact hardware identifiers or precise latency
+  metrics.
+- Enabling local processing of sensitive user data (like photos or video) that would otherwise need
+  to be sent to the cloud.
+
+### Security
+
+- Weightless model descriptions should be validated to prevent malicious topologies from causing
+  resource exhaustion (DoS) during the estimation phase.
+
+## Stakeholder Feedback / Opposition
+
+- [Implementors/ISVs]: Initial interest from several ISVs, to be documented.
+
+## References & acknowledgements
+
+Many thanks for valuable feedback and advice from the contributors to the WebNN and Web Machine
+Learning Working Group.
+
+[ref-webnn-graph]: https://github.com/rustnn/webnn-graph
+[ref-webnn-901]: https://github.com/webmachinelearning/webnn/issues/901
diff --git a/daop-illustration/IMPLEMENTATION.md b/daop-illustration/IMPLEMENTATION.md
new file mode 100644
index 0000000..5169397
--- /dev/null
+++ b/daop-illustration/IMPLEMENTATION.md
@@ -0,0 +1,122 @@
+# DAOP Illustration: Reference Implementation
+
+> **Note**: This implementation is provided for **illustration purposes** to demonstrate the feasibility of the `estimateQoS()` API. It uses a simplified log-log polynomial interpolation approach. A production implementation could employ other strategies — such as Roofline models, learned cost models, hardware-specific operator libraries, or ML-based performance predictors — depending on the target platform and accuracy requirements.
+
+## 1. Overview
+This document describes the implementation strategy for the `estimateQoS()` API in the DAOP (Dynamic AI Offloading Protocol) illustration. The estimation strategy uses **log-log polynomial interpolation** based on operator-level micro-benchmarks.
+
+The internals of these estimations are entirely opaque to the application. The application receives only a high-level performance tier (e.g., "excellent" or "fair"), allowing for hardware-agnostic offloading decisions without exposing raw timing data or device-specific characteristics.
+
+## 2. Performance Tiers
+The implementation categorizes the estimated graph latency into one of seven performance tiers.
These tiers correspond to typical user experience expectations:
+
+| Tier | Latency Threshold | Description |
+|------|-------------------|-------------|
+| excellent | < 16ms | Real-time (60fps) performance |
+| good | < 100ms | Interactive / seamless UI |
+| fair | < 1s | Responsive but noticeable |
+| moderate | < 10s | Tolerable for background tasks |
+| slow | < 30s | Significant wait time |
+| very-slow | < 60s | Approaching timeout limits |
+| poor | ≥ 60s | Unacceptable performance |
+
+## 3. Estimation Strategy: Log-Log Polynomial Interpolation
+Empirical observations show that operator execution time on modern hardware often follows a power-law relationship with the total number of processed elements:
+`time ≈ a · (totalElements)^b`
+
+By taking the logarithm of both sides, this relationship becomes linear in log-log space:
+`log(time) = log(a) + b · log(totalElements)`
+
+This implementation fits a **degree-1 polynomial** (linear) in log-log space:
+`log(time) = c0 + c1 · log(n)`
+
+The coefficients (`c0`, `c1`) are found using least-squares normal equations, solved via **Gaussian elimination with partial pivoting**.
+
+### Small-Size Noise Handling (Clamp)
+
+At small input sizes, GPU dispatch overhead can dominate actual computation time, producing a U-shaped curve in log-log space — small inputs appear slower than medium ones. Left unchecked, a polynomial fit on this data extrapolates catastrophically for very small inputs.
+
+To address this, the fitter applies a **left-side clamp**:
+1. Find the measured point with the **minimum medianMs** (the "valley" of the U).
+2. **Clamp** all points to the left of it to that minimum value.
+3. **Fit** the degree-1 polynomial using only points from the minimum onward — clamped points are excluded from the fit.
+4. At **prediction time**, any input size at or below the minimum-point's size returns the flat clamp value instead of polynomial extrapolation.
+
+This ensures monotonic (non-decreasing) predictions: small inputs never produce absurdly high time estimates.
+
+**Prediction Process:**
+1. Calculate the natural log of the input element count: `ln_n = log(totalElements)`.
+2. If `ln_n` is at or below the clamp boundary, return the clamped floor value directly.
+3. Otherwise, evaluate the fitted polynomial: `ln_time = polyEval(coeffs, ln_n)`.
+4. Revert to the time domain: `estimatedTime = exp(ln_time)`.
+
+If a polynomial has not yet been fitted for an operator (e.g., during the first calibration run), the system falls back to piecewise linear interpolation on the raw benchmarked data points.
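+
+To make the fit-and-predict path concrete, here is a minimal sketch of the degree-1 case (hypothetical helper names; the actual code in `interpolation/poly-fit.js` solves the normal equations by Gaussian elimination for arbitrary degree, whereas the 2×2 system below is solved in closed form):
+
+```js
+// points: [{ totalElements, medianMs }, ...], sorted by ascending totalElements.
+function fitLogLog(points) {
+  // Left-side clamp: locate the "valley" point with minimum medianMs and
+  // fit only from there onward (see Small-Size Noise Handling above).
+  const minIdx = points.reduce((m, p, i) => (p.medianMs < points[m].medianMs ? i : m), 0);
+  const used = points.slice(minIdx);
+  const xs = used.map((p) => Math.log(p.totalElements));
+  const ys = used.map((p) => Math.log(p.medianMs));
+  // Closed-form least squares for log(time) = c0 + c1 * log(n).
+  const n = xs.length;
+  const sx = xs.reduce((a, b) => a + b, 0);
+  const sy = ys.reduce((a, b) => a + b, 0);
+  const sxx = xs.reduce((a, x) => a + x * x, 0);
+  const sxy = xs.reduce((a, x, i) => a + x * ys[i], 0);
+  const c1 = (n * sxy - sx * sy) / (n * sxx - sx * sx);
+  const c0 = (sy - c1 * sx) / n;
+  return { c0, c1, clampLnN: xs[0], clampMs: used[0].medianMs };
+}
+
+function predictMs(model, totalElements) {
+  const lnN = Math.log(totalElements);
+  // At or below the clamp boundary, return the flat floor value.
+  if (lnN <= model.clampLnN) return model.clampMs;
+  return Math.exp(model.c0 + model.c1 * lnN);
+}
+```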
+
+## 4. Shape-Aware Micro-Benchmarks
+To populate the estimation models, the implementation runs a suite of micro-benchmarks for supported operators across six size variants.
+
+### Size Variants
+Benchmarking across multiple sizes captures the "utilization curve" where small tensors may not fully saturate compute units.
+
+| Variant | Representative Shape | Total Elements |
+|---------|----------------------|----------------|
+| xs | [1, 16, 16, 8] | 2,048 |
+| small | [1, 32, 32, 16] | 16,384 |
+| medium | [1, 64, 64, 24] | 98,304 |
+| large | [1, 128, 128, 32] | 524,288 |
+| xl | [1, 256, 256, 32] | 2,097,152 |
+| xxl | [1, 512, 512, 32] | 8,388,608 |
+
+### Benchmark Methodology
+The system benchmarks 7 operator types: `conv2d`, `add`, `mul`, `relu`, `sigmoid`, `clamp`, and `averagePool2d`.
+
+1. **Baseline Overhead Subtraction**: Before benchmarking real operators, the runner measures the dispatch + readTensor overhead using a trivial (reshape) graph at a small fixed size. To improve accuracy, this measurement is repeated across 3 independent rounds (each with 50 batched iterations), and the median-of-medians is used. This baseline is subtracted from each operator's measured time to isolate pure compute cost.
+2. **Amortized Readback**: The runner dispatches 10 operations (batchSize=10) before a single `readTensor` call, further reducing per-dispatch synchronization overhead.
+3. **Iterations**: Each benchmark performs 5 warmup runs followed by 30 timed iterations to find the median latency.
+4. **Storage**: Raw data points `{ totalElements, medianMs }` are stored in the `TimeModelDatabase`.
+
+## 5. End-to-End Estimation Flow
+
+### Benchmark Phase (Offline/Calibration)
+1. Measure baseline dispatch + readTensor overhead using a trivial graph.
+2. Iterate through supported operator types and size variants.
+3. Execute benchmarks, subtract baseline overhead, and record median latencies.
+4. Store results in `TimeModelDatabase` and fit log-log polynomials.
+5. Persist models to `localStorage` under the key `"daop_time_models"`.
+
+### Estimation Phase (Online)
+1. **Traverse Graph**: Walk the IR of the weightless graph.
+2. **Sum Node Latencies**: For each node, look up the operator in `TimeModelDatabase`.
+   - Call `predict(opType, inputElements)` to get the estimated time.
+3. **Add Overhead**: Add a graph dispatch overhead: `0.5 + numNodes * 0.005` ms.
+4. **Assign Tier**: Map the total estimated latency to a performance tier string.
+
+## 6. Project Structure
+
+```
+daop-illustration/
+  src/
+    index.js               # Public API entry point
+    polyfill.js            # WebNN feature detection and DAOP initialization
+    daop-context.js        # Wraps native MLContext; delegates to interpolation estimator
+    daop-graph-builder.js  # IR graph builder supporting weightless constants
+    daop-graph.js          # IR graph representation and Mermaid export
+    ir/
+      graph-ir.js          # Core IR definitions (TensorDesc, IRNode)
+      shape-inference.js   # Shape inference logic for operators
+    qos/
+      estimate-qos-interp.js # Interpolation-based QoS estimator
+    interpolation/
+      poly-fit.js          # Polynomial fitting (Normal Equations, Gaussian)
+      time-model.js        # TimeModelDatabase (stores points, fits, predicts)
+    microbench/
+      bench-runner.js      # Hardware-specific benchmark execution engine
+      op-benchmarks.js     # Operator configurations (xs, small, medium, large, xl, xxl)
+  examples/
+    background-blur/
+      background-blur-demo.html # Interactive two-column demo page
+      selfie-model.js      # Model graph definition + weight loader
+      blur-renderer.js     # Image processing + blur compositing
+      meeting.jpg          # Sample input image
+```
+
diff --git a/daop-illustration/LICENSE b/daop-illustration/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/daop-illustration/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/daop-illustration/README.md b/daop-illustration/README.md
new file mode 100644
index 0000000..3c9ae3f
--- /dev/null
+++ b/daop-illustration/README.md
@@ -0,0 +1,78 @@
+# DAOP Illustration
+
+A working JavaScript illustration of the
+[Dynamic AI Offloading Protocol (DAOP)](../explainer.md) using the WebNN API.
+
+This project demonstrates how the proposed `estimateQoS()` API can work in practice:
+an application builds a **weightless** computation graph, estimates performance via
+operator micro-benchmarks and polynomial interpolation, decides whether to run locally
+or offload to the cloud, and — if local — downloads weights and executes inference.
+
+## Prerequisites
+
+- A WebNN-capable browser (e.g., Chrome Canary with WebNN flags enabled).
+  See the [installation guide](https://webnn.io/en/learn/get-started/installation).
+- Node.js (for the static file server).
+
+## Running the Demo
+
+```bash
+npm install
+npm start    # starts http://localhost:8080
+```
+
+Open `http://localhost:8080/examples/background-blur/background-blur-demo.html` in the WebNN-capable
+browser.
+
+### Background Blur Demo
+
+The demo applies AI-powered background blur to a meeting photo using the MediaPipe Selfie
+Segmentation model. The workflow follows the DAOP protocol:
+
+1. **Build weightless graph** — the model topology is recorded without downloading weights.
+2. **Estimate QoS** — per-operator micro-benchmarks and interpolation produce a performance tier.
+3. **Offloading decision** — if the tier is acceptable (< 10 s), run locally; otherwise
+   offload to cloud.
+4. **Execute** — download weights, compile the native WebNN graph, run inference, apply blur.
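+
+A condensed sketch of this four-step flow using the library's public surface (`estimateQoS`,
+`bindConstants`, and `compute` on the DAOP context, per `daop-context.js` below, and
+`buildSelfieSegmentationGraph` from the demo). The `builder`, `context`, `weightBuffersByLabel`,
+`imageTensor`, `renderBlur`, and `runInCloud` names are illustrative placeholders, not the demo's
+exact wiring:
+
+```js
+import { buildSelfieSegmentationGraph } from "./examples/background-blur/selfie-model.js";
+
+// 1. Build the weightless graph: topology and shapes only, no weight buffers.
+const { graph } = buildSelfieSegmentationGraph(builder);
+
+// 2. Estimate QoS; only the coarse tier string is visible to the app.
+const { performanceTier } = await context.estimateQoS(graph);
+
+// 3. Offloading decision: "moderate" or better corresponds to < 10 s.
+if (["excellent", "good", "fair", "moderate"].includes(performanceTier)) {
+  // 4a. Local path: download weights, bind them by label, then run inference.
+  await context.bindConstants(graph, weightBuffersByLabel);
+  const results = await context.compute(graph, { input: imageTensor });
+  renderBlur(results);
+} else {
+  // 4b. Cloud fallback.
+  await runInCloud(imageTensor);
+}
+```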
+
+The right column of the demo exposes DAOP internals: operator benchmarks, estimation
+curves, computation graph visualization, and a timing comparison between estimated and
+actual latency.
+
+## Implementation Details
+
+See [IMPLEMENTATION.md](./IMPLEMENTATION.md) for a detailed description of the estimation
+strategy, including shape-aware micro-benchmarks, polynomial interpolation, and the 7-tier
+performance classification.
+
+## Project Structure
+
+```
+src/                       # DAOP library (reusable)
+  index.js                 # Public API
+  polyfill.js              # WebNN detection + DAOP initialization
+  daop-context.js          # Wraps native MLContext with estimateQoS / bindConstants / compute
+  daop-graph-builder.js    # IR graph builder (weightless constants)
+  daop-graph.js            # IR graph + Mermaid visualization
+  ir/
+    graph-ir.js            # TensorDesc, IROperand, IRNode, IRGraph
+    shape-inference.js     # Shape inference for conv2d, pool, resample, etc.
+  qos/
+    estimate-qos-interp.js # Interpolation-based QoS estimation + tier assignment
+  interpolation/
+    poly-fit.js            # Polynomial fitting (Normal Equations, Gaussian)
+    time-model.js          # TimeModelDatabase (stores points, fits, predicts)
+  microbench/
+    bench-runner.js        # Multi-size benchmark runner
+    op-benchmarks.js       # Per-op benchmark configurations (xs–xxl)
+examples/
+  background-blur/         # Background blur demo (self-contained)
+    background-blur-demo.html # Interactive two-column demo page
+    selfie-model.js        # Model graph definition + weight loader
+    blur-renderer.js       # Image processing + blur compositing
+    meeting.jpg            # Sample input image
+```
+
+## License
+
+Apache 2.0
diff --git a/daop-illustration/demo-server.js b/daop-illustration/demo-server.js
new file mode 100644
index 0000000..34f57bc
--- /dev/null
+++ b/daop-illustration/demo-server.js
@@ -0,0 +1,63 @@
+import http from "http";
+import fs from "fs";
+import path from "path";
+import { fileURLToPath } from "url";
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+const PORT = 8080;
+
+const MIME_TYPES = {
+  ".html": "text/html",
+  ".js": "text/javascript",
+  ".css": "text/css",
+  ".json": "application/json",
+  ".png": "image/png",
+  ".jpg": "image/jpeg",
+  ".jpeg": "image/jpeg",
+  ".gif": "image/gif",
+  ".svg": "image/svg+xml",
+  ".webp": "image/webp",
+  ".wasm": "application/wasm",
+};
+
+const server = http.createServer((req, res) => {
+  console.log(`${req.method} ${req.url}`);
+
+  // Default to the background blur demo
+  let filePath = req.url === "/" ? "/examples/background-blur/background-blur-demo.html" : req.url;
+
+  // Remove query strings or hashes if present
+  filePath = filePath.split("?")[0].split("#")[0];
+
+  // Ensure we don't try to access files outside the directory
+  // Remove leading slash for path.join to behave consistently
+  const safePath = path.normalize(filePath).replace(/^[\/\\]+/, "");
+  let fullPath = path.join(__dirname, safePath);
+
+  console.log(`Serving: ${fullPath}`);
+
+  const extname = path.extname(fullPath);
+  let contentType = MIME_TYPES[extname] || "application/octet-stream";
+
+  fs.readFile(fullPath, (error, content) => {
+    if (error) {
+      if (error.code === "ENOENT") {
+        res.writeHead(404);
+        res.end("File not found");
+      } else {
+        res.writeHead(500);
+        res.end(`Server error: ${error.code}`);
+      }
+    } else {
+      res.writeHead(200, { "Content-Type": contentType });
+      res.end(content, "utf-8");
+    }
+  });
+});
+
+server.listen(PORT, () => {
+  console.log(`Server running at http://localhost:${PORT}/`);
+  console.log(`Demo page: http://localhost:${PORT}/examples/background-blur/background-blur-demo.html`);
+});
diff --git a/daop-illustration/examples/background-blur/background-blur-demo.html b/daop-illustration/examples/background-blur/background-blur-demo.html
new file mode 100644
index 0000000..06497a4
--- /dev/null
+++ b/daop-illustration/examples/background-blur/background-blur-demo.html
@@ -0,0 +1,641 @@
+[The markup of this 641-line demo page is garbled in this diff; only its visible text survives:
+ page title "DAOP: Background Blur Demo"; left column "Web Application" / "Background Blur with
+ WebNN" — "This demo applies AI-powered background blur using the WebNN API. The application only
+ sees a high-level performance tier — all estimation internals are opaque."; right column "Under
+ the Hood — DAOP Internals" / "One possible implementation strategy (for illustration only)" /
+ "⚙️ Operator Micro-benchmarks".]
+ + + + \ No newline at end of file diff --git a/daop-illustration/examples/background-blur/blur-renderer.js b/daop-illustration/examples/background-blur/blur-renderer.js new file mode 100644 index 0000000..ba57d4e --- /dev/null +++ b/daop-illustration/examples/background-blur/blur-renderer.js @@ -0,0 +1,151 @@ +// src/demo/background-blur/blur-renderer.js + +/** + * BlurRenderer — image processing for background blur demo. + * + * Handles: image loading → resize → model input prep → mask application → display + */ +export class BlurRenderer { + /** + * @param {HTMLCanvasElement} canvas - Display canvas + */ + constructor(canvas) { + this.canvas = canvas; + this.ctx = canvas.getContext("2d"); + this._originalImage = null; + } + + /** + * Load an image from URL into the canvas. + */ + async loadImage(url) { + return new Promise((resolve, reject) => { + const img = new Image(); + img.crossOrigin = "anonymous"; + img.onload = () => { + this._originalImage = img; + this.canvas.width = img.width; + this.canvas.height = img.height; + this.ctx.drawImage(img, 0, 0); + resolve(img); + }; + img.onerror = reject; + img.src = url; + }); + } + + /** + * Get the current image as a Float32Array suitable for model input. + * Resizes to modelWidth × modelHeight and normalizes to [0, 1]. + * + * @param {number} modelWidth + * @param {number} modelHeight + * @param {string} layout - "nchw" or "nhwc" + * @returns {Float32Array} + */ + getModelInput(modelWidth, modelHeight, layout = "nchw") { + if (!this._originalImage) throw new Error("No image loaded"); + + // Create offscreen canvas for resize + const offscreen = document.createElement("canvas"); + offscreen.width = modelWidth; + offscreen.height = modelHeight; + const offCtx = offscreen.getContext("2d"); + offCtx.drawImage(this._originalImage, 0, 0, modelWidth, modelHeight); + + const imageData = offCtx.getImageData(0, 0, modelWidth, modelHeight); + const { data } = imageData; // RGBA uint8 + + const size = modelWidth * modelHeight; + const float32 = new Float32Array(1 * 3 * size); + + if (layout === "nchw") { + // [1, 3, H, W] + for (let i = 0; i < size; i++) { + float32[i] = data[i * 4] / 255.0; // R + float32[size + i] = data[i * 4 + 1] / 255.0; // G + float32[2 * size + i] = data[i * 4 + 2] / 255.0; // B + } + } else { + // [1, H, W, 3] + for (let i = 0; i < size; i++) { + float32[i * 3] = data[i * 4] / 255.0; + float32[i * 3 + 1] = data[i * 4 + 1] / 255.0; + float32[i * 3 + 2] = data[i * 4 + 2] / 255.0; + } + } + + return float32; + } + + /** + * Apply segmentation mask to blur the background. + * + * @param {Float32Array} mask - 256x256 segmentation mask (0=bg, 1=fg) + * @param {number} blurRadius - CSS blur radius in px + */ + applyBlur(mask, blurRadius = 15) { + if (!this._originalImage) throw new Error("No image loaded"); + + const { width, height } = this.canvas; + + // 1. Draw blurred version + const blurCanvas = document.createElement("canvas"); + blurCanvas.width = width; + blurCanvas.height = height; + const blurCtx = blurCanvas.getContext("2d"); + blurCtx.filter = `blur(${blurRadius}px)`; + blurCtx.drawImage(this._originalImage, 0, 0, width, height); + + // 2. 
Scale mask to original image size + const maskCanvas = document.createElement("canvas"); + maskCanvas.width = 256; + maskCanvas.height = 256; + const maskCtx = maskCanvas.getContext("2d"); + const maskImageData = maskCtx.createImageData(256, 256); + + for (let i = 0; i < 256 * 256; i++) { + const val = Math.round(mask[i] * 255); + maskImageData.data[i * 4] = val; + maskImageData.data[i * 4 + 1] = val; + maskImageData.data[i * 4 + 2] = val; + maskImageData.data[i * 4 + 3] = 255; + } + maskCtx.putImageData(maskImageData, 0, 0); + + // Scale mask to image size + const scaledMaskCanvas = document.createElement("canvas"); + scaledMaskCanvas.width = width; + scaledMaskCanvas.height = height; + const scaledMaskCtx = scaledMaskCanvas.getContext("2d"); + scaledMaskCtx.drawImage(maskCanvas, 0, 0, width, height); + + // 3. Composite: foreground (original) where mask=1, background (blurred) where mask=0 + this.ctx.drawImage(blurCanvas, 0, 0); // Start with blurred + + // Use mask as alpha for original image + const origCanvas = document.createElement("canvas"); + origCanvas.width = width; + origCanvas.height = height; + const origCtx = origCanvas.getContext("2d"); + origCtx.drawImage(this._originalImage, 0, 0, width, height); + + const origData = origCtx.getImageData(0, 0, width, height); + const scaledMask = scaledMaskCtx.getImageData(0, 0, width, height); + + // Apply mask alpha + for (let i = 0; i < origData.data.length; i += 4) { + origData.data[i + 3] = scaledMask.data[i]; // Use R channel of mask as alpha + } + origCtx.putImageData(origData, 0, 0); + + this.ctx.drawImage(origCanvas, 0, 0); + } + + /** Reset to original image */ + reset() { + if (this._originalImage) { + this.ctx.drawImage(this._originalImage, 0, 0); + } + } +} diff --git a/daop-illustration/examples/background-blur/meeting.jpg b/daop-illustration/examples/background-blur/meeting.jpg new file mode 100644 index 0000000..1627b81 Binary files /dev/null and b/daop-illustration/examples/background-blur/meeting.jpg differ diff --git a/daop-illustration/examples/background-blur/selfie-model.js b/daop-illustration/examples/background-blur/selfie-model.js new file mode 100644 index 0000000..b3ef6c3 --- /dev/null +++ b/daop-illustration/examples/background-blur/selfie-model.js @@ -0,0 +1,349 @@ +// src/demo/background-blur/selfie-model.js +// +// MediaPipe Selfie Segmentation (General) — Model Graph Builder +// Input: [1, 256, 256, 3] NHWC float32 → Output: [1, 256, 256, 1] segmentation mask +// +// Architecture faithfully mirrors webmachinelearning/webnn-samples reference: +// github.com/webmachinelearning/webnn-samples/blob/master/ +// selfie_segmentation/selfie_segmentation_general.js + +const WEIGHTS_BASE_URL = + "https://webmachinelearning.github.io/test-data/models/selfie_segmentation/general"; + +/** + * Weight and bias tensor shapes from the official model metadata. 
+ * + * Standard convs use filterLayout "ohwi": [outCh, H, W, inCh] + * Depthwise convs use filterLayout "ihwo": [inCh/groups, H, W, outCh] + * Bias is always 1D: [outChannels] + */ +const WEIGHT_SHAPES = { + conv0: { weight: [16, 3, 3, 3], bias: [16] }, + conv1: { weight: [16, 1, 1, 16], bias: [16] }, + conv2: { weight: [1, 3, 3, 16], bias: [16] }, // depthwise + conv3: { weight: [8, 1, 1, 16], bias: [8] }, + conv4: { weight: [16, 1, 1, 8], bias: [16] }, + conv5: { weight: [16, 1, 1, 16], bias: [16] }, + conv6: { weight: [72, 1, 1, 16], bias: [72] }, + conv7: { weight: [1, 3, 3, 72], bias: [72] }, // depthwise + conv8: { weight: [24, 1, 1, 72], bias: [24] }, + conv9: { weight: [88, 1, 1, 24], bias: [88] }, + conv10: { weight: [1, 3, 3, 88], bias: [88] }, // depthwise + conv11: { weight: [24, 1, 1, 88], bias: [24] }, + conv12: { weight: [96, 1, 1, 24], bias: [96] }, + conv13: { weight: [1, 5, 5, 96], bias: [96] }, // depthwise + conv14: { weight: [24, 1, 1, 96], bias: [24] }, + conv15: { weight: [96, 1, 1, 24], bias: [96] }, + conv16: { weight: [32, 1, 1, 96], bias: [32] }, + conv17: { weight: [128, 1, 1, 32], bias: [128] }, + conv18: { weight: [1, 5, 5, 128], bias: [128] }, // depthwise + conv19: { weight: [32, 1, 1, 128], bias: [32] }, + conv20: { weight: [128, 1, 1, 32], bias: [128] }, + conv21: { weight: [32, 1, 1, 128], bias: [32] }, + conv22: { weight: [128, 1, 1, 32], bias: [128] }, + conv23: { weight: [1, 5, 5, 128], bias: [128] }, // depthwise + conv24: { weight: [32, 1, 1, 128], bias: [32] }, + conv25: { weight: [128, 1, 1, 32], bias: [128] }, + conv26: { weight: [32, 1, 1, 128], bias: [32] }, + conv27: { weight: [96, 1, 1, 32], bias: [96] }, + conv28: { weight: [1, 5, 5, 96], bias: [96] }, // depthwise + conv29: { weight: [24, 1, 1, 96], bias: [24] }, + conv30: { weight: [96, 1, 1, 24], bias: [96] }, + conv31: { weight: [32, 1, 1, 96], bias: [32] }, + conv32: { weight: [96, 1, 1, 32], bias: [96] }, + conv33: { weight: [1, 5, 5, 96], bias: [96] }, // depthwise + conv34: { weight: [24, 1, 1, 96], bias: [24] }, + conv35: { weight: [96, 1, 1, 24], bias: [96] }, + conv36: { weight: [32, 1, 1, 96], bias: [32] }, + conv37: { weight: [128, 1, 1, 32], bias: [128] }, + conv38: { weight: [128, 1, 1, 32], bias: [128] }, + conv39: { weight: [24, 1, 1, 128], bias: [24] }, + conv40: { weight: [24, 1, 1, 24], bias: [24] }, + conv41: { weight: [24, 1, 1, 24], bias: [24] }, + conv42: { weight: [24, 1, 1, 24], bias: [24] }, + conv43: { weight: [1, 3, 3, 24], bias: [24] }, // depthwise + conv44: { weight: [16, 1, 1, 24], bias: [16] }, + conv45: { weight: [16, 1, 1, 16], bias: [16] }, + conv46: { weight: [16, 1, 1, 16], bias: [16] }, + conv47: { weight: [16, 1, 1, 16], bias: [16] }, + conv48: { weight: [1, 3, 3, 16], bias: [16] }, // depthwise + conv49: { weight: [16, 1, 1, 16], bias: [16] }, + conv50: { weight: [16, 1, 1, 16], bias: [16] }, + conv51: { weight: [16, 1, 1, 16], bias: [16] }, + conv52: { weight: [16, 1, 1, 16], bias: [16] }, + conv53: { weight: [1, 3, 3, 16], bias: [16] }, // depthwise + convTranspose0: { weight: [1, 2, 2, 16], bias: null }, +}; + +/** + * Depthwise convolutions (groups == inputChannels) use filterLayout "ihwo"; + * standard convolutions use "ohwi". 
+ */ +function conv(builder, input, index, activation, options = {}) { + const shapes = WEIGHT_SHAPES[`conv${index}`]; + const weight = builder.constant({ + shape: shapes.weight, dataType: "float32", label: `conv${index}_weight`, + }); + const bias = builder.constant({ + shape: shapes.bias, dataType: "float32", label: `conv${index}_bias`, + }); + + const isDepthwise = options.groups > 1; + const convOut = builder.conv2d(input, weight, { + ...options, + bias, + inputLayout: "nhwc", + filterLayout: isDepthwise ? "ihwo" : "ohwi", + }); + + if (activation === "relu") return builder.relu(convOut); + if (activation === "sigmoid") return builder.sigmoid(convOut); + return convOut; +} + +/** + * SubGraphA — hardswish-like activation fused with convolution: + * out = conv(input) * clamp(conv(input) + 3, 0, 6) * (1/6) + */ +function subGraphA(builder, input, convIndex, addB, mulA, convOptions = {}) { + const c = conv(builder, input, convIndex, "", convOptions); + const added = builder.add(c, addB); + const clamped = builder.clamp(added, { minValue: 0, maxValue: 6 }); + const scaled = builder.mul(mulA, clamped); + return builder.mul(c, scaled); +} + +/** + * SubGraphB — SE-like attention block: + * avgPool → conv(relu) → conv(sigmoid) → mul(mulTarget || input) + */ +function subGraphB(builder, input, convIndex, poolStride, mulTarget) { + const strides = [poolStride, poolStride]; + const pooled = builder.averagePool2d(input, { + windowDimensions: strides, + strides, + layout: "nhwc", + }); + const reduced = conv(builder, pooled, convIndex, "relu"); + const gate = conv(builder, reduced, convIndex + 1, "sigmoid"); + return builder.mul(mulTarget || input, gate); +} + +/** + * Build the full Selfie Segmentation General graph. + * + * Weight constants include correct tensor shapes (from the official model + * metadata) but no buffer data — they are "weightless". This enables shape + * inference and QoS estimation before any weight download. Actual weight + * buffers are attached later via graph.bindConstants(). 
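+ *
+ * Illustrative end-to-end sketch (assumes initDAOP() from src/polyfill.js
+ * has already wrapped navigator.ml, and `frameData` stands in for the
+ * caller's preprocessed [1, 256, 256, 3] float32 input):
+ *
+ *   const context = await navigator.ml.createContext({ deviceType: "gpu" });
+ *   const builder = new MLGraphBuilder(context); // DAOPGraphBuilder
+ *   const { graph } = buildSelfieSegmentationGraph(builder);
+ *   const qos = await context.estimateQoS(graph); // no weights downloaded yet
+ *   if (qos.performanceTier !== "poor") {
+ *     await context.bindConstants(graph, await loadWeights());
+ *     const results = await context.compute(graph, { input: frameData });
+ *   }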
+ * + * @param {MLGraphBuilder} builder - DAOP or native graph builder + * @returns {{ graph: Object, weightMeta: null }} + */ +export function buildSelfieSegmentationGraph(builder) { + const input = builder.input("input", { dataType: "float32", shape: [1, 256, 256, 3] }); + + const addB = builder.constant( + { shape: [1, 1, 1, 1], dataType: "float32", label: null }, + new Float32Array([3]), + ); + const mulA = builder.constantScalar("float32", 0.1666666716337204); + + // ── Encoder ────────────────────────────────────────────── + + const sgA0 = subGraphA(builder, input, 0, addB, mulA, { + strides: [2, 2], + padding: [0, 1, 0, 1], + }); + + const c1 = conv(builder, sgA0, 1, "relu"); + const c2 = conv(builder, c1, 2, "relu", { + strides: [2, 2], + padding: [0, 1, 0, 1], + groups: 16, + }); + + const sgB0 = subGraphB(builder, c2, 3, 64); + + const c5 = conv(builder, sgB0, 5, ""); + const c6 = conv(builder, c5, 6, "relu"); + const c7 = conv(builder, c6, 7, "relu", { + strides: [2, 2], + padding: [0, 1, 0, 1], + groups: 72, + }); + const c8 = conv(builder, c7, 8, ""); + + const c9 = conv(builder, c8, 9, "relu"); + const c10 = conv(builder, c9, 10, "relu", { + padding: [1, 1, 1, 1], + groups: 88, + }); + const c11 = conv(builder, c10, 11, ""); + const add0 = builder.add(c11, c8); + + const sgA1 = subGraphA(builder, add0, 12, addB, mulA); + const sgA2 = subGraphA(builder, sgA1, 13, addB, mulA, { + strides: [2, 2], + padding: [1, 2, 1, 2], + groups: 96, + }); + const sgB1 = subGraphB(builder, sgA2, 14, 16); + const c16 = conv(builder, sgB1, 16, ""); + + const sgA3 = subGraphA(builder, c16, 17, addB, mulA); + const sgA4 = subGraphA(builder, sgA3, 18, addB, mulA, { + padding: [2, 2, 2, 2], + groups: 128, + }); + const sgB2 = subGraphB(builder, sgA4, 19, 16); + const c21 = conv(builder, sgB2, 21, ""); + const add1 = builder.add(c21, c16); + + const sgA5 = subGraphA(builder, add1, 22, addB, mulA); + const sgA6 = subGraphA(builder, sgA5, 23, addB, mulA, { + padding: [2, 2, 2, 2], + groups: 128, + }); + const sgB3 = subGraphB(builder, sgA6, 24, 16); + const c26 = conv(builder, sgB3, 26, ""); + const add2 = builder.add(c26, add1); + + const sgA7 = subGraphA(builder, add2, 27, addB, mulA); + const sgA8 = subGraphA(builder, sgA7, 28, addB, mulA, { + padding: [2, 2, 2, 2], + groups: 96, + }); + const sgB4 = subGraphB(builder, sgA8, 29, 16); + const c31 = conv(builder, sgB4, 31, ""); + const add3 = builder.add(c31, add2); + + const sgA9 = subGraphA(builder, add3, 32, addB, mulA); + const sgA10 = subGraphA(builder, sgA9, 33, addB, mulA, { + padding: [2, 2, 2, 2], + groups: 96, + }); + const sgB5 = subGraphB(builder, sgA10, 34, 16); + const c36 = conv(builder, sgB5, 36, ""); + const add4 = builder.add(c36, add3); + + // ── Decoder ────────────────────────────────────────────── + + const c37 = conv(builder, add4, 37, "relu"); + const avgPool0 = builder.averagePool2d(add4, { + windowDimensions: [16, 16], + strides: [16, 16], + layout: "nhwc", + }); + const c38 = conv(builder, avgPool0, 38, "sigmoid"); + const mul0 = builder.mul(c37, c38); + + const resample0 = builder.resample2d(mul0, { + sizes: [32, 32], + mode: "linear", + axes: [1, 2], + }); + const c39 = conv(builder, resample0, 39, ""); + const add5 = builder.add(c39, add0); + + const sgB6 = subGraphB(builder, add5, 40, 32, add0); + const add6 = builder.add(sgB6, c39); + + const c42 = conv(builder, add6, 42, "relu"); + const c43 = conv(builder, c42, 43, "relu", { + padding: [1, 1, 1, 1], + groups: 24, + }); + const add7 = builder.add(c42, c43); + + const 
resample1 = builder.resample2d(add7, { + sizes: [64, 64], + mode: "linear", + axes: [1, 2], + }); + const c44 = conv(builder, resample1, 44, ""); + const add8 = builder.add(c5, c44); + + const sgB7 = subGraphB(builder, add8, 45, 64, c5); + const add9 = builder.add(sgB7, c44); + + const c47 = conv(builder, add9, 47, "relu"); + const c48 = conv(builder, c47, 48, "relu", { + padding: [1, 1, 1, 1], + groups: 16, + }); + const add10 = builder.add(c47, c48); + + const resample2 = builder.resample2d(add10, { + sizes: [128, 128], + mode: "linear", + axes: [1, 2], + }); + const c49 = conv(builder, resample2, 49, ""); + const add11 = builder.add(sgA0, c49); + + const sgB8 = subGraphB(builder, add11, 50, 128, sgA0); + const add12 = builder.add(sgB8, c49); + + const c52 = conv(builder, add12, 52, "relu"); + const c53 = conv(builder, c52, 53, "relu", { + padding: [1, 1, 1, 1], + groups: 16, + }); + const add13 = builder.add(c52, c53); + + // ── Final: convTranspose2d 128→256 + sigmoid ─────────── + + const convTransposeWeight = builder.constant({ + shape: [1, 2, 2, 16], dataType: "float32", label: "convTranspose0_weight", + }); + const convTransposeBias = builder.constant( + { shape: [1], dataType: "float32", label: null }, + new Float32Array([0.53271484375]), + ); + const convTranspose = builder.convTranspose2d(add13, convTransposeWeight, { + bias: convTransposeBias, + padding: [0, 0, 0, 0], + strides: [2, 2], + outputSizes: [256, 256], + filterLayout: "ohwi", + inputLayout: "nhwc", + }); + + const output = builder.sigmoid(convTranspose); + + const graph = builder.build({ output }); + return { graph, weightMeta: null }; +} + +/** + * @param {Function} [onProgress] - ({ label, phase }) callback + * @returns {Promise<Object<string, Float32Array>>} + */ +export async function loadWeights(onProgress) { + const [weightsInfo, biasesInfo] = await Promise.all([ + fetch(`${WEIGHTS_BASE_URL}/weights_nhwc.json`).then(r => r.json()), + fetch(`${WEIGHTS_BASE_URL}/biases.json`).then(r => r.json()), + ]); + + const [weightsBin, biasesBin] = await Promise.all([ + fetch(`${WEIGHTS_BASE_URL}/weights_nhwc.bin`).then(r => r.arrayBuffer()), + fetch(`${WEIGHTS_BASE_URL}/biases.bin`).then(r => r.arrayBuffer()), + ]); + + const weights = {}; + + for (const [name, meta] of Object.entries(weightsInfo)) { + const label = `${name}_weight`; + weights[label] = new Float32Array(weightsBin, meta.dataOffset, meta.byteLength / 4); + if (onProgress) onProgress({ label, phase: "weight" }); + } + + for (const [name, meta] of Object.entries(biasesInfo)) { + const label = `${name}_bias`; + weights[label] = new Float32Array(biasesBin, meta.dataOffset, meta.byteLength / 4); + if (onProgress) onProgress({ label, phase: "bias" }); + } + + return weights; +} diff --git a/daop-illustration/package.json b/daop-illustration/package.json new file mode 100644 index 0000000..e6b519c --- /dev/null +++ b/daop-illustration/package.json @@ -0,0 +1,14 @@ +{ + "name": "daop-illustration", + "version": "1.0.0", + "description": "DAOP (Dynamic AI Offloading Protocol) illustration using WebNN", + "main": "src/index.js", + "type": "module", + "scripts": { + "start": "node demo-server.js", + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [], + "author": "", + "license": "ISC" +} diff --git a/daop-illustration/src/daop-context.js b/daop-illustration/src/daop-context.js new file mode 100644 index 0000000..a4d8f73 --- /dev/null +++ b/daop-illustration/src/daop-context.js @@ -0,0 +1,131 @@ +// src/daop-context.js + +import { estimateQoSInterp } from 
"./qos/estimate-qos-interp.js"; +import { DAOPGraphBuilder } from "./daop-graph-builder.js"; + +/** + * DAOPContext — wraps native MLContext with DAOP extensions. + * + * Provides the standard MLContext interface plus: + * - estimateQoS(graph, options) — QoS estimation on weightless graph + * - bindConstants(graph, constants) — late-bind weight buffers + * - compute(graph, inputs) — replay IR → native WebNN → execute + */ +export class DAOPContext { + /** + * @param {MLContext} nativeContext - The real browser-provided MLContext + * @param {Object} options - Creation options (deviceType, etc.) + */ + constructor(nativeContext, options = {}) { + this._native = nativeContext; + this.deviceType = options.deviceType || "gpu"; + } + + /** Access the underlying native MLContext */ + get nativeContext() { + return this._native; + } + + /** + * DAOP Extension: Estimate QoS for a weightless graph. + * + * @param {import("./daop-graph.js").DAOPGraph} graph + * @param {Object} [options={}] + * @returns {Object} QoS report with performanceTier + */ + async estimateQoS(graph, options = {}) { + return estimateQoSInterp(graph, options); + } + + /** + * DAOP Extension: Bind constants to a weightless graph. + * + * @param {import("./daop-graph.js").DAOPGraph} graph + * @param {Object} constants - label → typed array + */ + async bindConstants(graph, constants) { + graph.bindConstants(constants); + return { status: "success" }; + } + + /** + * DAOP Extension: Pre-compile a fully-bound graph for execution. + * + * Replays the IR into native WebNN and compiles it. Call this before + * compute() to separate compilation latency from inference latency. + * + * @param {import("./daop-graph.js").DAOPGraph} graph + * @returns {Promise} + */ + async compileGraph(graph) { + if (!graph.isFullyBound()) { + throw new Error("[DAOP] Cannot compile: not all constants are bound. Call bindConstants() first."); + } + await graph.compile(this._native); + } + + /** + * Compile and execute a fully-bound graph. + * + * Replays IR into native WebNN, compiles, dispatches, and returns results. + * + * @param {import("./daop-graph.js").DAOPGraph} graph + * @param {Object} inputs + * @returns {Promise>} + */ + async compute(graph, inputs) { + if (!graph.isFullyBound()) { + throw new Error("[DAOP] Cannot compute: not all constants are bound. 
Call bindConstants() first."); + } + + // Compile (replay IR → native WebNN graph) + const { graph: nativeGraph, nativeContext } = await graph.compile(this._native); + + // Create input/output tensors + const inputTensors = {}; + const outputTensors = {}; + + // Create input MLTensors + for (const irInput of graph.ir.getInputs()) { + const inputData = inputs[irInput.name]; + if (!inputData) { + throw new Error(`[DAOP] Missing input "${irInput.name}"`); + } + const tensor = await nativeContext.createTensor({ + dataType: irInput.desc.dataType, + shape: irInput.desc.shape, + writable: true, + readable: false, + }); + nativeContext.writeTensor(tensor, inputData); + inputTensors[irInput.name] = tensor; + } + + // Create output MLTensors + for (const [outName, outOpId] of graph.ir.outputs) { + const outOp = graph.ir.getOperand(outOpId); + const tensor = await nativeContext.createTensor({ + dataType: outOp.desc.dataType, + shape: outOp.desc.shape, + writable: false, + readable: true, + }); + outputTensors[outName] = tensor; + } + + // Dispatch + nativeContext.dispatch(nativeGraph, inputTensors, outputTensors); + + // Read back results + const results = {}; + for (const [name, tensor] of Object.entries(outputTensors)) { + results[name] = await nativeContext.readTensor(tensor); + } + + // Cleanup tensors + for (const t of Object.values(inputTensors)) t.destroy(); + for (const t of Object.values(outputTensors)) t.destroy(); + + return results; + } +} diff --git a/daop-illustration/src/daop-graph-builder.js b/daop-illustration/src/daop-graph-builder.js new file mode 100644 index 0000000..f5b1180 --- /dev/null +++ b/daop-illustration/src/daop-graph-builder.js @@ -0,0 +1,209 @@ +// src/daop-graph-builder.js + +import { TensorDesc, IROperand, IRNode, IRGraph, generateId } from "./ir/graph-ir.js"; +import { inferShape } from "./ir/shape-inference.js"; +import { DAOPGraph } from "./daop-graph.js"; + +/** + * DAOPGraphBuilder — mirrors WebNN MLGraphBuilder API. + * + * Builds an IR graph for weightless QoS estimation. When the application + * later binds constants and calls compute(), the IR is replayed into a + * real native MLGraphBuilder. + */ +export class DAOPGraphBuilder { + /** + * @param {DAOPContext} context + */ + constructor(context) { + this.context = context; + this._ir = new IRGraph(); + } + + /** + * Declare a named input operand. + * Mirrors: MLGraphBuilder.input(name, descriptor) + */ + input(name, descriptor) { + const desc = new TensorDesc(descriptor); + const operand = new IROperand({ + id: name, + kind: "input", + desc, + name, + }); + this._ir.addOperand(operand); + return name; + } + + /** + * Declare a constant operand (optionally weightless). + * Mirrors: MLGraphBuilder.constant(descriptor, bufferView?) + * + * DAOP Extension: When called with only a descriptor (no buffer), + * creates a "weightless" constant that can be bound later via + * context.bindConstants(). The descriptor MUST include a `label` + * for late-binding identification. 
+ */ + constant(descriptorOrType, bufferOrValue = null) { + // Support native 2-arg scalar form: constant(dataType, value) + if (typeof descriptorOrType === "string") { + return this.constantScalar(descriptorOrType, bufferOrValue); + } + const desc = new TensorDesc(descriptorOrType); + const label = descriptorOrType.label || null; + const id = label || generateId("const"); + const operand = new IROperand({ + id, + kind: "constant", + desc, + label, + buffer: bufferOrValue || null, + }); + this._ir.addOperand(operand); + return id; + } + + /** + * Scalar constant helper. + * Mirrors: MLGraphBuilder.constant(dataType, value) + */ + constantScalar(dataType, value) { + const id = generateId("scalar"); + const desc = new TensorDesc({ shape: [], dataType }); + const operand = new IROperand({ + id, + kind: "constant", + desc, + buffer: value, // Store scalar value directly + }); + this._ir.addOperand(operand); + return id; + } + + // ─── Operators ─────────────────────────────────────────── + + conv2d(input, filter, options = {}) { + return this._addOp("conv2d", [input, filter], options); + } + + convTranspose2d(input, filter, options = {}) { + return this._addOp("convTranspose2d", [input, filter], options); + } + + add(a, b) { + return this._addOp("add", [a, b]); + } + + sub(a, b) { + return this._addOp("sub", [a, b]); + } + + mul(a, b) { + return this._addOp("mul", [a, b]); + } + + div(a, b) { + return this._addOp("div", [a, b]); + } + + relu(input) { + return this._addOp("relu", [input]); + } + + sigmoid(input) { + return this._addOp("sigmoid", [input]); + } + + tanh(input) { + return this._addOp("tanh", [input]); + } + + clamp(input, options = {}) { + return this._addOp("clamp", [input], options); + } + + averagePool2d(input, options = {}) { + return this._addOp("averagePool2d", [input], options); + } + + maxPool2d(input, options = {}) { + return this._addOp("maxPool2d", [input], options); + } + + matmul(a, b) { + return this._addOp("matmul", [a, b]); + } + + softmax(input, axis) { + return this._addOp("softmax", [input], { axis }); + } + + reshape(input, newShape) { + return this._addOp("reshape", [input], { newShape }); + } + + transpose(input, options = {}) { + return this._addOp("transpose", [input], options); + } + + concat(inputs, axis) { + return this._addOp("concat", inputs, { axis }); + } + + resample2d(input, options = {}) { + return this._addOp("resample2d", [input], options); + } + + // ─── Build ─────────────────────────────────────────────── + + /** + * Build the graph. + * Mirrors: MLGraphBuilder.build(outputs) + * + * @param {Object} outputs - Map of output name → operand ID + * @returns {DAOPGraph} + */ + build(outputs) { + for (const [name, operandId] of Object.entries(outputs)) { + this._ir.outputs.set(name, operandId); + } + return new DAOPGraph(this._ir, this.context); + } + + // ─── Internal ──────────────────────────────────────────── + + /** + * Add an operator node to the IR, infer output shape, return output operand ID. 
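+ * For example, builder.add(a, b) records a single "add" IRNode and returns
+ * a generated operand ID such as "add_17" (the suffix comes from
+ * generateId() and is illustrative).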
+ */ + _addOp(opType, inputIds, attrs = {}) { + const inputDescs = inputIds.map(id => { + const operand = this._ir.getOperand(id); + if (!operand) { + throw new Error(`[DAOP] Unknown operand ID: ${id}`); + } + return { shape: operand.desc.shape, dataType: operand.desc.dataType }; + }); + + const outputDescs = inferShape(opType, inputDescs, attrs); + const outputIds = outputDescs.map((desc, i) => { + const id = generateId(opType); + const operand = new IROperand({ + id, + kind: "intermediate", + desc: new TensorDesc(desc), + }); + this._ir.addOperand(operand); + return id; + }); + + this._ir.addNode(new IRNode({ + opType, + inputs: inputIds, + outputs: outputIds, + attrs, + })); + + return outputIds.length === 1 ? outputIds[0] : outputIds; + } +} diff --git a/daop-illustration/src/daop-graph.js b/daop-illustration/src/daop-graph.js new file mode 100644 index 0000000..da94e8d --- /dev/null +++ b/daop-illustration/src/daop-graph.js @@ -0,0 +1,281 @@ +// src/daop-graph.js + +import { getNativeMLGraphBuilder } from "./polyfill.js"; + +/** + * DAOPGraph — IR graph container with replay capability. + * + * Holds the recorded IR from DAOPGraphBuilder. Provides: + * - Topology inspection for QoS estimation and visualization + * - Weight binding for labeled constants + * - Replay into native WebNN for real execution + */ +export class DAOPGraph { + /** + * @param {import("./ir/graph-ir.js").IRGraph} ir + * @param {import("./daop-context.js").DAOPContext} context + */ + constructor(ir, context) { + this._ir = ir; + this._context = context; + this._boundWeights = new Map(); + this._compiledGraph = null; // Cached native MLGraph + this._compiledTensors = null; // Cached native MLTensors for constants + } + + /** Access the IR for estimation and visualization */ + get ir() { + return this._ir; + } + + /** Get all nodes (operators) for iteration */ + get nodes() { + return this._ir.nodes; + } + + /** Get all operands */ + get operands() { + return this._ir.operands; + } + + /** + * Bind weight buffers to labeled constants. + * + * @param {Object} constants - label → buffer map + */ + bindConstants(constants) { + for (const [label, buffer] of Object.entries(constants)) { + const operand = this._ir.getOperand(label); + if (!operand) { + console.warn(`[DAOP] bindConstants: no operand with label "${label}"`); + continue; + } + if (operand.kind !== "constant") { + console.warn(`[DAOP] bindConstants: operand "${label}" is not a constant`); + continue; + } + operand.buffer = buffer; + this._boundWeights.set(label, buffer); + } + // Invalidate cached compilation + this._compiledGraph = null; + this._compiledTensors = null; + } + + /** + * Check if all constants have been bound. + */ + isFullyBound() { + return this._ir.isFullyBound(); + } + + /** + * Replay the IR into a native WebNN graph and compile it. + * + * @param {MLContext} nativeContext - The real native WebNN context + * @returns {Promise<{graph: MLGraph, nativeContext: MLContext}>} + */ + async compile(nativeContext) { + if (this._compiledGraph) { + return this._compiledGraph; + } + + // Verify all constants are bound + const unbound = this._ir.getWeightlessConstants(); + if (unbound.length > 0) { + const labels = unbound.map(op => op.label || op.id).join(", "); + throw new Error(`[DAOP] Cannot compile: unbound constants: ${labels}`); + } + + const NativeBuilder = getNativeMLGraphBuilder(); + const nativeBuilder = new NativeBuilder(nativeContext); + const operandMap = new Map(); // IR operand ID → native MLOperand + + // 1. 
Create input operands + for (const irOp of this._ir.getInputs()) { + const nativeOperand = nativeBuilder.input(irOp.name, { + dataType: irOp.desc.dataType, + shape: irOp.desc.shape, + }); + operandMap.set(irOp.id, nativeOperand); + } + + // 2. Create constant operands (with bound buffers) + for (const irOp of this._ir.getConstants()) { + let nativeOperand; + if (irOp.buffer !== null && typeof irOp.buffer === "number") { + // Scalar constant + nativeOperand = nativeBuilder.constant(irOp.desc.dataType, irOp.buffer); + } else if (irOp.buffer !== null) { + // Constant with buffer data + nativeOperand = nativeBuilder.constant( + { dataType: irOp.desc.dataType, shape: irOp.desc.shape }, + irOp.buffer + ); + } else { + throw new Error(`[DAOP] Constant "${irOp.label || irOp.id}" has no buffer`); + } + operandMap.set(irOp.id, nativeOperand); + } + + // 3. Replay operator nodes in topological order (they're already recorded in order) + for (const node of this._ir.nodes) { + const nativeInputs = node.inputs.map(id => { + const op = operandMap.get(id); + if (!op) throw new Error(`[DAOP] Replay: missing operand ${id} for ${node.opType}`); + return op; + }); + + let nativeOutput; + const resolvedAttrs = { ...node.attrs }; + if (resolvedAttrs.bias && typeof resolvedAttrs.bias === "string") { + resolvedAttrs.bias = operandMap.get(resolvedAttrs.bias); + if (!resolvedAttrs.bias) { + throw new Error(`[DAOP] Replay: missing bias operand for ${node.opType}`); + } + } + + switch (node.opType) { + case "conv2d": + nativeOutput = nativeBuilder.conv2d(nativeInputs[0], nativeInputs[1], resolvedAttrs); + break; + case "convTranspose2d": + nativeOutput = nativeBuilder.convTranspose2d(nativeInputs[0], nativeInputs[1], resolvedAttrs); + break; + case "add": + nativeOutput = nativeBuilder.add(nativeInputs[0], nativeInputs[1]); + break; + case "sub": + nativeOutput = nativeBuilder.sub(nativeInputs[0], nativeInputs[1]); + break; + case "mul": + nativeOutput = nativeBuilder.mul(nativeInputs[0], nativeInputs[1]); + break; + case "div": + nativeOutput = nativeBuilder.div(nativeInputs[0], nativeInputs[1]); + break; + case "relu": + nativeOutput = nativeBuilder.relu(nativeInputs[0]); + break; + case "sigmoid": + nativeOutput = nativeBuilder.sigmoid(nativeInputs[0]); + break; + case "tanh": + nativeOutput = nativeBuilder.tanh(nativeInputs[0]); + break; + case "clamp": + nativeOutput = nativeBuilder.clamp(nativeInputs[0], resolvedAttrs); + break; + case "averagePool2d": + nativeOutput = nativeBuilder.averagePool2d(nativeInputs[0], resolvedAttrs); + break; + case "maxPool2d": + nativeOutput = nativeBuilder.maxPool2d(nativeInputs[0], resolvedAttrs); + break; + case "matmul": + nativeOutput = nativeBuilder.matmul(nativeInputs[0], nativeInputs[1]); + break; + case "softmax": + nativeOutput = nativeBuilder.softmax(nativeInputs[0], resolvedAttrs.axis); + break; + case "reshape": + nativeOutput = nativeBuilder.reshape(nativeInputs[0], resolvedAttrs.newShape); + break; + case "transpose": + nativeOutput = nativeBuilder.transpose(nativeInputs[0], resolvedAttrs); + break; + case "concat": + nativeOutput = nativeBuilder.concat(nativeInputs, resolvedAttrs.axis); + break; + case "resample2d": + nativeOutput = nativeBuilder.resample2d(nativeInputs[0], resolvedAttrs); + break; + default: + throw new Error(`[DAOP] Replay: unsupported op "${node.opType}"`); + } + + // Map IR output IDs to native operands + if (Array.isArray(nativeOutput)) { + node.outputs.forEach((id, i) => operandMap.set(id, nativeOutput[i])); + } else { + 
node.outputs.forEach(id => operandMap.set(id, nativeOutput)); + } + } + + // 4. Build the native graph + const nativeOutputs = {}; + for (const [name, operandId] of this._ir.outputs) { + nativeOutputs[name] = operandMap.get(operandId); + } + + const nativeGraph = await nativeBuilder.build(nativeOutputs); + + this._compiledGraph = { graph: nativeGraph, nativeContext }; + return this._compiledGraph; + } + + /** + * Generate Mermaid diagram code for this graph. + * Used by the demo's right panel for visualization. + * + * @param {Object[]} [qosBreakdown] - Optional per-node QoS data for annotations + * @returns {string} Mermaid flowchart code + */ + toMermaid(qosBreakdown = null) { + let code = "graph TD\n"; + code += " classDef computeBound fill:#ffcdd2,stroke:#e53935,stroke-width:2px;\n"; + code += " classDef memoryBound fill:#bbdefb,stroke:#1e88e5,stroke-width:2px;\n"; + code += " classDef inputNode fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px;\n"; + code += " classDef constantNode fill:#fff3e0,stroke:#ef6c00,stroke-width:1px;\n"; + + // Track which operand IDs are produced by which node + const producedBy = new Map(); + + this._ir.nodes.forEach((node, i) => { + const nodeId = `op${i}`; + let label; + if (qosBreakdown && qosBreakdown[i]) { + const est = qosBreakdown[i]; + const boundIcon = est.bottleneck === "compute" ? "COMPUTE" : "MEMORY"; + label = `"${boundIcon}<br/>${node.opType.toUpperCase()}<br/>${est.timeMs.toFixed(2)}ms"`; + const cls = est.bottleneck === "compute" ? "computeBound" : "memoryBound"; + code += ` ${nodeId}[${label}]:::${cls}\n`; + } else { + // Show shapes in label + const outOp = this._ir.getOperand(node.outputs[0]); + const shapeStr = outOp ? outOp.desc.shape.join("x") : "?"; + label = `"${node.opType.toUpperCase()}<br/>[${shapeStr}]"`; + code += ` ${nodeId}[${label}]\n`; + } + + node.outputs.forEach(outId => producedBy.set(outId, nodeId)); + }); + + // Add edges + this._ir.nodes.forEach((node, i) => { + const nodeId = `op${i}`; + node.inputs.forEach(inputId => { + const sourceNode = producedBy.get(inputId); + if (sourceNode) { + code += ` ${sourceNode} --> ${nodeId}\n`; + } else { + // It's a graph input or constant + const operand = this._ir.getOperand(inputId); + if (operand) { + const displayName = operand.name || operand.label || inputId; + if (operand.kind === "input") { + code += ` ${inputId}([${displayName}]):::inputNode --> ${nodeId}\n`; + } else if (operand.kind === "constant") { + // Don't clutter graph with every constant; only show labeled ones + if (operand.label) { + code += ` ${inputId}[/${operand.label}/]:::constantNode --> ${nodeId}\n`; + } + } + } + } + }); + }); + + return code; + } +} diff --git a/daop-illustration/src/index.js b/daop-illustration/src/index.js new file mode 100644 index 0000000..0ad1175 --- /dev/null +++ b/daop-illustration/src/index.js @@ -0,0 +1,18 @@ +// src/index.js + +/** + * DAOP Library — Public API + * + * Usage: + * import { initDAOP, detectWebNNSupport } from "./src/index.js"; + * const result = initDAOP(); + * if (!result.ok) { showError(result.error); return; } + * // Now use standard WebNN API — DAOP layer is active + */ + +export { initDAOP, detectWebNNSupport, getNativeML } from "./polyfill.js"; +export { DAOPContext } from "./daop-context.js"; +export { DAOPGraphBuilder } from "./daop-graph-builder.js"; +export { DAOPGraph } from "./daop-graph.js"; +export { timeModelDatabase } from "./qos/interpolation/time-model.js"; +export { estimateQoSInterp } from "./qos/estimate-qos-interp.js"; diff --git a/daop-illustration/src/ir/graph-ir.js b/daop-illustration/src/ir/graph-ir.js new file mode 100644 index 0000000..8b84630 --- /dev/null +++ b/daop-illustration/src/ir/graph-ir.js @@ -0,0 +1,149 @@ +// src/ir/graph-ir.js + +/** + * DAOP Intermediate Representation for WebNN Graphs + * + * Records graph topology (operators, operands, connections) without + * requiring actual weight buffers — enabling "weightless" QoS estimation. + */ + +let _nextId = 0; + +/** Generate a unique operand ID */ +export function generateId(prefix = "op") { + return `${prefix}_${_nextId++}`; +} + +/** Reset ID counter (useful for testing) */ +export function resetIdCounter() { + _nextId = 0; +} + +/** + * Describes a tensor's metadata (no buffer data). + */ +export class TensorDesc { + /** + * @param {Object} opts + * @param {number[]} opts.shape + * @param {string} [opts.dataType="float32"] + */ + constructor({ shape, dataType = "float32" }) { + this.shape = [...shape]; + this.dataType = dataType; + } + + /** Total number of elements */ + get elements() { + return this.shape.reduce((a, b) => a * b, 1); + } + + /** Bytes per element for this dataType */ + get bytesPerElement() { + switch (this.dataType) { + case "float32": return 4; + case "float16": return 2; + case "int32": return 4; + case "int8": return 1; + case "uint8": return 1; + default: return 4; + } + } + + /** Total byte size */ + get byteSize() { + return this.elements * this.bytesPerElement; + } +} + +/** + * An operand in the IR graph. 
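+ *
+ * Example (illustrative): a weightless constant carries a descriptor and
+ * a label but no buffer until bindConstants() runs:
+ *
+ *   const op = new IROperand({
+ *     id: "conv0_weight", kind: "constant", label: "conv0_weight",
+ *     desc: new TensorDesc({ shape: [16, 3, 3, 3] }),
+ *   });
+ *   op.isWeightless; // → true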
+ */ +export class IROperand { + /** + * @param {Object} opts + * @param {string} opts.id - Unique identifier + * @param {string} opts.kind - "input" | "constant" | "intermediate" + * @param {TensorDesc} opts.desc - Tensor descriptor + * @param {string} [opts.name] - User-facing name (for inputs) + * @param {string} [opts.label] - Label for late-binding (for constants) + * @param {ArrayBufferView|null} [opts.buffer] - Actual data (null for weightless) + */ + constructor({ id, kind, desc, name = null, label = null, buffer = null }) { + this.id = id; + this.kind = kind; + this.desc = desc; + this.name = name; + this.label = label; + this.buffer = buffer; + } + + get isWeightless() { + return this.kind === "constant" && this.buffer === null; + } +} + +/** + * An operator node in the IR graph. + */ +export class IRNode { + /** + * @param {Object} opts + * @param {string} opts.opType - WebNN op name ("conv2d", "add", etc.) + * @param {string[]} opts.inputs - Input operand IDs + * @param {string[]} opts.outputs - Output operand IDs + * @param {Object} [opts.attrs={}] - Op-specific attributes (strides, pads, etc.) + */ + constructor({ opType, inputs, outputs, attrs = {} }) { + this.opType = opType; + this.inputs = [...inputs]; + this.outputs = [...outputs]; + this.attrs = { ...attrs }; + } +} + +/** + * Complete IR graph: operands + operators in topological order. + */ +export class IRGraph { + constructor() { + /** @type {Map} */ + this.operands = new Map(); + /** @type {IRNode[]} */ + this.nodes = []; + /** @type {Map} output name → operand ID */ + this.outputs = new Map(); + } + + addOperand(operand) { + this.operands.set(operand.id, operand); + } + + addNode(node) { + this.nodes.push(node); + } + + getOperand(id) { + return this.operands.get(id); + } + + /** All input operands */ + getInputs() { + return [...this.operands.values()].filter(op => op.kind === "input"); + } + + /** All constant operands */ + getConstants() { + return [...this.operands.values()].filter(op => op.kind === "constant"); + } + + /** All weightless constants (need binding before compute) */ + getWeightlessConstants() { + return this.getConstants().filter(op => op.isWeightless); + } + + /** Check if all constants have buffers bound */ + isFullyBound() { + return this.getConstants().every(op => !op.isWeightless); + } +} diff --git a/daop-illustration/src/ir/shape-inference.js b/daop-illustration/src/ir/shape-inference.js new file mode 100644 index 0000000..8c18edb --- /dev/null +++ b/daop-illustration/src/ir/shape-inference.js @@ -0,0 +1,280 @@ +// src/ir/shape-inference.js + +/** + * Shape inference for WebNN operators. + * + * Given input shapes and op attributes, computes the output shape(s). + * Supports the operator subset used by Selfie Segmentation + common ops. + */ + +/** + * Infer output shape for a WebNN operator. + * + * @param {string} opType - Operator name + * @param {Array<{shape: number[], dataType: string}>} inputs - Input descriptors + * @param {Object} attrs - Operator attributes + * @returns {{shape: number[], dataType: string}[]} Output descriptor(s) + */ +export function inferShape(opType, inputs, attrs = {}) { + const fn = SHAPE_FNS[opType]; + if (!fn) { + throw new Error(`[DAOP] Shape inference not implemented for op: ${opType}`); + } + return fn(inputs, attrs); +} + +/** + * Check if shape inference is available for an op type. 
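+ *
+ * Example (mirrors conv0 of the selfie model; the result follows the
+ * conv2d formula below: floor((256 + 0 + 1 - 3) / 2) + 1 = 128):
+ *
+ *   hasShapeInference("conv2d"); // → true
+ *   inferShape("conv2d",
+ *     [{ shape: [1, 256, 256, 3], dataType: "float32" },
+ *      { shape: [16, 3, 3, 3], dataType: "float32" }],
+ *     { inputLayout: "nhwc", filterLayout: "ohwi",
+ *       strides: [2, 2], padding: [0, 1, 0, 1] });
+ *   // → [{ shape: [1, 128, 128, 16], dataType: "float32" }]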
+ */ +export function hasShapeInference(opType) { + return opType in SHAPE_FNS; +} + +const SHAPE_FNS = { + conv2d(inputs, attrs) { + // inputs[0] = input tensor, inputs[1] = filter tensor + // WebNN conv2d supports both NCHW and NHWC layouts + const layout = attrs.inputLayout || "nchw"; + const input = inputs[0].shape; + const filter = inputs[1].shape; + + let batch, inH, inW, outChannels, filterH, filterW; + if (layout === "nchw") { + [batch, , inH, inW] = input; + // filter layout for nchw: [outChannels, inChannels/groups, filterH, filterW] + const filterLayout = attrs.filterLayout || "oihw"; + if (filterLayout === "oihw") { + [outChannels, , filterH, filterW] = filter; + } else if (filterLayout === "hwio") { + [filterH, filterW, , outChannels] = filter; + } else { + [outChannels, , filterH, filterW] = filter; + } + } else { + // nhwc + [batch, inH, inW, ] = input; + const filterLayout = attrs.filterLayout || "ohwi"; + if (filterLayout === "ohwi") { + [outChannels, filterH, filterW, ] = filter; + } else if (filterLayout === "hwio") { + [filterH, filterW, , outChannels] = filter; + } else if (filterLayout === "ihwo") { + // [inputChannels/groups, filterH, filterW, outputChannels] + outChannels = filter[3]; + filterH = filter[1]; + filterW = filter[2]; + } else { + [outChannels, filterH, filterW, ] = filter; + } + } + + const padding = attrs.padding || [0, 0, 0, 0]; // [top, bottom, left, right] + const strides = attrs.strides || [1, 1]; + const dilations = attrs.dilations || [1, 1]; + + const effectiveFilterH = (filterH - 1) * dilations[0] + 1; + const effectiveFilterW = (filterW - 1) * dilations[1] + 1; + const outH = Math.floor((inH + padding[0] + padding[1] - effectiveFilterH) / strides[0]) + 1; + const outW = Math.floor((inW + padding[2] + padding[3] - effectiveFilterW) / strides[1]) + 1; + + const outShape = layout === "nchw" + ? [batch, outChannels, outH, outW] + : [batch, outH, outW, outChannels]; + + return [{ shape: outShape, dataType: inputs[0].dataType }]; + }, + + // Element-wise binary ops: output shape = broadcast(input shapes) + add: (inputs) => [{ shape: broadcastShape(inputs[0].shape, inputs[1].shape), dataType: inputs[0].dataType }], + sub: (inputs) => [{ shape: broadcastShape(inputs[0].shape, inputs[1].shape), dataType: inputs[0].dataType }], + mul: (inputs) => [{ shape: broadcastShape(inputs[0].shape, inputs[1].shape), dataType: inputs[0].dataType }], + div: (inputs) => [{ shape: broadcastShape(inputs[0].shape, inputs[1].shape), dataType: inputs[0].dataType }], + + // Element-wise unary ops: output shape = input shape + relu: (inputs) => [{ shape: [...inputs[0].shape], dataType: inputs[0].dataType }], + sigmoid: (inputs) => [{ shape: [...inputs[0].shape], dataType: inputs[0].dataType }], + tanh: (inputs) => [{ shape: [...inputs[0].shape], dataType: inputs[0].dataType }], + clamp: (inputs) => [{ shape: [...inputs[0].shape], dataType: inputs[0].dataType }], + + averagePool2d(inputs, attrs) { + return [poolShape(inputs[0], attrs)]; + }, + + maxPool2d(inputs, attrs) { + return [poolShape(inputs[0], attrs)]; + }, + + matmul(inputs) { + const a = inputs[0].shape; + const b = inputs[1].shape; + // Support batched matmul: [...batch, M, K] x [...batch, K, N] -> [...batch, M, N] + const m = a[a.length - 2]; + const n = b[b.length - 1]; + const batchDims = a.length > 2 ? 
a.slice(0, -2) : []; + return [{ shape: [...batchDims, m, n], dataType: inputs[0].dataType }]; + }, + + softmax(inputs) { + return [{ shape: [...inputs[0].shape], dataType: inputs[0].dataType }]; + }, + + reshape(inputs, attrs) { + const newShape = attrs.newShape || attrs.shape; + if (!newShape) { + throw new Error("[DAOP] reshape requires newShape attribute"); + } + return [{ shape: [...newShape], dataType: inputs[0].dataType }]; + }, + + transpose(inputs, attrs) { + const perm = attrs.permutation; + if (!perm) { + // Default: reverse dimensions + const shape = [...inputs[0].shape].reverse(); + return [{ shape, dataType: inputs[0].dataType }]; + } + const shape = perm.map(i => inputs[0].shape[i]); + return [{ shape, dataType: inputs[0].dataType }]; + }, + + concat(inputs, attrs) { + const axis = attrs.axis || 0; + const shape = [...inputs[0].shape]; + shape[axis] = inputs.reduce((sum, inp) => sum + inp.shape[axis], 0); + return [{ shape, dataType: inputs[0].dataType }]; + }, + + resample2d(inputs, attrs) { + const layout = attrs.layout || "nchw"; + const shape = [...inputs[0].shape]; + + if (attrs.axes && attrs.sizes) { + const out = [...inputs[0].shape]; + attrs.axes.forEach((axis, i) => { + out[axis] = attrs.sizes[i]; + }); + return [{ shape: out, dataType: inputs[0].dataType }]; + } + if (attrs.axes && attrs.scales) { + const out = [...inputs[0].shape]; + attrs.axes.forEach((axis, i) => { + out[axis] = Math.floor(out[axis] * attrs.scales[i]); + }); + return [{ shape: out, dataType: inputs[0].dataType }]; + } + + if (attrs.sizes) { + // Explicit output sizes [outH, outW] + if (layout === "nchw") { + shape[2] = attrs.sizes[0]; + shape[3] = attrs.sizes[1]; + } else { + shape[1] = attrs.sizes[0]; + shape[2] = attrs.sizes[1]; + } + } else if (attrs.scales) { + // Scale factors [scaleH, scaleW] + if (layout === "nchw") { + shape[2] = Math.floor(shape[2] * attrs.scales[0]); + shape[3] = Math.floor(shape[3] * attrs.scales[1]); + } else { + shape[1] = Math.floor(shape[1] * attrs.scales[0]); + shape[2] = Math.floor(shape[2] * attrs.scales[1]); + } + } + return [{ shape, dataType: inputs[0].dataType }]; + }, + convTranspose2d(inputs, attrs) { + const layout = attrs.inputLayout || "nchw"; + const input = inputs[0].shape; + const filter = inputs[1].shape; + + let batch, inH, inW, outChannels, filterH, filterW; + if (layout === "nchw") { + [batch, , inH, inW] = input; + [, outChannels, filterH, filterW] = filter; + } else { + // nhwc + [batch, inH, inW, ] = input; + const filterLayout = attrs.filterLayout || "ihwo"; + if (filterLayout === "ihwo") { + outChannels = filter[3]; + filterH = filter[1]; + filterW = filter[2]; + } else if (filterLayout === "ohwi") { + [outChannels, filterH, filterW, ] = filter; + } else { + outChannels = filter[3]; + filterH = filter[1]; + filterW = filter[2]; + } + } + + const strides = attrs.strides || [1, 1]; + const padding = attrs.padding || [0, 0, 0, 0]; + const outputPadding = attrs.outputPadding || [0, 0]; + const dilations = attrs.dilations || [1, 1]; + + if (attrs.outputSizes) { + const [outH, outW] = attrs.outputSizes; + const outShape = layout === "nchw" + ? 
[batch, outChannels, outH, outW] + : [batch, outH, outW, outChannels]; + return [{ shape: outShape, dataType: inputs[0].dataType }]; + } + + const outH = (inH - 1) * strides[0] - padding[0] - padding[1] + + (filterH - 1) * dilations[0] + outputPadding[0] + 1; + const outW = (inW - 1) * strides[1] - padding[2] - padding[3] + + (filterW - 1) * dilations[1] + outputPadding[1] + 1; + + const outShape = layout === "nchw" + ? [batch, outChannels, outH, outW] + : [batch, outH, outW, outChannels]; + return [{ shape: outShape, dataType: inputs[0].dataType }]; + }, +}; + +/** + * Compute broadcast-compatible output shape. + */ +function broadcastShape(a, b) { + const rank = Math.max(a.length, b.length); + const result = new Array(rank); + for (let i = 0; i < rank; i++) { + const dimA = i < a.length ? a[a.length - 1 - i] : 1; + const dimB = i < b.length ? b[b.length - 1 - i] : 1; + if (dimA !== dimB && dimA !== 1 && dimB !== 1) { + throw new Error(`[DAOP] Shape broadcast failed: ${a} vs ${b}`); + } + result[rank - 1 - i] = Math.max(dimA, dimB); + } + return result; +} + +/** + * Pool output shape helper. + */ +function poolShape(input, attrs) { + const layout = attrs.layout || "nchw"; + const shape = input.shape; + const windowDimensions = attrs.windowDimensions || [2, 2]; + const strides = attrs.strides || windowDimensions; + const padding = attrs.padding || [0, 0, 0, 0]; + + let batch, channels, inH, inW; + if (layout === "nchw") { + [batch, channels, inH, inW] = shape; + } else { + [batch, inH, inW, channels] = shape; + } + + const outH = Math.floor((inH + padding[0] + padding[1] - windowDimensions[0]) / strides[0]) + 1; + const outW = Math.floor((inW + padding[2] + padding[3] - windowDimensions[1]) / strides[1]) + 1; + + const outShape = layout === "nchw" + ? [batch, channels, outH, outW] + : [batch, outH, outW, channels]; + + return { shape: outShape, dataType: input.dataType }; +} diff --git a/daop-illustration/src/polyfill.js b/daop-illustration/src/polyfill.js new file mode 100644 index 0000000..9a3db70 --- /dev/null +++ b/daop-illustration/src/polyfill.js @@ -0,0 +1,102 @@ +// src/polyfill.js + +import { DAOPContext } from "./daop-context.js"; +import { DAOPGraphBuilder } from "./daop-graph-builder.js"; + +/** + * DAOP Polyfill — intercepts WebNN API to add DAOP extensions. + * + * When initialized: + * 1. Checks for native WebNN (navigator.ml). If missing → returns error. + * 2. Saves references to native ML/MLGraphBuilder. + * 3. Replaces window.MLGraphBuilder with DAOPGraphBuilder. + * 4. Wraps navigator.ml.createContext() to return DAOPContext. + */ + +let _initialized = false; +let _nativeML = null; +let _NativeMLGraphBuilder = null; + +/** + * Detect if native WebNN is available. + * + * Checks for the presence of navigator.ml and window.MLGraphBuilder. + * This function only tests the NATIVE API — if DAOP has already been + * initialized it still returns true (the native references are saved + * internally). + */ +export function detectWebNNSupport() { + if (typeof navigator === "undefined") { + return { supported: false, reason: "No navigator object" }; + } + if (!navigator.ml) { + return { supported: false, reason: "navigator.ml not available" }; + } + if (typeof MLGraphBuilder === "undefined") { + return { supported: false, reason: "MLGraphBuilder not available" }; + } + return { supported: true }; +} + +/** + * Initialize the DAOP polyfill layer. 
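+ *
+ * Typical call sequence (same pattern as the src/index.js usage note;
+ * showError is application code, not part of DAOP):
+ *
+ *   const result = initDAOP();
+ *   if (!result.ok) { showError(result.error); return; }
+ *   // From here on, navigator.ml.createContext() resolves to a DAOPContext.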
+ * + * @returns {{ok: boolean, error?: string}} + */ +export function initDAOP() { + if (_initialized) { + return { ok: true }; + } + + const support = detectWebNNSupport(); + if (!support.supported) { + return { + ok: false, + error: `WebNN is not available: ${support.reason}. ` + + `Please install a WebNN-capable browser: https://webnn.io/en/learn/get-started/installation`, + }; + } + + // Save native references + _nativeML = navigator.ml; + _NativeMLGraphBuilder = window.MLGraphBuilder; + + // Wrap navigator.ml.createContext to return DAOPContext + const originalCreateContext = _nativeML.createContext.bind(_nativeML); + + const wrappedML = { + async createContext(options = {}) { + const nativeContext = await originalCreateContext(options); + return new DAOPContext(nativeContext, options); + }, + __daopPolyfill: true, + }; + + // Replace globals + // navigator.ml is a read-only getter on the Navigator prototype — + // a plain assignment throws. Use Object.defineProperty to override. + Object.defineProperty(navigator, "ml", { + value: wrappedML, + writable: true, + configurable: true, + }); + window.MLGraphBuilder = DAOPGraphBuilder; + + _initialized = true; + return { ok: true }; +} + +/** + * Get a reference to the native (un-wrapped) MLGraphBuilder. + * Used internally by DAOPGraph.compile() for replay. + */ +export function getNativeMLGraphBuilder() { + return _NativeMLGraphBuilder || window.MLGraphBuilder; +} + +/** + * Get a reference to the native ML API. + */ +export function getNativeML() { + return _nativeML || navigator.ml; +} diff --git a/daop-illustration/src/qos/estimate-qos-interp.js b/daop-illustration/src/qos/estimate-qos-interp.js new file mode 100644 index 0000000..f540169 --- /dev/null +++ b/daop-illustration/src/qos/estimate-qos-interp.js @@ -0,0 +1,56 @@ +// src/qos/estimate-qos-interp.js + +import { timeModelDatabase } from "./interpolation/time-model.js"; + +/** + * Estimate QoS using direct interpolation from measured benchmark data. + * + * This estimator directly predicts per-operator execution time from + * measured (inputSize -> time) data points using polynomial regression. + * + * @param {import("../daop-graph.js").DAOPGraph} daopGraph + * @param {Object} [options={}] + * @returns {Object} QoS report + */ +export function estimateQoSInterp(daopGraph, options = {}) { + const ir = daopGraph.ir; + let totalTimeMs = 0; + const breakdown = []; + + for (const node of ir.nodes) { + const inputDescs = node.inputs.map(id => ir.getOperand(id)); + const primaryElements = (inputDescs[0] && inputDescs[0].desc) + ? inputDescs[0].desc.elements : 0; + + const predictedMs = timeModelDatabase.predict(node.opType, primaryElements); + totalTimeMs += predictedMs; + + breakdown.push({ + opType: node.opType, + timeMs: predictedMs, + inputElements: primaryElements, + }); + } + + // Graph-level dispatch overhead (single dispatch for compiled graph) + const graphDispatchOverheadMs = 0.5 + ir.nodes.length * 0.005; + totalTimeMs += graphDispatchOverheadMs; + + const performanceTier = totalTimeMs < 16 ? "excellent" + : totalTimeMs < 100 ? "good" + : totalTimeMs < 1000 ? "fair" + : totalTimeMs < 10000 ? "moderate" + : totalTimeMs < 30000 ? "slow" + : totalTimeMs < 60000 ? 
"very-slow" + : "poor"; + + return { + performanceTier, + internal: { + totalTimeMs, + graphDispatchOverheadMs, + breakdown, + method: "interpolation", + }, + }; +} diff --git a/daop-illustration/src/qos/interpolation/poly-fit.js b/daop-illustration/src/qos/interpolation/poly-fit.js new file mode 100644 index 0000000..f394d8b --- /dev/null +++ b/daop-illustration/src/qos/interpolation/poly-fit.js @@ -0,0 +1,130 @@ +// src/qos/interpolation/poly-fit.js + +/** + * Polynomial least-squares regression. + * + * Fits y = c0 + c1*x + c2*x^2 + ... + cn*x^n to minimize squared error. + * Uses the normal equations approach (sufficient for degree 2-3 with <20 points). + */ + +/** + * Fit a polynomial of given degree to (x, y) data points. + * + * @param {number[]} xs - Independent variable values + * @param {number[]} ys - Dependent variable values + * @param {number} degree - Polynomial degree (2 or 3 recommended) + * @returns {number[]} Coefficients [c0, c1, c2, ...] where y = c0 + c1*x + c2*x^2 + ... + */ +export function polyFit(xs, ys, degree = 2) { + if (xs.length !== ys.length) { + throw new Error("polyFit: xs and ys must have same length"); + } + if (xs.length <= degree) { + // Not enough points for this degree — fall back to lower degree + degree = Math.max(1, xs.length - 1); + } + + const n = xs.length; + const m = degree + 1; // number of coefficients + + // Build normal equations: A^T A c = A^T y + // ATA[i][j] = sum(x^(i+j)), ATy[i] = sum(y * x^i) + const ATA = Array.from({ length: m }, () => new Array(m).fill(0)); + const ATy = new Array(m).fill(0); + + // Pre-compute x^p for each data point (p = 0..2*degree) + for (let k = 0; k < n; k++) { + const xPows = new Array(2 * degree + 1); + xPows[0] = 1; + for (let p = 1; p < xPows.length; p++) { + xPows[p] = xPows[p - 1] * xs[k]; + } + for (let i = 0; i < m; i++) { + ATy[i] += ys[k] * xPows[i]; + for (let j = i; j < m; j++) { + ATA[i][j] += xPows[i + j]; + } + } + } + + // Fill symmetric lower triangle + for (let i = 1; i < m; i++) { + for (let j = 0; j < i; j++) { + ATA[i][j] = ATA[j][i]; + } + } + + // Solve via Gaussian elimination with partial pivoting + return _solveLinearSystem(ATA, ATy); +} + +/** + * Evaluate a polynomial at a given x. + * + * @param {number[]} coeffs - [c0, c1, c2, ...] from polyFit + * @param {number} x - Value to evaluate at + * @returns {number} y = c0 + c1*x + c2*x^2 + ... + */ +export function polyEval(coeffs, x) { + let result = 0; + let xPow = 1; + for (const c of coeffs) { + result += c * xPow; + xPow *= x; + } + return result; +} + +/** + * Solve Ax = b using Gaussian elimination with partial pivoting. + * Modifies A and b in place. 
+ * + * @param {number[][]} A - Square matrix + * @param {number[]} b - Right-hand side + * @returns {number[]} Solution vector x + */ +function _solveLinearSystem(A, b) { + const n = A.length; + + // Forward elimination with partial pivoting + for (let col = 0; col < n; col++) { + // Find pivot + let maxVal = Math.abs(A[col][col]); + let maxRow = col; + for (let row = col + 1; row < n; row++) { + if (Math.abs(A[row][col]) > maxVal) { + maxVal = Math.abs(A[row][col]); + maxRow = row; + } + } + + // Swap rows + if (maxRow !== col) { + [A[col], A[maxRow]] = [A[maxRow], A[col]]; + [b[col], b[maxRow]] = [b[maxRow], b[col]]; + } + + // Eliminate + const pivot = A[col][col]; + if (Math.abs(pivot) < 1e-12) continue; // singular — skip + for (let row = col + 1; row < n; row++) { + const factor = A[row][col] / pivot; + for (let j = col; j < n; j++) { + A[row][j] -= factor * A[col][j]; + } + b[row] -= factor * b[col]; + } + } + + // Back substitution + const x = new Array(n).fill(0); + for (let row = n - 1; row >= 0; row--) { + let sum = b[row]; + for (let j = row + 1; j < n; j++) { + sum -= A[row][j] * x[j]; + } + x[row] = Math.abs(A[row][row]) > 1e-12 ? sum / A[row][row] : 0; + } + + return x; +} diff --git a/daop-illustration/src/qos/interpolation/time-model.js b/daop-illustration/src/qos/interpolation/time-model.js new file mode 100644 index 0000000..0bb2bf5 --- /dev/null +++ b/daop-illustration/src/qos/interpolation/time-model.js @@ -0,0 +1,206 @@ +// src/qos/interpolation/time-model.js + +import { polyFit, polyEval } from "./poly-fit.js"; + +const STORAGE_KEY = "daop_time_models"; +const DEFAULT_POLY_DEGREE = 1; + +/** + * TimeModelDatabase — stores measured time data points per operator + * and fits polynomial curves for direct time prediction. + * + * Stores raw (inputSize -> time) measurements and uses polynomial + * regression to predict times for unseen sizes. + */ +class TimeModelDatabase { + constructor() { + /** @type {Object} */ + this.models = {}; + this._loadFromLocalStorage(); + } + + _loadFromLocalStorage() { + if (typeof localStorage !== "undefined") { + const saved = localStorage.getItem(STORAGE_KEY); + if (saved) { + try { + this.models = JSON.parse(saved); + } catch (e) { + console.error("[DAOP TimeModel] Failed to parse from localStorage", e); + } + } + } + } + + _saveToLocalStorage() { + if (typeof localStorage !== "undefined") { + localStorage.setItem(STORAGE_KEY, JSON.stringify(this.models)); + } + } + + /** + * Add a measured data point for an operator. + * + * @param {string} opType - Operator name (e.g., "conv2d") + * @param {Object} point - { totalElements, medianMs, label, inputShape, ... } + */ + addDataPoint(opType, point) { + if (!this.models[opType]) { + this.models[opType] = { points: [], coeffs: null }; + } + const model = this.models[opType]; + // Replace existing point at same totalElements + model.points = model.points.filter(p => p.totalElements !== point.totalElements); + model.points.push(point); + model.points.sort((a, b) => a.totalElements - b.totalElements); + // Invalidate fitted curve + model.coeffs = null; + this._saveToLocalStorage(); + } + + /** + * Fit polynomial curves for all ops that have data points. + * Call this after all benchmarks are complete. 
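+ *
+ * Example (data-point values are illustrative, not measured):
+ *
+ *   timeModelDatabase.addDataPoint("conv2d",
+ *     { totalElements: 65536, medianMs: 0.42, label: "small" });
+ *   timeModelDatabase.addDataPoint("conv2d",
+ *     { totalElements: 1048576, medianMs: 3.1, label: "large" });
+ *   timeModelDatabase.fitAll();
+ *   timeModelDatabase.predict("conv2d", 262144); // interpolated time in ms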
+ * + * @param {number} degree - Polynomial degree (defaults to DEFAULT_POLY_DEGREE, i.e. 1) + */ + fitAll(degree = DEFAULT_POLY_DEGREE) { + for (const [opType, model] of Object.entries(this.models)) { + if (model.points && model.points.length >= 2) { + this.fitOp(opType, degree); + } + } + this._saveToLocalStorage(); + } + + /** + * Fit polynomial for a single op. + * + * Uses log-log space: x = log(totalElements), y = log(medianMs). + * + * To handle noise at small sizes (where dispatch overhead dominates and + * can produce U-shaped data), the fitter: + * 1. Finds the point with the minimum medianMs. + * 2. Clamps all points to the left of it to that minimum value. + * 3. Fits a degree-1 polynomial (power law) using only the points from + * the minimum onward — the clamped left-side points are excluded. + * 4. Stores `clampBelowLogX` and `clampLogY` so predict() can return + * the flat clamp value for inputs smaller than the minimum point. + */ + fitOp(opType, degree = DEFAULT_POLY_DEGREE) { + const model = this.models[opType]; + if (!model || !model.points || model.points.length < 2) return; + + // Find the index of the point with the smallest medianMs + let minIdx = 0; + for (let i = 1; i < model.points.length; i++) { + if (model.points[i].medianMs < model.points[minIdx].medianMs) { + minIdx = i; + } + } + + const minMs = model.points[minIdx].medianMs; + const clampLogX = Math.log(model.points[minIdx].totalElements); + const clampLogY = Math.log(Math.max(1e-6, minMs)); + + // Clamp left-side points to the minimum value (mutate in place) + for (let i = 0; i < minIdx; i++) { + model.points[i].medianMs = minMs; + } + + // Fit using only points from minIdx onward (right side of the minimum) + const fitPoints = model.points.slice(minIdx); + const xs = fitPoints.map(p => Math.log(p.totalElements)); + const ys = fitPoints.map(p => Math.log(Math.max(1e-6, p.medianMs))); + + model.coeffs = (fitPoints.length >= 2) ? polyFit(xs, ys, degree) : null; + model.clampBelowLogX = (minIdx > 0) ? clampLogX : null; + model.clampLogY = (minIdx > 0) ? clampLogY : null; + model.fitDegree = degree; + model.fittedAt = Date.now(); + } + + /** + * Predict execution time (ms) for an operator at a given input size. + * + * @param {string} opType + * @param {number} totalElements - Total elements of primary input tensor + * @returns {number} Predicted time in ms + */ + predict(opType, totalElements) { + const model = this.models[opType]; + if (!model) { + return 0.1; + } + + if (model.coeffs) { + const logX = Math.log(Math.max(1, totalElements)); + + // Left-side clamp: if input is at or below the minimum-time point, + // return the clamped floor value instead of extrapolating + if (model.clampBelowLogX != null && logX <= model.clampBelowLogX) { + return Math.max(0.001, Math.exp(model.clampLogY)); + } + + const logPredicted = polyEval(model.coeffs, logX); + return Math.max(0.001, Math.exp(logPredicted)); + } + + return this._linearInterpolate(model.points, totalElements); + } + + /** + * Piecewise linear interpolation fallback. 
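+ *
+ * Example (illustrative points): with measurements { 1000 → 0.1 ms } and
+ * { 3000 → 0.3 ms }, a query at 2000 elements returns 0.2 ms; queries
+ * outside the measured range clamp to the nearest endpoint.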
+ */ + _linearInterpolate(points, totalElements) { + if (!points || points.length === 0) return 0.1; + if (points.length === 1) return points[0].medianMs; + + if (totalElements <= points[0].totalElements) return points[0].medianMs; + if (totalElements >= points[points.length - 1].totalElements) { + return points[points.length - 1].medianMs; + } + + for (let i = 1; i < points.length; i++) { + if (points[i].totalElements >= totalElements) { + const lo = points[i - 1]; + const hi = points[i]; + const t = (totalElements - lo.totalElements) / (hi.totalElements - lo.totalElements); + return lo.medianMs + t * (hi.medianMs - lo.medianMs); + } + } + return points[points.length - 1].medianMs; + } + + /** + * Get the model for an op (points + fitted coefficients). + */ + getModel(opType) { + return this.models[opType] || null; + } + + /** + * Get all models. + */ + getAllModels() { + return { ...this.models }; + } + + /** + * Check if benchmark data exists. + */ + hasBenchmarkData() { + return Object.keys(this.models).length > 0 && + Object.values(this.models).some(m => m.points && m.points.length > 0); + } + + /** + * Reset all models. + */ + resetToDefaults() { + this.models = {}; + this._saveToLocalStorage(); + } +} + +export const timeModelDatabase = new TimeModelDatabase(); diff --git a/daop-illustration/src/qos/microbench/bench-runner.js b/daop-illustration/src/qos/microbench/bench-runner.js new file mode 100644 index 0000000..c98b513 --- /dev/null +++ b/daop-illustration/src/qos/microbench/bench-runner.js @@ -0,0 +1,264 @@ +// src/qos/microbench/bench-runner.js + +import { getNativeML, getNativeMLGraphBuilder } from "../../polyfill.js"; +import { OP_BENCH_CONFIGS } from "./op-benchmarks.js"; +import { timeModelDatabase } from "../interpolation/time-model.js"; + +function fillRandomBuffer(uint8) { + const CHUNK = 65536; + for (let offset = 0; offset < uint8.length; offset += CHUNK) { + const end = Math.min(offset + CHUNK, uint8.length); + crypto.getRandomValues(uint8.subarray(offset, end)); + } +} + +export class BenchRunner { + constructor(options = {}) { + this.warmupIterations = options.warmupIterations || 5; + this.measureIterations = options.measureIterations || 30; + this.deviceType = options.deviceType || "gpu"; + /** @type {number} Measured dispatch+readTensor overhead in ms (set by _measureBaselineOverhead) */ + this._baselineOverheadMs = 0; + } + + /** + * Measure the fixed dispatch + readTensor overhead using a trivial graph. + * Builds a tiny reshape [1,1,1,1] → [1,1,1,1], runs the same warmup + + * batched-measurement loop used for real operators, and returns the median + * per-dispatch time. This captures GPU command-submission and sync cost + * with negligible compute, so we can subtract it from real measurements. + * + * Runs multiple independent rounds and returns the median-of-medians for + * higher accuracy, since this value is subtracted from every operator + * measurement. 
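+ *
+ * Illustrative numbers: if the trivial graph's median per-dispatch time
+ * is 0.08 ms, an operator later measured at 0.50 ms is recorded as
+ * max(0.001, 0.50 - 0.08) = 0.42 ms (see _benchmarkSizeVariant).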
+ */ + async _measureBaselineOverhead(onProgress) { + if (onProgress) onProgress({ phase: "baseline", label: "measuring dispatch overhead" }); + + const nativeML = getNativeML(); + const NativeBuilder = getNativeMLGraphBuilder(); + + const rounds = 3; + const roundMedians = []; + + for (let r = 0; r < rounds; r++) { + const context = await nativeML.createContext({ deviceType: this.deviceType }); + const builder = new NativeBuilder(context); + + const shape = [1, 1, 1, 1]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.reshape(input, shape); + const graph = await builder.build({ output }); + + const inputTensor = await context.createTensor({ + dataType: "float32", + shape, + writable: true, + readable: false, + }); + const outputTensor = await context.createTensor({ + dataType: "float32", + shape, + writable: false, + readable: true, + }); + + const inputData = new Float32Array(1); + inputData[0] = 1.0; + context.writeTensor(inputTensor, inputData); + + // Warmup + for (let i = 0; i < this.warmupIterations; i++) { + context.dispatch(graph, { input: inputTensor }, { output: outputTensor }); + await context.readTensor(outputTensor); + } + + // Measurement — same batched loop as _benchmarkSizeVariant + const batchSize = 10; + const batchTimes = []; + for (let i = 0; i < 50; i++) { + const batchStart = performance.now(); + for (let j = 0; j < batchSize; j++) { + context.dispatch(graph, { input: inputTensor }, { output: outputTensor }); + } + await context.readTensor(outputTensor); + const batchEnd = performance.now(); + batchTimes.push((batchEnd - batchStart) / batchSize); + } + + inputTensor.destroy(); + outputTensor.destroy(); + + batchTimes.sort((a, b) => a - b); + roundMedians.push(batchTimes[Math.floor(batchTimes.length / 2)]); + } + + // Median-of-medians across rounds + roundMedians.sort((a, b) => a - b); + return roundMedians[Math.floor(roundMedians.length / 2)]; + } + + /** + * Benchmark a single size variant of an operator. + * Builds the graph, warms up, measures, and returns timing statistics. 
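+ *
+ * @param {string} opType
+ * @param {object} sizeConfig - One entry of OP_BENCH_CONFIGS[opType].sizes.
+ * @param {function=} onProgress - Optional progress callback.
+ * @returns {Promise<object>} Overhead-corrected timing stats for this size.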
+ */ + async _benchmarkSizeVariant(opType, sizeConfig, onProgress) { + const nativeML = getNativeML(); + const context = await nativeML.createContext({ deviceType: this.deviceType }); + const NativeBuilder = getNativeMLGraphBuilder(); + const builder = new NativeBuilder(context); + + if (onProgress) onProgress({ phase: "building", opType, label: sizeConfig.label }); + + const { outputs, inputName, inputShape, flops, bytes, outputShape, totalElements } = sizeConfig.buildGraph(builder); + const graph = await builder.build(outputs); + + const inputTensor = await context.createTensor({ + dataType: "float32", + shape: inputShape, + writable: true, + readable: false, + }); + const outputTensor = await context.createTensor({ + dataType: "float32", + shape: outputShape, + writable: false, + readable: true, + }); + + const inputData = new Float32Array(inputShape.reduce((a, b) => a * b, 1)); + fillRandomBuffer(new Uint8Array(inputData.buffer)); + context.writeTensor(inputTensor, inputData); + + if (onProgress) onProgress({ phase: "warmup", opType, label: sizeConfig.label }); + for (let i = 0; i < this.warmupIterations; i++) { + context.dispatch(graph, { [inputName]: inputTensor }, { output: outputTensor }); + await context.readTensor(outputTensor); + } + + if (onProgress) onProgress({ phase: "measuring", opType, label: sizeConfig.label }); + const batchSize = 10; + const batchTimes = []; + for (let i = 0; i < this.measureIterations; i++) { + const batchStart = performance.now(); + for (let j = 0; j < batchSize; j++) { + context.dispatch(graph, { [inputName]: inputTensor }, { output: outputTensor }); + } + await context.readTensor(outputTensor); + const batchEnd = performance.now(); + batchTimes.push((batchEnd - batchStart) / batchSize); + } + + inputTensor.destroy(); + outputTensor.destroy(); + + batchTimes.sort((a, b) => a - b); + const medianMs = batchTimes[Math.floor(batchTimes.length / 2)]; + const p90Ms = batchTimes[Math.floor(batchTimes.length * 0.9)]; + const meanMs = batchTimes.reduce((a, b) => a + b, 0) / batchTimes.length; + const minMs = batchTimes[0]; + + // Compute throughput parameters from the overhead-corrected median time. + const correctedMedianMs = Math.max(0.001, medianMs - this._baselineOverheadMs); + const correctedP90Ms = Math.max(0.001, p90Ms - this._baselineOverheadMs); + const correctedMeanMs = Math.max(0.001, meanMs - this._baselineOverheadMs); + const correctedMinMs = Math.max(0.001, minMs - this._baselineOverheadMs); + + const gflops = (flops / 1e9) / (correctedMedianMs / 1000); + const bandwidthGBs = (bytes / 1e9) / (correctedMedianMs / 1000); + + return { + opType, + label: sizeConfig.label, + totalElements, + medianMs: correctedMedianMs, + p90Ms: correctedP90Ms, + meanMs: correctedMeanMs, + minMs: correctedMinMs, + overheadMs: this._baselineOverheadMs, + gflops, + bandwidthGBs, + arithmeticIntensity: flops / bytes, + flops, + bytes, + }; + } + + /** + * Benchmark a single operator across all its size variants. 
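+ * Each size result is also recorded in timeModelDatabase so the
+ * interpolation estimator can fit time-vs-size curves afterwards.
+ *
+ * @param {string} opType - Key into OP_BENCH_CONFIGS.
+ * @param {function=} onProgress - Optional progress callback.
+ * @returns {Promise<Array>} Per-size timing results.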
+   */
+  async benchmarkOp(opType, onProgress) {
+    const config = OP_BENCH_CONFIGS[opType];
+    if (!config) {
+      throw new Error(`[DAOP Bench] No benchmark config for op: ${opType}`);
+    }
+
+    const sizeResults = [];
+    for (const sizeConfig of config.sizes) {
+      const result = await this._benchmarkSizeVariant(opType, sizeConfig, onProgress);
+      sizeResults.push(result);
+
+      await new Promise(r => setTimeout(r, 30));
+    }
+
+    // Store in TimeModelDatabase for interpolation estimator
+    for (const result of sizeResults) {
+      timeModelDatabase.addDataPoint(opType, {
+        totalElements: result.totalElements,
+        medianMs: result.medianMs,
+        minMs: result.minMs,
+        p90Ms: result.p90Ms,
+        label: result.label,
+      });
+    }
+
+    if (onProgress) onProgress({ phase: "done", opType, result: sizeResults });
+
+    return sizeResults;
+  }
+
+  async benchmarkAll(onProgress) {
+    const results = [];
+    const ops = Object.keys(OP_BENCH_CONFIGS);
+
+    // Measure baseline dispatch+readTensor overhead before benchmarking operators
+    this._baselineOverheadMs = await this._measureBaselineOverhead(onProgress);
+    if (onProgress) {
+      onProgress({
+        phase: "baseline-done",
+        baselineMs: this._baselineOverheadMs,
+      });
+    }
+
+    for (let i = 0; i < ops.length; i++) {
+      const opType = ops[i];
+      if (onProgress) {
+        onProgress({
+          phase: "start",
+          opType,
+          index: i,
+          total: ops.length,
+        });
+      }
+
+      try {
+        const sizeResults = await this.benchmarkOp(opType, onProgress);
+        results.push({ opType, sizes: sizeResults });
+      } catch (err) {
+        console.error(`[DAOP Bench] Failed to benchmark ${opType}:`, err);
+        results.push({ opType, error: err.message });
+      }
+
+      await new Promise(r => setTimeout(r, 50));
+    }
+
+    // Fit polynomial curves for the interpolation estimator
+    timeModelDatabase.fitAll();
+
+    return results;
+  }
+
+  getAvailableOps() {
+    return Object.keys(OP_BENCH_CONFIGS);
+  }
+}
diff --git a/daop-illustration/src/qos/microbench/op-benchmarks.js b/daop-illustration/src/qos/microbench/op-benchmarks.js
new file mode 100644
index 0000000..53d46ab
--- /dev/null
+++ b/daop-illustration/src/qos/microbench/op-benchmarks.js
@@ -0,0 +1,577 @@
+// src/qos/microbench/op-benchmarks.js
+
+function fillRandom(uint8) {
+  const CHUNK = 65536;
+  for (let offset = 0; offset < uint8.length; offset += CHUNK) {
+    const end = Math.min(offset + CHUNK, uint8.length);
+    crypto.getRandomValues(uint8.subarray(offset, end));
+  }
+}
+
+/**
+ * Each op benchmark defines multiple size variants (small/medium/large).
+ * The bench runner tests all variants and stores per-size timing data,
+ * enabling interpolation for accurate estimation on arbitrary shapes.
+ *
+ * Every buildGraph() must return { outputs, inputName, inputShape, flops,
+ * bytes, outputShape, totalElements }.
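+ * totalElements (the element count of the primary input tensor) is the
+ * x-axis the interpolation estimator uses when fitting and predicting.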
+ */ +export const OP_BENCH_CONFIGS = { + conv2d: { + opType: "conv2d", + sizes: [ + { + label: "xs", + buildGraph(builder) { + const inputShape = [1, 16, 16, 8]; + const filterShape = [16, 3, 3, 8]; + const input = builder.input("input", { dataType: "float32", shape: inputShape }); + const filterData = new Float32Array(16 * 3 * 3 * 8); + fillRandom(new Uint8Array(filterData.buffer)); + const filter = builder.constant({ dataType: "float32", shape: filterShape }, filterData); + const output = builder.conv2d(input, filter, { inputLayout: "nhwc", filterLayout: "ohwi" }); + const outH = 14, outW = 14; + const flops = 2 * outH * outW * 16 * 8 * 3 * 3; + const totalElements = 1 * 16 * 16 * 8; + const bytes = (1*16*16*8 + 16*3*3*8 + 1*outH*outW*16) * 4; + return { outputs: { output }, inputName: "input", inputShape, flops, bytes, outputShape: [1, outH, outW, 16], totalElements }; + }, + }, + { + label: "small", + buildGraph(builder) { + const inputShape = [1, 32, 32, 16]; + const filterShape = [32, 3, 3, 16]; + const input = builder.input("input", { dataType: "float32", shape: inputShape }); + const filterData = new Float32Array(32 * 3 * 3 * 16); + fillRandom(new Uint8Array(filterData.buffer)); + const filter = builder.constant({ dataType: "float32", shape: filterShape }, filterData); + const output = builder.conv2d(input, filter, { inputLayout: "nhwc", filterLayout: "ohwi" }); + const outH = 30, outW = 30; + const flops = 2 * outH * outW * 32 * 16 * 3 * 3; + const totalElements = 1 * 32 * 32 * 16; + const bytes = (1*32*32*16 + 32*3*3*16 + 1*outH*outW*32) * 4; + return { outputs: { output }, inputName: "input", inputShape, flops, bytes, outputShape: [1, outH, outW, 32], totalElements }; + }, + }, + { + label: "medium", + buildGraph(builder) { + const inputShape = [1, 64, 64, 24]; + const filterShape = [48, 3, 3, 24]; + const input = builder.input("input", { dataType: "float32", shape: inputShape }); + const filterData = new Float32Array(48 * 3 * 3 * 24); + fillRandom(new Uint8Array(filterData.buffer)); + const filter = builder.constant({ dataType: "float32", shape: filterShape }, filterData); + const output = builder.conv2d(input, filter, { inputLayout: "nhwc", filterLayout: "ohwi" }); + const outH = 62, outW = 62; + const flops = 2 * outH * outW * 48 * 24 * 3 * 3; + const totalElements = 1 * 64 * 64 * 24; + const bytes = (1*64*64*24 + 48*3*3*24 + 1*outH*outW*48) * 4; + return { outputs: { output }, inputName: "input", inputShape, flops, bytes, outputShape: [1, outH, outW, 48], totalElements }; + }, + }, + { + label: "large", + buildGraph(builder) { + const inputShape = [1, 128, 128, 32]; + const filterShape = [64, 3, 3, 32]; + const input = builder.input("input", { dataType: "float32", shape: inputShape }); + const filterData = new Float32Array(64 * 3 * 3 * 32); + fillRandom(new Uint8Array(filterData.buffer)); + const filter = builder.constant({ dataType: "float32", shape: filterShape }, filterData); + const output = builder.conv2d(input, filter, { inputLayout: "nhwc", filterLayout: "ohwi" }); + const outH = 126, outW = 126; + const flops = 2 * outH * outW * 64 * 32 * 3 * 3; + const totalElements = 1 * 128 * 128 * 32; + const bytes = (1*128*128*32 + 64*3*3*32 + 1*outH*outW*64) * 4; + return { outputs: { output }, inputName: "input", inputShape, flops, bytes, outputShape: [1, outH, outW, 64], totalElements }; + }, + }, + { + label: "xl", + buildGraph(builder) { + const inputShape = [1, 256, 256, 32]; + const filterShape = [64, 3, 3, 32]; + const input = builder.input("input", { dataType: 
"float32", shape: inputShape }); + const filterData = new Float32Array(64 * 3 * 3 * 32); + fillRandom(new Uint8Array(filterData.buffer)); + const filter = builder.constant({ dataType: "float32", shape: filterShape }, filterData); + const output = builder.conv2d(input, filter, { inputLayout: "nhwc", filterLayout: "ohwi" }); + const outH = 254, outW = 254; + const flops = 2 * outH * outW * 64 * 32 * 3 * 3; + const totalElements = 1 * 256 * 256 * 32; + const bytes = (1*256*256*32 + 64*3*3*32 + 1*outH*outW*64) * 4; + return { outputs: { output }, inputName: "input", inputShape, flops, bytes, outputShape: [1, outH, outW, 64], totalElements }; + }, + }, + { + label: "xxl", + buildGraph(builder) { + const inputShape = [1, 512, 512, 32]; + const filterShape = [64, 3, 3, 32]; + const input = builder.input("input", { dataType: "float32", shape: inputShape }); + const filterData = new Float32Array(64 * 3 * 3 * 32); + fillRandom(new Uint8Array(filterData.buffer)); + const filter = builder.constant({ dataType: "float32", shape: filterShape }, filterData); + const output = builder.conv2d(input, filter, { inputLayout: "nhwc", filterLayout: "ohwi" }); + const outH = 510, outW = 510; + const flops = 2 * outH * outW * 64 * 32 * 3 * 3; + const totalElements = 1 * 512 * 512 * 32; + const bytes = (1*512*512*32 + 64*3*3*32 + 1*outH*outW*64) * 4; + return { outputs: { output }, inputName: "input", inputShape, flops, bytes, outputShape: [1, outH, outW, 64], totalElements }; + }, + }, + ], + }, + + add: { + opType: "add", + sizes: [ + { + label: "xs", + buildGraph(builder) { + const shape = [1, 16, 16, 8]; + const a = builder.input("input", { dataType: "float32", shape }); + const bData = new Float32Array(1 * 16 * 16 * 8); + fillRandom(new Uint8Array(bData.buffer)); + const b = builder.constant({ dataType: "float32", shape }, bData); + const output = builder.add(a, b); + const elements = 1 * 16 * 16 * 8; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 3 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "small", + buildGraph(builder) { + const shape = [1, 32, 32, 16]; + const a = builder.input("input", { dataType: "float32", shape }); + const bData = new Float32Array(1 * 32 * 32 * 16); + fillRandom(new Uint8Array(bData.buffer)); + const b = builder.constant({ dataType: "float32", shape }, bData); + const output = builder.add(a, b); + const elements = 1 * 32 * 32 * 16; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 3 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "medium", + buildGraph(builder) { + const shape = [1, 64, 64, 32]; + const a = builder.input("input", { dataType: "float32", shape }); + const bData = new Float32Array(1 * 64 * 64 * 32); + fillRandom(new Uint8Array(bData.buffer)); + const b = builder.constant({ dataType: "float32", shape }, bData); + const output = builder.add(a, b); + const elements = 1 * 64 * 64 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 3 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "large", + buildGraph(builder) { + const shape = [1, 128, 128, 64]; + const a = builder.input("input", { dataType: "float32", shape }); + const bData = new Float32Array(1 * 128 * 128 * 64); + fillRandom(new Uint8Array(bData.buffer)); + const b = builder.constant({ dataType: "float32", shape }, bData); + const output = builder.add(a, b); + const 
elements = 1 * 128 * 128 * 64; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 3 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "xl", + buildGraph(builder) { + const shape = [1, 256, 256, 32]; + const a = builder.input("input", { dataType: "float32", shape }); + const bData = new Float32Array(1 * 256 * 256 * 32); + fillRandom(new Uint8Array(bData.buffer)); + const b = builder.constant({ dataType: "float32", shape }, bData); + const output = builder.add(a, b); + const elements = 1 * 256 * 256 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 3 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "xxl", + buildGraph(builder) { + const shape = [1, 512, 512, 32]; + const a = builder.input("input", { dataType: "float32", shape }); + const bData = new Float32Array(1 * 512 * 512 * 32); + fillRandom(new Uint8Array(bData.buffer)); + const b = builder.constant({ dataType: "float32", shape }, bData); + const output = builder.add(a, b); + const elements = 1 * 512 * 512 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 3 * 4, outputShape: shape, totalElements: elements }; + }, + }, + ], + }, + + mul: { + opType: "mul", + sizes: [ + { + label: "xs", + buildGraph(builder) { + const shape = [1, 16, 16, 8]; + const a = builder.input("input", { dataType: "float32", shape }); + const bData = new Float32Array(1 * 16 * 16 * 8); + fillRandom(new Uint8Array(bData.buffer)); + const b = builder.constant({ dataType: "float32", shape }, bData); + const output = builder.mul(a, b); + const elements = 1 * 16 * 16 * 8; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 3 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "small", + buildGraph(builder) { + const shape = [1, 32, 32, 16]; + const a = builder.input("input", { dataType: "float32", shape }); + const bData = new Float32Array(1 * 32 * 32 * 16); + fillRandom(new Uint8Array(bData.buffer)); + const b = builder.constant({ dataType: "float32", shape }, bData); + const output = builder.mul(a, b); + const elements = 1 * 32 * 32 * 16; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 3 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "medium", + buildGraph(builder) { + const shape = [1, 64, 64, 32]; + const a = builder.input("input", { dataType: "float32", shape }); + const bData = new Float32Array(1 * 64 * 64 * 32); + fillRandom(new Uint8Array(bData.buffer)); + const b = builder.constant({ dataType: "float32", shape }, bData); + const output = builder.mul(a, b); + const elements = 1 * 64 * 64 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 3 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "large", + buildGraph(builder) { + const shape = [1, 128, 128, 64]; + const a = builder.input("input", { dataType: "float32", shape }); + const bData = new Float32Array(1 * 128 * 128 * 64); + fillRandom(new Uint8Array(bData.buffer)); + const b = builder.constant({ dataType: "float32", shape }, bData); + const output = builder.mul(a, b); + const elements = 1 * 128 * 128 * 64; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 3 * 4, outputShape: shape, 
totalElements: elements }; + }, + }, + { + label: "xl", + buildGraph(builder) { + const shape = [1, 256, 256, 32]; + const a = builder.input("input", { dataType: "float32", shape }); + const bData = new Float32Array(1 * 256 * 256 * 32); + fillRandom(new Uint8Array(bData.buffer)); + const b = builder.constant({ dataType: "float32", shape }, bData); + const output = builder.mul(a, b); + const elements = 1 * 256 * 256 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 3 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "xxl", + buildGraph(builder) { + const shape = [1, 512, 512, 32]; + const a = builder.input("input", { dataType: "float32", shape }); + const bData = new Float32Array(1 * 512 * 512 * 32); + fillRandom(new Uint8Array(bData.buffer)); + const b = builder.constant({ dataType: "float32", shape }, bData); + const output = builder.mul(a, b); + const elements = 1 * 512 * 512 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 3 * 4, outputShape: shape, totalElements: elements }; + }, + }, + ], + }, + + relu: { + opType: "relu", + sizes: [ + { + label: "xs", + buildGraph(builder) { + const shape = [1, 16, 16, 8]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.relu(input); + const elements = 1 * 16 * 16 * 8; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "small", + buildGraph(builder) { + const shape = [1, 32, 32, 16]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.relu(input); + const elements = 1 * 32 * 32 * 16; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "medium", + buildGraph(builder) { + const shape = [1, 64, 64, 32]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.relu(input); + const elements = 1 * 64 * 64 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "large", + buildGraph(builder) { + const shape = [1, 128, 128, 64]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.relu(input); + const elements = 1 * 128 * 128 * 64; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "xl", + buildGraph(builder) { + const shape = [1, 256, 256, 32]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.relu(input); + const elements = 1 * 256 * 256 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "xxl", + buildGraph(builder) { + const shape = [1, 512, 512, 32]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.relu(input); + const elements = 1 * 512 * 512 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements, bytes: elements * 2 * 4, outputShape: shape, 
totalElements: elements }; + }, + }, + ], + }, + + sigmoid: { + opType: "sigmoid", + sizes: [ + { + label: "xs", + buildGraph(builder) { + const shape = [1, 16, 16, 8]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.sigmoid(input); + const elements = 1 * 16 * 16 * 8; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements * 4, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "small", + buildGraph(builder) { + const shape = [1, 32, 32, 16]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.sigmoid(input); + const elements = 1 * 32 * 32 * 16; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements * 4, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "medium", + buildGraph(builder) { + const shape = [1, 64, 64, 32]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.sigmoid(input); + const elements = 1 * 64 * 64 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements * 4, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "large", + buildGraph(builder) { + const shape = [1, 128, 128, 64]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.sigmoid(input); + const elements = 1 * 128 * 128 * 64; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements * 4, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "xl", + buildGraph(builder) { + const shape = [1, 256, 256, 32]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.sigmoid(input); + const elements = 1 * 256 * 256 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements * 4, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "xxl", + buildGraph(builder) { + const shape = [1, 512, 512, 32]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.sigmoid(input); + const elements = 1 * 512 * 512 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements * 4, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + ], + }, + + clamp: { + opType: "clamp", + sizes: [ + { + label: "xs", + buildGraph(builder) { + const shape = [1, 16, 16, 8]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.clamp(input, { minValue: 0, maxValue: 6 }); + const elements = 1 * 16 * 16 * 8; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements * 2, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "small", + buildGraph(builder) { + const shape = [1, 32, 32, 16]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.clamp(input, { minValue: 0, maxValue: 6 }); + const elements = 1 * 32 * 32 * 16; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements * 2, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "medium", + buildGraph(builder) { + const shape = [1, 64, 64, 32]; + const input = builder.input("input", { 
dataType: "float32", shape }); + const output = builder.clamp(input, { minValue: 0, maxValue: 6 }); + const elements = 1 * 64 * 64 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements * 2, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "large", + buildGraph(builder) { + const shape = [1, 128, 128, 64]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.clamp(input, { minValue: 0, maxValue: 6 }); + const elements = 1 * 128 * 128 * 64; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements * 2, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "xl", + buildGraph(builder) { + const shape = [1, 256, 256, 32]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.clamp(input, { minValue: 0, maxValue: 6 }); + const elements = 1 * 256 * 256 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements * 2, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + { + label: "xxl", + buildGraph(builder) { + const shape = [1, 512, 512, 32]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.clamp(input, { minValue: 0, maxValue: 6 }); + const elements = 1 * 512 * 512 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: elements * 2, bytes: elements * 2 * 4, outputShape: shape, totalElements: elements }; + }, + }, + ], + }, + + averagePool2d: { + opType: "averagePool2d", + sizes: [ + { + label: "xs", + buildGraph(builder) { + const shape = [1, 16, 16, 8]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.averagePool2d(input, { + windowDimensions: [3, 3], strides: [2, 2], padding: [1, 1, 1, 1], layout: "nhwc", + }); + const outElements = 1 * 8 * 8 * 8; + const inElements = 1 * 16 * 16 * 8; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: outElements * 3 * 3, bytes: (inElements + outElements) * 4, outputShape: [1, 8, 8, 8], totalElements: inElements }; + }, + }, + { + label: "small", + buildGraph(builder) { + const shape = [1, 32, 32, 16]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.averagePool2d(input, { + windowDimensions: [3, 3], strides: [2, 2], padding: [1, 1, 1, 1], layout: "nhwc", + }); + const outElements = 1 * 16 * 16 * 16; + const inElements = 1 * 32 * 32 * 16; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: outElements * 3 * 3, bytes: (inElements + outElements) * 4, outputShape: [1, 16, 16, 16], totalElements: inElements }; + }, + }, + { + label: "medium", + buildGraph(builder) { + const shape = [1, 64, 64, 32]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.averagePool2d(input, { + windowDimensions: [3, 3], strides: [2, 2], padding: [1, 1, 1, 1], layout: "nhwc", + }); + const outElements = 1 * 32 * 32 * 32; + const inElements = 1 * 64 * 64 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: outElements * 3 * 3, bytes: (inElements + outElements) * 4, outputShape: [1, 32, 32, 32], totalElements: inElements }; + }, + }, + { + label: "large", + buildGraph(builder) { + const shape = [1, 128, 128, 64]; + const input = builder.input("input", { dataType: "float32", shape }); + 
const output = builder.averagePool2d(input, { + windowDimensions: [3, 3], strides: [2, 2], padding: [1, 1, 1, 1], layout: "nhwc", + }); + const outElements = 1 * 64 * 64 * 64; + const inElements = 1 * 128 * 128 * 64; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: outElements * 3 * 3, bytes: (inElements + outElements) * 4, outputShape: [1, 64, 64, 64], totalElements: inElements }; + }, + }, + { + label: "xl", + buildGraph(builder) { + const shape = [1, 256, 256, 32]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.averagePool2d(input, { + windowDimensions: [3, 3], strides: [2, 2], padding: [1, 1, 1, 1], layout: "nhwc", + }); + const outElements = 1 * 128 * 128 * 32; + const inElements = 1 * 256 * 256 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: outElements * 3 * 3, bytes: (inElements + outElements) * 4, outputShape: [1, 128, 128, 32], totalElements: inElements }; + }, + }, + { + label: "xxl", + buildGraph(builder) { + const shape = [1, 512, 512, 32]; + const input = builder.input("input", { dataType: "float32", shape }); + const output = builder.averagePool2d(input, { + windowDimensions: [3, 3], strides: [2, 2], padding: [1, 1, 1, 1], layout: "nhwc", + }); + const outElements = 1 * 256 * 256 * 32; + const inElements = 1 * 512 * 512 * 32; + return { outputs: { output }, inputName: "input", inputShape: shape, flops: outElements * 3 * 3, bytes: (inElements + outElements) * 4, outputShape: [1, 256, 256, 32], totalElements: inElements }; + }, + }, + ], + }, +};
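+
+// Usage sketch (illustrative, not wired into the demo): run the micro-
+// benchmarks once, then query the fitted per-op time models. Everything
+// referenced below is defined in this diff; the example shape is made up.
+//
+//   import { BenchRunner } from "./bench-runner.js";
+//   import { timeModelDatabase } from "../interpolation/time-model.js";
+//
+//   const runner = new BenchRunner({ deviceType: "gpu" });
+//   await runner.benchmarkAll((p) => console.log(p.phase, p.opType ?? ""));
+//
+//   // Predict conv2d latency for a 1x224x224x3 input (150,528 elements):
+//   const ms = timeModelDatabase.predict("conv2d", 1 * 224 * 224 * 3);
+//   console.log(`estimated conv2d time: ${ms.toFixed(3)} ms`);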