From 2c0af7d18f60865d9b1c5ae7158f57fce439385b Mon Sep 17 00:00:00 2001 From: rahulc7 Date: Tue, 30 Dec 2025 15:01:34 -0800 Subject: [PATCH 1/3] Adding Tests for CadenceDefaultQuantizer Differential Revision: D88899457 --- .../cadence/aot/tests/test_quantizer_ops.py | 101 +++++++++++++++++- 1 file changed, 100 insertions(+), 1 deletion(-) diff --git a/backends/cadence/aot/tests/test_quantizer_ops.py b/backends/cadence/aot/tests/test_quantizer_ops.py index 19a68f9b108..fa53c1818f5 100644 --- a/backends/cadence/aot/tests/test_quantizer_ops.py +++ b/backends/cadence/aot/tests/test_quantizer_ops.py @@ -33,6 +33,7 @@ CadenceWithSoftmaxQuantizer, qconfig_A16, qconfig_A8W8, + qconfig_A8W8sym, ) from executorch.exir.pass_base import NodeMetadata from parameterized import parameterized @@ -53,7 +54,6 @@ # Quantizers intentionally excluded from annotation testing. # These should be explicitly justified when added. EXCLUDED_FROM_ANNOTATION_TESTING: set[type[CadenceQuantizer]] = { - CadenceDefaultQuantizer, # TODO: T247438143 Add test coverage CadenceFusedConvReluQuantizer, # TODO: T247438151 Add test coverage CadenceNopQuantizer, # No-op quantizer, doesn't annotate anything CadenceW8A32MixedQuantizer, # TODO: T247438158 Add test coverage @@ -137,6 +137,61 @@ # For add: both inputs are activations [qconfig_A8W8.input_activation, qconfig_A8W8.input_activation], ), + # CadenceDefaultQuantizer test cases + ( + "default_matmul_A8W8", + lambda self: self._build_matmul_graph(), + CadenceDefaultQuantizer(), + torch.ops.aten.matmul.default, + qconfig_A8W8.output_activation, + # For matmul: both inputs are activations + [qconfig_A8W8.input_activation, qconfig_A8W8.input_activation], + ), + ( + "default_linear_A8W8", + lambda self: self._build_linear_graph(), + CadenceDefaultQuantizer(), + torch.ops.aten.linear.default, + qconfig_A8W8.output_activation, + # For linear: [input_activation, weight] + [qconfig_A8W8.input_activation, qconfig_A8W8.weight], + ), + ( + "default_conv1d_A8W8sym", + lambda self: self._build_conv1d_graph(), + CadenceDefaultQuantizer(), + torch.ops.aten.conv1d.default, + qconfig_A8W8sym.output_activation, + # For conv1d: [input_activation, weight] with symmetric weights + [qconfig_A8W8sym.input_activation, qconfig_A8W8sym.weight], + ), + ( + "default_conv2d_A8W8sym", + lambda self: self._build_conv2d_graph(), + CadenceDefaultQuantizer(), + torch.ops.aten.conv2d.default, + qconfig_A8W8sym.output_activation, + # For conv2d: [input_activation, weight] with symmetric weights + [qconfig_A8W8sym.input_activation, qconfig_A8W8sym.weight], + ), + ( + "default_bmm_A8W8", + lambda self: self._build_bmm_graph(), + CadenceDefaultQuantizer(), + torch.ops.aten.bmm.default, + qconfig_A8W8.output_activation, + # For bmm: both inputs are activations + [qconfig_A8W8.input_activation, qconfig_A8W8.input_activation], + ), + ( + "default_relu_A8W8", + lambda self: self._build_relu_graph(), + CadenceDefaultQuantizer(), + torch.ops.aten.relu.default, + qconfig_A8W8.output_activation, + # For relu: only input_activation + [qconfig_A8W8.input_activation], + ), ] # Derive the set of tested quantizer classes from the test cases. 
@@ -309,6 +364,50 @@ def _build_add_graph(self) -> tuple[torch.fx.GraphModule, torch.fx.Node]: self.assertEqual(len(add_nodes), 1, "Should find exactly one add node") return gm, add_nodes[0] + def _build_bmm_graph(self) -> tuple[torch.fx.GraphModule, torch.fx.Node]: + """Build a simple graph with a bmm (batch matrix multiply) operation.""" + builder = GraphBuilder() + # BMM requires 3D tensors: (batch, n, m) @ (batch, m, p) -> (batch, n, p) + x = builder.placeholder("x", torch.randn(2, 4, 8)) + y = builder.placeholder("y", torch.randn(2, 8, 4)) + bmm = builder.call_operator( + op=torch.ops.aten.bmm.default, + args=(x, y), + meta=NodeMetadata( + {"source_fn_stack": [("bmm", torch.ops.aten.bmm.default)]} + ), + ) + builder.output([bmm]) + gm = builder.get_graph_module() + + bmm_nodes = gm.graph.find_nodes( + op="call_function", + target=torch.ops.aten.bmm.default, + ) + self.assertEqual(len(bmm_nodes), 1, "Should find exactly one bmm node") + return gm, bmm_nodes[0] + + def _build_relu_graph(self) -> tuple[torch.fx.GraphModule, torch.fx.Node]: + """Build a simple graph with a relu operation.""" + builder = GraphBuilder() + x = builder.placeholder("x", torch.randn(1, 10)) + relu = builder.call_operator( + op=torch.ops.aten.relu.default, + args=(x,), + meta=NodeMetadata( + {"source_fn_stack": [("relu", torch.ops.aten.relu.default)]} + ), + ) + builder.output([relu]) + gm = builder.get_graph_module() + + relu_nodes = gm.graph.find_nodes( + op="call_function", + target=torch.ops.aten.relu.default, + ) + self.assertEqual(len(relu_nodes), 1, "Should find exactly one relu node") + return gm, relu_nodes[0] + @parameterized.expand(QUANTIZER_ANNOTATION_TEST_CASES) def test_quantizer_annotation( self, From 636930a55206f422024b725f779ed27a947fe756 Mon Sep 17 00:00:00 2001 From: rahulc7 Date: Tue, 30 Dec 2025 15:01:34 -0800 Subject: [PATCH 2/3] Changing logic to deal with graphs with derived quantization spec Differential Revision: D88955761 --- .../cadence/aot/tests/test_quantizer_ops.py | 71 ++++++++++++++----- 1 file changed, 55 insertions(+), 16 deletions(-) diff --git a/backends/cadence/aot/tests/test_quantizer_ops.py b/backends/cadence/aot/tests/test_quantizer_ops.py index fa53c1818f5..66d61c5c5e3 100644 --- a/backends/cadence/aot/tests/test_quantizer_ops.py +++ b/backends/cadence/aot/tests/test_quantizer_ops.py @@ -64,6 +64,7 @@ # Test case definitions for quantizer annotation tests. # Format: (name, graph_builder_fn, quantizer_instance, target_op, expected_output_qspec, expected_input_qspecs) # Adding a new quantizer test only requires adding a tuple to this list. +# Note: Use None in expected_input_qspecs to skip comparison for that input (e.g., for DerivedQuantizationSpec). QUANTIZER_ANNOTATION_TEST_CASES: list[ tuple[ str, @@ -71,7 +72,7 @@ CadenceQuantizer, OpOverload, QuantizationSpec, - list[QuantizationSpec], + list[QuantizationSpec | None], ] ] = [ ( @@ -192,6 +193,16 @@ # For relu: only input_activation [qconfig_A8W8.input_activation], ), + ( + "default_addmm_A8W8", + lambda self: self._build_addmm_graph(), + CadenceDefaultQuantizer(), + torch.ops.aten.addmm.default, + qconfig_A8W8.output_activation, + # For addmm: [bias (DerivedQuantizationSpec), mat1, mat2] + # Use None to skip comparison for bias since it's a DerivedQuantizationSpec + [None, qconfig_A8W8.input_activation, qconfig_A8W8.weight], + ), ] # Derive the set of tested quantizer classes from the test cases. 
@@ -408,6 +419,31 @@ def _build_relu_graph(self) -> tuple[torch.fx.GraphModule, torch.fx.Node]:
         self.assertEqual(len(relu_nodes), 1, "Should find exactly one relu node")
         return gm, relu_nodes[0]
 
+    def _build_addmm_graph(self) -> tuple[torch.fx.GraphModule, torch.fx.Node]:
+        """Build a simple graph with an addmm operation."""
+        builder = GraphBuilder()
+        # addmm: bias + (mat1 @ mat2)
+        # args: (bias, mat1, mat2)
+        bias = builder.placeholder("bias", torch.randn(5))
+        mat1 = builder.placeholder("mat1", torch.randn(1, 10))
+        mat2 = builder.placeholder("mat2", torch.randn(10, 5))
+        addmm = builder.call_operator(
+            op=torch.ops.aten.addmm.default,
+            args=(bias, mat1, mat2),
+            meta=NodeMetadata(
+                {"source_fn_stack": [("addmm", torch.ops.aten.addmm.default)]}
+            ),
+        )
+        builder.output([addmm])
+        gm = builder.get_graph_module()
+
+        addmm_nodes = gm.graph.find_nodes(
+            op="call_function",
+            target=torch.ops.aten.addmm.default,
+        )
+        self.assertEqual(len(addmm_nodes), 1, "Should find exactly one addmm node")
+        return gm, addmm_nodes[0]
+
     @parameterized.expand(QUANTIZER_ANNOTATION_TEST_CASES)
     def test_quantizer_annotation(
         self,
@@ -416,7 +452,7 @@ def test_quantizer_annotation(
         quantizer: CadenceQuantizer,
         target: OpOverload,
         expected_output_qspec: QuantizationSpec,
-        expected_input_qspecs: list[QuantizationSpec],
+        expected_input_qspecs: list[QuantizationSpec | None],
     ) -> None:
         """Parameterized test for quantizer annotations."""
         gm, op_node = graph_builder_fn(self)
@@ -431,21 +467,24 @@ def test_quantizer_annotation(
 
         # Verify input annotations
         self.assertEqual(len(annotation.input_qspec_map), len(expected_input_qspecs))
-        for i, (input_node, input_qspec) in enumerate(
-            annotation.input_qspec_map.items()
-        ):
-            expected_arg = op_node.args[i]
-            assert isinstance(expected_arg, torch.fx.Node)
-            self.assertEqual(
-                input_node,
-                expected_arg,
-                f"Input node mismatch at index {i}",
-            )
-            self.assertEqual(
-                input_qspec,
-                expected_input_qspecs[i],
-                f"Input qspec mismatch at index {i}",
+        for input_node, input_qspec in annotation.input_qspec_map.items():
+            # Find the index of this input node in the op's args
+            arg_index = None
+            for i, arg in enumerate(op_node.args):
+                if arg is input_node:
+                    arg_index = i
+                    break
+            self.assertIsNotNone(
+                arg_index,
+                f"Input node {input_node} not found in op_node.args",
             )
+            # Skip comparison if expected qspec is None (e.g., for DerivedQuantizationSpec)
+            if expected_input_qspecs[arg_index] is not None:
+                self.assertEqual(
+                    input_qspec,
+                    expected_input_qspecs[arg_index],
+                    f"Input qspec mismatch at arg index {arg_index}",
+                )
 
     def test_all_quantizers_have_annotation_tests(self) -> None:
         """Ensure every CadenceQuantizer subclass is either tested or explicitly excluded."""

From 4217e25097cab42fb7365b4b19e90c5556c91abd Mon Sep 17 00:00:00 2001
From: Rahul Chandra
Date: Tue, 30 Dec 2025 15:10:19 -0800
Subject: [PATCH 3/3] Adding Tests for CadenceFusedConvReluQuantizer (#16358)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/16358

A fused pattern is one in which the quantizer recognizes a sequence of operations and treats them as a single unit for quantization purposes.
So for example, for a Conv2D + ReLU fusion, rather than having something like this: ``` input → [quantize] → conv2d → [dequantize] → [quantize] → relu → [dequantize] → output ``` a fused pattern quantizes them together like so: ``` input → [quantize] → conv2d → relu → [dequantize] → output ``` We need to make a few changes in our framework to test this. # Change 1: We allow graph builders to return a 3rd element for fused patterns For fused patterns like conv+relu, the quantization annotations are split across two nodes: - Output annotation is on the relu node (the final output of the fused pattern) - Input annotations are on the conv node (where the quantized inputs enter) The existing graph builders return (gm, target_node), which works for single-op patterns where both annotations are on the same node. For fused patterns, we need to know both nodes, so graph builders can now optionally return (gm, output_node, input_source_node). # Change 2: We check annotations on the correct nodes for fused patterns The test previously assumed output_qspec and input_qspec_map were both on the same node. For fused patterns, they're on different nodes: - output_qspec is checked on the output node (relu) - input_qspec_map is checked on the input source node (conv) This change is backwards-compatible: for non-fused patterns, both nodes are the same. Reviewed By: hsharma35 Differential Revision: D89630759 --- .../cadence/aot/tests/test_quantizer_ops.py | 104 +++++++++++++++--- 1 file changed, 90 insertions(+), 14 deletions(-) diff --git a/backends/cadence/aot/tests/test_quantizer_ops.py b/backends/cadence/aot/tests/test_quantizer_ops.py index 66d61c5c5e3..831ab3b95b6 100644 --- a/backends/cadence/aot/tests/test_quantizer_ops.py +++ b/backends/cadence/aot/tests/test_quantizer_ops.py @@ -46,15 +46,18 @@ # Type alias for graph builder functions. # These functions take a test instance and return a graph module and the target op node. +# For fused patterns (e.g., conv+relu), an optional third element specifies the node +# whose args contain the quantized inputs (e.g., conv node for conv+relu fusion). GraphBuilderFn = Callable[ - ["QuantizerAnnotationTest"], tuple[torch.fx.GraphModule, torch.fx.Node] + ["QuantizerAnnotationTest"], + tuple[torch.fx.GraphModule, torch.fx.Node] + | tuple[torch.fx.GraphModule, torch.fx.Node, torch.fx.Node], ] # Quantizers intentionally excluded from annotation testing. # These should be explicitly justified when added. EXCLUDED_FROM_ANNOTATION_TESTING: set[type[CadenceQuantizer]] = { - CadenceFusedConvReluQuantizer, # TODO: T247438151 Add test coverage CadenceNopQuantizer, # No-op quantizer, doesn't annotate anything CadenceW8A32MixedQuantizer, # TODO: T247438158 Add test coverage CadenceRmsNormNopQuantizer, # No-op quantizer, doesn't annotate anything, preserves rms_norm from decomposition @@ -203,6 +206,16 @@ # Use None to skip comparison for bias since it's a DerivedQuantizationSpec [None, qconfig_A8W8.input_activation, qconfig_A8W8.weight], ), + # CadenceFusedConvReluQuantizer test cases + ( + "fused_conv2d_relu_A8W8sym", + lambda self: self._build_conv2d_relu_graph(), + CadenceFusedConvReluQuantizer(), + torch.ops.aten.relu.default, + qconfig_A8W8sym.output_activation, + # For fused conv2d+relu: [input_activation, weight] from conv2d node + [qconfig_A8W8sym.input_activation, qconfig_A8W8sym.weight], + ), ] # Derive the set of tested quantizer classes from the test cases. 
@@ -444,6 +457,52 @@ def _build_addmm_graph(self) -> tuple[torch.fx.GraphModule, torch.fx.Node]: self.assertEqual(len(addmm_nodes), 1, "Should find exactly one addmm node") return gm, addmm_nodes[0] + def _build_conv2d_relu_graph( + self, + ) -> tuple[torch.fx.GraphModule, torch.fx.Node, torch.fx.Node]: + """Build a graph with a conv2d followed by relu (fused pattern). + + Returns: + A tuple of (graph_module, relu_node, conv_node). + The relu_node is the target node where the annotation is placed. + The conv_node is the input source node whose args contain the quantized inputs. + """ + builder = GraphBuilder() + # Input shape: (batch, in_channels, height, width) + x = builder.placeholder("x", torch.randn(1, 3, 8, 8)) + # Weight shape: (out_channels, in_channels, kernel_h, kernel_w) + weight = builder.placeholder("weight", torch.randn(6, 3, 3, 3)) + conv2d = builder.call_operator( + op=torch.ops.aten.conv2d.default, + args=(x, weight), + meta=NodeMetadata( + {"source_fn_stack": [("conv2d", torch.ops.aten.conv2d.default)]} + ), + ) + relu = builder.call_operator( + op=torch.ops.aten.relu.default, + args=(conv2d,), + meta=NodeMetadata( + {"source_fn_stack": [("relu", torch.ops.aten.relu.default)]} + ), + ) + builder.output([relu]) + gm = builder.get_graph_module() + + relu_nodes = gm.graph.find_nodes( + op="call_function", + target=torch.ops.aten.relu.default, + ) + self.assertEqual(len(relu_nodes), 1, "Should find exactly one relu node") + + conv2d_nodes = gm.graph.find_nodes( + op="call_function", + target=torch.ops.aten.conv2d.default, + ) + self.assertEqual(len(conv2d_nodes), 1, "Should find exactly one conv2d node") + + return gm, relu_nodes[0], conv2d_nodes[0] + @parameterized.expand(QUANTIZER_ANNOTATION_TEST_CASES) def test_quantizer_annotation( self, @@ -455,28 +514,45 @@ def test_quantizer_annotation( expected_input_qspecs: list[QuantizationSpec | None], ) -> None: """Parameterized test for quantizer annotations.""" - gm, op_node = graph_builder_fn(self) + result = graph_builder_fn(self) + # Handle both 2-element and 3-element returns from graph builders. + # For fused patterns, the 3rd element specifies the node whose args + # contain the quantized inputs (e.g., conv node for conv+relu fusion). 
+ if len(result) == 3: + gm = result[0] + output_node = result[1] + input_source_node = result[2] + else: + gm = result[0] + output_node = result[1] + input_source_node = output_node quantizer.annotate(gm) - annotation: QuantizationAnnotation = op_node.meta[Q_ANNOTATION_KEY] - self.assertTrue(annotation._annotated) - - # Verify output annotation - self.assertEqual(annotation.output_qspec, expected_output_qspec) + # Verify output annotation (always on the output node) + output_annotation: QuantizationAnnotation = output_node.meta[Q_ANNOTATION_KEY] + self.assertTrue(output_annotation._annotated) + self.assertEqual(output_annotation.output_qspec, expected_output_qspec) - # Verify input annotations - self.assertEqual(len(annotation.input_qspec_map), len(expected_input_qspecs)) - for input_node, input_qspec in annotation.input_qspec_map.items(): - # Find the index of this input node in the op's args + # Verify input annotations (on the input source node, which may differ for fused patterns) + input_annotation: QuantizationAnnotation = input_source_node.meta[ + Q_ANNOTATION_KEY + ] + self.assertEqual( + len(input_annotation.input_qspec_map), len(expected_input_qspecs) + ) + for input_node, input_qspec in input_annotation.input_qspec_map.items(): + # Find the index of this input node in the input source node's args arg_index = None - for i, arg in enumerate(op_node.args): + args = input_source_node.args + assert isinstance(args, tuple) + for i, arg in enumerate(args): if arg is input_node: arg_index = i break self.assertIsNotNone( arg_index, - f"Input node {input_node} not found in op_node.args", + f"Input node {input_node} not found in input_source_node.args", ) # Skip comparison if expected qspec is None (e.g., for DerivedQuantizationSpec) if expected_input_qspecs[arg_index] is not None: