From 2c0af7d18f60865d9b1c5ae7158f57fce439385b Mon Sep 17 00:00:00 2001 From: rahulc7 Date: Tue, 30 Dec 2025 15:01:34 -0800 Subject: [PATCH 1/3] Adding Tests for CadenceDefaultQuantizer Differential Revision: D88899457 --- .../cadence/aot/tests/test_quantizer_ops.py | 101 +++++++++++++++++- 1 file changed, 100 insertions(+), 1 deletion(-) diff --git a/backends/cadence/aot/tests/test_quantizer_ops.py b/backends/cadence/aot/tests/test_quantizer_ops.py index 19a68f9b108..fa53c1818f5 100644 --- a/backends/cadence/aot/tests/test_quantizer_ops.py +++ b/backends/cadence/aot/tests/test_quantizer_ops.py @@ -33,6 +33,7 @@ CadenceWithSoftmaxQuantizer, qconfig_A16, qconfig_A8W8, + qconfig_A8W8sym, ) from executorch.exir.pass_base import NodeMetadata from parameterized import parameterized @@ -53,7 +54,6 @@ # Quantizers intentionally excluded from annotation testing. # These should be explicitly justified when added. EXCLUDED_FROM_ANNOTATION_TESTING: set[type[CadenceQuantizer]] = { - CadenceDefaultQuantizer, # TODO: T247438143 Add test coverage CadenceFusedConvReluQuantizer, # TODO: T247438151 Add test coverage CadenceNopQuantizer, # No-op quantizer, doesn't annotate anything CadenceW8A32MixedQuantizer, # TODO: T247438158 Add test coverage @@ -137,6 +137,61 @@ # For add: both inputs are activations [qconfig_A8W8.input_activation, qconfig_A8W8.input_activation], ), + # CadenceDefaultQuantizer test cases + ( + "default_matmul_A8W8", + lambda self: self._build_matmul_graph(), + CadenceDefaultQuantizer(), + torch.ops.aten.matmul.default, + qconfig_A8W8.output_activation, + # For matmul: both inputs are activations + [qconfig_A8W8.input_activation, qconfig_A8W8.input_activation], + ), + ( + "default_linear_A8W8", + lambda self: self._build_linear_graph(), + CadenceDefaultQuantizer(), + torch.ops.aten.linear.default, + qconfig_A8W8.output_activation, + # For linear: [input_activation, weight] + [qconfig_A8W8.input_activation, qconfig_A8W8.weight], + ), + ( + "default_conv1d_A8W8sym", + lambda self: self._build_conv1d_graph(), + CadenceDefaultQuantizer(), + torch.ops.aten.conv1d.default, + qconfig_A8W8sym.output_activation, + # For conv1d: [input_activation, weight] with symmetric weights + [qconfig_A8W8sym.input_activation, qconfig_A8W8sym.weight], + ), + ( + "default_conv2d_A8W8sym", + lambda self: self._build_conv2d_graph(), + CadenceDefaultQuantizer(), + torch.ops.aten.conv2d.default, + qconfig_A8W8sym.output_activation, + # For conv2d: [input_activation, weight] with symmetric weights + [qconfig_A8W8sym.input_activation, qconfig_A8W8sym.weight], + ), + ( + "default_bmm_A8W8", + lambda self: self._build_bmm_graph(), + CadenceDefaultQuantizer(), + torch.ops.aten.bmm.default, + qconfig_A8W8.output_activation, + # For bmm: both inputs are activations + [qconfig_A8W8.input_activation, qconfig_A8W8.input_activation], + ), + ( + "default_relu_A8W8", + lambda self: self._build_relu_graph(), + CadenceDefaultQuantizer(), + torch.ops.aten.relu.default, + qconfig_A8W8.output_activation, + # For relu: only input_activation + [qconfig_A8W8.input_activation], + ), ] # Derive the set of tested quantizer classes from the test cases. 
@@ -309,6 +364,50 @@ def _build_add_graph(self) -> tuple[torch.fx.GraphModule, torch.fx.Node]: self.assertEqual(len(add_nodes), 1, "Should find exactly one add node") return gm, add_nodes[0] + def _build_bmm_graph(self) -> tuple[torch.fx.GraphModule, torch.fx.Node]: + """Build a simple graph with a bmm (batch matrix multiply) operation.""" + builder = GraphBuilder() + # BMM requires 3D tensors: (batch, n, m) @ (batch, m, p) -> (batch, n, p) + x = builder.placeholder("x", torch.randn(2, 4, 8)) + y = builder.placeholder("y", torch.randn(2, 8, 4)) + bmm = builder.call_operator( + op=torch.ops.aten.bmm.default, + args=(x, y), + meta=NodeMetadata( + {"source_fn_stack": [("bmm", torch.ops.aten.bmm.default)]} + ), + ) + builder.output([bmm]) + gm = builder.get_graph_module() + + bmm_nodes = gm.graph.find_nodes( + op="call_function", + target=torch.ops.aten.bmm.default, + ) + self.assertEqual(len(bmm_nodes), 1, "Should find exactly one bmm node") + return gm, bmm_nodes[0] + + def _build_relu_graph(self) -> tuple[torch.fx.GraphModule, torch.fx.Node]: + """Build a simple graph with a relu operation.""" + builder = GraphBuilder() + x = builder.placeholder("x", torch.randn(1, 10)) + relu = builder.call_operator( + op=torch.ops.aten.relu.default, + args=(x,), + meta=NodeMetadata( + {"source_fn_stack": [("relu", torch.ops.aten.relu.default)]} + ), + ) + builder.output([relu]) + gm = builder.get_graph_module() + + relu_nodes = gm.graph.find_nodes( + op="call_function", + target=torch.ops.aten.relu.default, + ) + self.assertEqual(len(relu_nodes), 1, "Should find exactly one relu node") + return gm, relu_nodes[0] + @parameterized.expand(QUANTIZER_ANNOTATION_TEST_CASES) def test_quantizer_annotation( self, From 636930a55206f422024b725f779ed27a947fe756 Mon Sep 17 00:00:00 2001 From: rahulc7 Date: Tue, 30 Dec 2025 15:01:34 -0800 Subject: [PATCH 2/3] Changing logic to deal with graphs with derived quantization spec Differential Revision: D88955761 --- .../cadence/aot/tests/test_quantizer_ops.py | 71 ++++++++++++++----- 1 file changed, 55 insertions(+), 16 deletions(-) diff --git a/backends/cadence/aot/tests/test_quantizer_ops.py b/backends/cadence/aot/tests/test_quantizer_ops.py index fa53c1818f5..66d61c5c5e3 100644 --- a/backends/cadence/aot/tests/test_quantizer_ops.py +++ b/backends/cadence/aot/tests/test_quantizer_ops.py @@ -64,6 +64,7 @@ # Test case definitions for quantizer annotation tests. # Format: (name, graph_builder_fn, quantizer_instance, target_op, expected_output_qspec, expected_input_qspecs) # Adding a new quantizer test only requires adding a tuple to this list. +# Note: Use None in expected_input_qspecs to skip comparison for that input (e.g., for DerivedQuantizationSpec). QUANTIZER_ANNOTATION_TEST_CASES: list[ tuple[ str, @@ -71,7 +72,7 @@ CadenceQuantizer, OpOverload, QuantizationSpec, - list[QuantizationSpec], + list[QuantizationSpec | None], ] ] = [ ( @@ -192,6 +193,16 @@ # For relu: only input_activation [qconfig_A8W8.input_activation], ), + ( + "default_addmm_A8W8", + lambda self: self._build_addmm_graph(), + CadenceDefaultQuantizer(), + torch.ops.aten.addmm.default, + qconfig_A8W8.output_activation, + # For addmm: [bias (DerivedQuantizationSpec), mat1, mat2] + # Use None to skip comparison for bias since it's a DerivedQuantizationSpec + [None, qconfig_A8W8.input_activation, qconfig_A8W8.weight], + ), ] # Derive the set of tested quantizer classes from the test cases. 
@@ -408,6 +419,31 @@ def _build_relu_graph(self) -> tuple[torch.fx.GraphModule, torch.fx.Node]:
         self.assertEqual(len(relu_nodes), 1, "Should find exactly one relu node")
         return gm, relu_nodes[0]
 
+    def _build_addmm_graph(self) -> tuple[torch.fx.GraphModule, torch.fx.Node]:
+        """Build a simple graph with an addmm operation."""
+        builder = GraphBuilder()
+        # addmm: bias + (mat1 @ mat2)
+        # args: (bias, mat1, mat2)
+        bias = builder.placeholder("bias", torch.randn(5))
+        mat1 = builder.placeholder("mat1", torch.randn(1, 10))
+        mat2 = builder.placeholder("mat2", torch.randn(10, 5))
+        addmm = builder.call_operator(
+            op=torch.ops.aten.addmm.default,
+            args=(bias, mat1, mat2),
+            meta=NodeMetadata(
+                {"source_fn_stack": [("addmm", torch.ops.aten.addmm.default)]}
+            ),
+        )
+        builder.output([addmm])
+        gm = builder.get_graph_module()
+
+        addmm_nodes = gm.graph.find_nodes(
+            op="call_function",
+            target=torch.ops.aten.addmm.default,
+        )
+        self.assertEqual(len(addmm_nodes), 1, "Should find exactly one addmm node")
+        return gm, addmm_nodes[0]
+
     @parameterized.expand(QUANTIZER_ANNOTATION_TEST_CASES)
     def test_quantizer_annotation(
         self,
@@ -416,7 +452,7 @@ def test_quantizer_annotation(
         quantizer: CadenceQuantizer,
         target: OpOverload,
         expected_output_qspec: QuantizationSpec,
-        expected_input_qspecs: list[QuantizationSpec],
+        expected_input_qspecs: list[QuantizationSpec | None],
     ) -> None:
         """Parameterized test for quantizer annotations."""
         gm, op_node = graph_builder_fn(self)
@@ -431,21 +467,24 @@ def test_quantizer_annotation(
 
         # Verify input annotations
         self.assertEqual(len(annotation.input_qspec_map), len(expected_input_qspecs))
-        for i, (input_node, input_qspec) in enumerate(
-            annotation.input_qspec_map.items()
-        ):
-            expected_arg = op_node.args[i]
-            assert isinstance(expected_arg, torch.fx.Node)
-            self.assertEqual(
-                input_node,
-                expected_arg,
-                f"Input node mismatch at index {i}",
-            )
-            self.assertEqual(
-                input_qspec,
-                expected_input_qspecs[i],
-                f"Input qspec mismatch at index {i}",
+        for input_node, input_qspec in annotation.input_qspec_map.items():
+            # Find the index of this input node in the op's args
+            arg_index = None
+            for i, arg in enumerate(op_node.args):
+                if arg is input_node:
+                    arg_index = i
+                    break
+            self.assertIsNotNone(
+                arg_index,
+                f"Input node {input_node} not found in op_node.args",
             )
+            # Skip comparison if expected qspec is None (e.g., for DerivedQuantizationSpec)
+            if expected_input_qspecs[arg_index] is not None:
+                self.assertEqual(
+                    input_qspec,
+                    expected_input_qspecs[arg_index],
+                    f"Input qspec mismatch at arg index {arg_index}",
+                )
 
     def test_all_quantizers_have_annotation_tests(self) -> None:
         """Ensure every CadenceQuantizer subclass is either tested or explicitly excluded."""

From 4217e25097cab42fb7365b4b19e90c5556c91abd Mon Sep 17 00:00:00 2001
From: Rahul Chandra
Date: Tue, 30 Dec 2025 15:10:19 -0800
Subject: [PATCH 3/3] Adding Tests for CadenceFusedConvReluQuantizer (#16358)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/16358

A fused pattern is one in which the quantizer recognizes a sequence of operations and treats them as a single unit for quantization purposes.
So for example, for a Conv2D + ReLU fusion, rather than having something like this: ``` input → [quantize] → conv2d → [dequantize] → [quantize] → relu → [dequantize] → output ``` a fused pattern quantizes them together like so: ``` input → [quantize] → conv2d → relu → [dequantize] → output ``` We need to make a few changes in our framework to test this. # Change 1: We allow graph builders to return a 3rd element for fused patterns For fused patterns like conv+relu, the quantization annotations are split across two nodes: - Output annotation is on the relu node (the final output of the fused pattern) - Input annotations are on the conv node (where the quantized inputs enter) The existing graph builders return (gm, target_node), which works for single-op patterns where both annotations are on the same node. For fused patterns, we need to know both nodes, so graph builders can now optionally return (gm, output_node, input_source_node). # Change 2: We check annotations on the correct nodes for fused patterns The test previously assumed output_qspec and input_qspec_map were both on the same node. For fused patterns, they're on different nodes: - output_qspec is checked on the output node (relu) - input_qspec_map is checked on the input source node (conv) This change is backwards-compatible: for non-fused patterns, both nodes are the same. Reviewed By: hsharma35 Differential Revision: D89630759 --- .../cadence/aot/tests/test_quantizer_ops.py | 104 +++++++++++++++--- 1 file changed, 90 insertions(+), 14 deletions(-) diff --git a/backends/cadence/aot/tests/test_quantizer_ops.py b/backends/cadence/aot/tests/test_quantizer_ops.py index 66d61c5c5e3..831ab3b95b6 100644 --- a/backends/cadence/aot/tests/test_quantizer_ops.py +++ b/backends/cadence/aot/tests/test_quantizer_ops.py @@ -46,15 +46,18 @@ # Type alias for graph builder functions. # These functions take a test instance and return a graph module and the target op node. +# For fused patterns (e.g., conv+relu), an optional third element specifies the node +# whose args contain the quantized inputs (e.g., conv node for conv+relu fusion). GraphBuilderFn = Callable[ - ["QuantizerAnnotationTest"], tuple[torch.fx.GraphModule, torch.fx.Node] + ["QuantizerAnnotationTest"], + tuple[torch.fx.GraphModule, torch.fx.Node] + | tuple[torch.fx.GraphModule, torch.fx.Node, torch.fx.Node], ] # Quantizers intentionally excluded from annotation testing. # These should be explicitly justified when added. EXCLUDED_FROM_ANNOTATION_TESTING: set[type[CadenceQuantizer]] = { - CadenceFusedConvReluQuantizer, # TODO: T247438151 Add test coverage CadenceNopQuantizer, # No-op quantizer, doesn't annotate anything CadenceW8A32MixedQuantizer, # TODO: T247438158 Add test coverage CadenceRmsNormNopQuantizer, # No-op quantizer, doesn't annotate anything, preserves rms_norm from decomposition @@ -203,6 +206,16 @@ # Use None to skip comparison for bias since it's a DerivedQuantizationSpec [None, qconfig_A8W8.input_activation, qconfig_A8W8.weight], ), + # CadenceFusedConvReluQuantizer test cases + ( + "fused_conv2d_relu_A8W8sym", + lambda self: self._build_conv2d_relu_graph(), + CadenceFusedConvReluQuantizer(), + torch.ops.aten.relu.default, + qconfig_A8W8sym.output_activation, + # For fused conv2d+relu: [input_activation, weight] from conv2d node + [qconfig_A8W8sym.input_activation, qconfig_A8W8sym.weight], + ), ] # Derive the set of tested quantizer classes from the test cases. 
@@ -444,6 +457,52 @@ def _build_addmm_graph(self) -> tuple[torch.fx.GraphModule, torch.fx.Node]: self.assertEqual(len(addmm_nodes), 1, "Should find exactly one addmm node") return gm, addmm_nodes[0] + def _build_conv2d_relu_graph( + self, + ) -> tuple[torch.fx.GraphModule, torch.fx.Node, torch.fx.Node]: + """Build a graph with a conv2d followed by relu (fused pattern). + + Returns: + A tuple of (graph_module, relu_node, conv_node). + The relu_node is the target node where the annotation is placed. + The conv_node is the input source node whose args contain the quantized inputs. + """ + builder = GraphBuilder() + # Input shape: (batch, in_channels, height, width) + x = builder.placeholder("x", torch.randn(1, 3, 8, 8)) + # Weight shape: (out_channels, in_channels, kernel_h, kernel_w) + weight = builder.placeholder("weight", torch.randn(6, 3, 3, 3)) + conv2d = builder.call_operator( + op=torch.ops.aten.conv2d.default, + args=(x, weight), + meta=NodeMetadata( + {"source_fn_stack": [("conv2d", torch.ops.aten.conv2d.default)]} + ), + ) + relu = builder.call_operator( + op=torch.ops.aten.relu.default, + args=(conv2d,), + meta=NodeMetadata( + {"source_fn_stack": [("relu", torch.ops.aten.relu.default)]} + ), + ) + builder.output([relu]) + gm = builder.get_graph_module() + + relu_nodes = gm.graph.find_nodes( + op="call_function", + target=torch.ops.aten.relu.default, + ) + self.assertEqual(len(relu_nodes), 1, "Should find exactly one relu node") + + conv2d_nodes = gm.graph.find_nodes( + op="call_function", + target=torch.ops.aten.conv2d.default, + ) + self.assertEqual(len(conv2d_nodes), 1, "Should find exactly one conv2d node") + + return gm, relu_nodes[0], conv2d_nodes[0] + @parameterized.expand(QUANTIZER_ANNOTATION_TEST_CASES) def test_quantizer_annotation( self, @@ -455,28 +514,45 @@ def test_quantizer_annotation( expected_input_qspecs: list[QuantizationSpec | None], ) -> None: """Parameterized test for quantizer annotations.""" - gm, op_node = graph_builder_fn(self) + result = graph_builder_fn(self) + # Handle both 2-element and 3-element returns from graph builders. + # For fused patterns, the 3rd element specifies the node whose args + # contain the quantized inputs (e.g., conv node for conv+relu fusion). 
+ if len(result) == 3: + gm = result[0] + output_node = result[1] + input_source_node = result[2] + else: + gm = result[0] + output_node = result[1] + input_source_node = output_node quantizer.annotate(gm) - annotation: QuantizationAnnotation = op_node.meta[Q_ANNOTATION_KEY] - self.assertTrue(annotation._annotated) - - # Verify output annotation - self.assertEqual(annotation.output_qspec, expected_output_qspec) + # Verify output annotation (always on the output node) + output_annotation: QuantizationAnnotation = output_node.meta[Q_ANNOTATION_KEY] + self.assertTrue(output_annotation._annotated) + self.assertEqual(output_annotation.output_qspec, expected_output_qspec) - # Verify input annotations - self.assertEqual(len(annotation.input_qspec_map), len(expected_input_qspecs)) - for input_node, input_qspec in annotation.input_qspec_map.items(): - # Find the index of this input node in the op's args + # Verify input annotations (on the input source node, which may differ for fused patterns) + input_annotation: QuantizationAnnotation = input_source_node.meta[ + Q_ANNOTATION_KEY + ] + self.assertEqual( + len(input_annotation.input_qspec_map), len(expected_input_qspecs) + ) + for input_node, input_qspec in input_annotation.input_qspec_map.items(): + # Find the index of this input node in the input source node's args arg_index = None - for i, arg in enumerate(op_node.args): + args = input_source_node.args + assert isinstance(args, tuple) + for i, arg in enumerate(args): if arg is input_node: arg_index = i break self.assertIsNotNone( arg_index, - f"Input node {input_node} not found in op_node.args", + f"Input node {input_node} not found in input_source_node.args", ) # Skip comparison if expected qspec is None (e.g., for DerivedQuantizationSpec) if expected_input_qspecs[arg_index] is not None: