From aa935c392a5e4390f1f7340a05dca9f1bdecffdd Mon Sep 17 00:00:00 2001 From: feiniaofeiafei Date: Tue, 30 Dec 2025 20:13:36 +0800 Subject: [PATCH 1/7] fix --- .../doris/nereids/properties/FuncDeps.java | 29 ++++++++++++----- .../rewrite/PushDownAggThroughJoinOnPkFk.java | 31 ++++++++++++++----- .../doris/regression/suite/Suite.groovy | 5 +++ 3 files changed, 51 insertions(+), 14 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java index 849736ef51a737..6bbc937c60f46a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java @@ -183,6 +183,7 @@ public boolean isFuncDeps(Set dominate, Set dependency) { return items.contains(new FuncDepsItem(dominate, dependency)); } + // 这个也是判断是否为双射的 public boolean isCircleDeps(Set dominate, Set dependency) { return items.contains(new FuncDepsItem(dominate, dependency)) && items.contains(new FuncDepsItem(dependency, dominate)); @@ -201,16 +202,30 @@ public Map, Set>> getREdges() { } /** - * find the determinants of dependencies + * Finds all slot sets that have a bijective relationship with the given slot set. + * Given edges containing: + * {A} -> {{B}, {C}} + * {B} -> {{A}, {D}} + * {C} -> {{D}} + * When slot = {A}, returns {{B}} because {A} and {B} mutually determine each other. + * {C} is not returned because {C} does not determine {A} (one-way dependency only).
*/ - public Set> findDeterminats(Set dependency) { - Set> determinants = new HashSet<>(); - for (FuncDepsItem item : items) { - if (item.dependencies.equals(dependency)) { - determinants.add(item.determinants); + public Set> findBijectionSlots(Set slot) { + Set> bijectionSlots = new HashSet<>(); + if (!edges.containsKey(slot)) { + return bijectionSlots; + } + for (Set dep : edges.get(slot)) { + if (!edges.containsKey(dep)) { + continue; + } + for (Set det : edges.get(dep)) { + if (det.equals(slot)) { + bijectionSlots.add(dep); + } } } - return determinants; + return bijectionSlots; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java index 2aeb59ae9c73f9..8635bd36d55275 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java @@ -47,6 +47,7 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.stream.Collectors; /** * Push down agg through join with foreign key: @@ -131,13 +132,27 @@ private LogicalAggregate eliminatePrimaryOutput(LogicalAggregate agg, Plan if (primary.getOutputSet().stream().noneMatch(aggInputs::contains)) { return agg; } - Set primaryOutputSet = primary.getOutputSet(); - Set primarySlots = Sets.intersection(aggInputs, primaryOutputSet); + // Firstly, using fd to eliminate group by key. 
+ // group by primary_table_pk, primary_table_other + // -> group by primary_table_pk + Set> groupBySlots = new HashSet<>(); + for (Expression expression : agg.getGroupByExpressions()) { + groupBySlots.add(expression.getInputSlots()); + } DataTrait dataTrait = child.getLogicalProperties().getTrait(); FuncDeps funcDeps = dataTrait.getAllValidFuncDeps(Sets.union(foreign.getOutputSet(), primary.getOutputSet())); + Set foreignOutput = Sets.intersection(agg.getOutputSet(), foreign.getOutputSet()); + Set> minGroupBySlots = funcDeps.eliminateDeps(groupBySlots, foreignOutput); + List minGroupBySlotList = minGroupBySlots.stream().flatMap(Set::stream).collect(Collectors.toList()); + + // Secondly, put bijective slot into map: {primary_table_pk : foreign_table_fk} + // Bijective slots are mutually interchangeable within GROUP BY keys. + // group by primary_table_pk equals group by foreign_table_fk + Set primaryOutputSet = primary.getOutputSet(); + Set primarySlots = Sets.intersection(aggInputs, primaryOutputSet); HashMap primaryToForeignDeps = new HashMap<>(); for (Slot slot : primarySlots) { - Set> replacedSlotSets = funcDeps.findDeterminats(ImmutableSet.of(slot)); + Set> replacedSlotSets = funcDeps.findBijectionSlots(ImmutableSet.of(slot)); for (Set replacedSlots : replacedSlotSets) { if (primaryOutputSet.stream().noneMatch(replacedSlots::contains) && replacedSlots.size() == 1) { @@ -147,7 +162,9 @@ private LogicalAggregate eliminatePrimaryOutput(LogicalAggregate agg, Plan } } - Set newGroupBySlots = constructNewGroupBy(agg, primaryOutputSet, primaryToForeignDeps); + // Thirdly, construct new Agg below join. 
+ Set newGroupBySlots = constructNewGroupBy(minGroupBySlotList, primaryOutputSet, + primaryToForeignDeps); List newOutput = constructNewOutput( agg, primaryOutputSet, primaryToForeignDeps, funcDeps, primary); if (newGroupBySlots == null || newOutput == null) { @@ -156,10 +173,10 @@ private LogicalAggregate eliminatePrimaryOutput(LogicalAggregate agg, Plan return agg.withGroupByAndOutput(ImmutableList.copyOf(newGroupBySlots), ImmutableList.copyOf(newOutput)); } - private @Nullable Set constructNewGroupBy(LogicalAggregate agg, Set primaryOutputs, - Map primaryToForeignBiDeps) { + private @Nullable Set constructNewGroupBy(List gbyExpression, + Set primaryOutputs, Map primaryToForeignBiDeps) { Set newGroupBySlots = new HashSet<>(); - for (Expression expression : agg.getGroupByExpressions()) { + for (Expression expression : gbyExpression) { if (!(expression instanceof Slot)) { return null; } diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy index da6ee9fe4426a1..7c457fb32f28c8 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy @@ -3545,4 +3545,9 @@ class Suite implements GroovyInterceptable { GlobalLock.unlock(lockName) } } + + def explain_and_result = { tag, sql -> + "qt_${tag}_shape" "explain shape plan ${sql}" + "order_qt_${tag}_result" "${sql}" + } } From 42a5c708ca91084c5766a5369bd5c28abc8999ac Mon Sep 17 00:00:00 2001 From: feiniaofeiafei Date: Tue, 30 Dec 2025 20:37:26 +0800 Subject: [PATCH 2/7] add case --- .../agg_join_pkfk/agg_join_pkfk.out | 36 +++++++++++++++++++ .../doris/regression/suite/Suite.groovy | 5 --- 2 files changed, 36 insertions(+), 5 deletions(-) create mode 100644 regression-test/data/nereids_rules_p0/agg_join_pkfk/agg_join_pkfk.out diff --git 
a/regression-test/data/nereids_rules_p0/agg_join_pkfk/agg_join_pkfk.out b/regression-test/data/nereids_rules_p0/agg_join_pkfk/agg_join_pkfk.out new file mode 100644 index 00000000000000..4a514373e805aa --- /dev/null +++ b/regression-test/data/nereids_rules_p0/agg_join_pkfk/agg_join_pkfk.out @@ -0,0 +1,36 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !not_push_down_shape -- +PhysicalResultSink +--hashAgg[GLOBAL] +----hashAgg[LOCAL] +------hashJoin[INNER_JOIN] hashCondition=((store_sales_test.ss_customer_sk = customer_test.c_customer_sk)) otherCondition=() +--------PhysicalOlapScan[store_sales_test] +--------PhysicalOlapScan[customer_test] + +-- !not_push_down_result -- +Smith John 2024-01-01 + +-- !push_down_shape -- +PhysicalResultSink +--hashJoin[INNER_JOIN] hashCondition=((store_sales_test.ss_customer_sk = customer_test.c_customer_sk)) otherCondition=() +----hashAgg[GLOBAL] +------hashAgg[LOCAL] +--------PhysicalOlapScan[store_sales_test] +----PhysicalOlapScan[customer_test] + +-- !push_down_result -- +John 1 2024-01-01 +John 2 2024-01-01 + +-- !push_down_with_count_shape -- +PhysicalResultSink +--hashJoin[INNER_JOIN] hashCondition=((store_sales_test.ss_customer_sk = customer_test.c_customer_sk)) otherCondition=() +----hashAgg[GLOBAL] +------hashAgg[LOCAL] +--------PhysicalOlapScan[store_sales_test] +----PhysicalOlapScan[customer_test] + +-- !push_down_with_count_result -- +John 1 2024-01-01 1 +John 2 2024-01-01 1 + diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy index 7c457fb32f28c8..da6ee9fe4426a1 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy @@ -3545,9 +3545,4 @@ class Suite implements GroovyInterceptable { 
GlobalLock.unlock(lockName) } } - - def explain_and_result = { tag, sql -> - "qt_${tag}_shape" "explain shape plan ${sql}" - "order_qt_${tag}_result" "${sql}" - } } From bdca54ce9bee936e2faf9719bee5ab40d4528d6b Mon Sep 17 00:00:00 2001 From: feiniaofeiafei Date: Wed, 31 Dec 2025 14:06:31 +0800 Subject: [PATCH 3/7] fix --- .../doris/nereids/properties/FuncDeps.java | 23 ++++++++-------- .../rewrite/PushDownAggThroughJoinOnPkFk.java | 27 +++++++++++++------ 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java index 6bbc937c60f46a..928586ef4590ef 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java @@ -144,27 +144,28 @@ private Set findValidItems(Set requireOutputs) { *

* Example: * Given: - * - Initial slots: {{A, B, C}, {D, E}, {F, G}} - * - Required outputs: {A, D, F} - * - Valid functional dependencies: {A} -> {B}, {D, E} -> {G}, {F} -> {G} + * - Initial slots: {{A}, {B}, {C}, {D}, {E}} + * - Required outputs: {} + * - validItems: {A} -> {B}, {B} -> {C}, {C} -> {D}, {D} -> {A}, {A} -> {E} * * Process: - * 1. Start with minSlotSet = {{A, B, C}, {D, E}, {F, G}} + * 1. Start with minSlotSet = {{A}, {B}, {C}, {D}, {E}} * 2. For {A} -> {B}: * - Both {A} and {B} are in minSlotSet, so mark {B} for elimination - * 3. For {D, E} -> {G}: - * - Both {D, E} and {G} are in minSlotSet, so mark {G} for elimination - * 4. For {F} -> {G}: - * - Both {F} and {G} are in minSlotSet, but {G} is already marked for elimination - * 5. Remove eliminated slots: {B} and {G} + * 3. For {B} -> {C}: + * - Both {B} and {C} are in minSlotSet, so mark {C} for elimination + * 4. For {C} -> {D}: + * - Both {C} and {D} are in minSlotSet, so mark {D} for elimination + * 5. For {A} -> {E}: + * - Both {A} and {E} are in minSlotSet, so mark {E} for elimination * - * Result: {{A, C}, {D, E}, {F}} + * Result: {{A}} *

* * @param slots the initial set of slot sets to be reduced * @param requireOutputs the set of slots that must be preserved in the output * @return the minimal set of slot sets after applying all possible reductions - */ + */ public Set> eliminateDeps(Set> slots, Set requireOutputs) { Set> minSlotSet = Sets.newHashSet(slots); Set> eliminatedSlots = new HashSet<>(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java index 8635bd36d55275..1aba1c79ae159a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java @@ -47,7 +47,6 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; -import java.util.stream.Collectors; /** * Push down agg through join with foreign key: @@ -136,14 +135,27 @@ private LogicalAggregate eliminatePrimaryOutput(LogicalAggregate agg, Plan // group by primary_table_pk, primary_table_other // -> group by primary_table_pk Set> groupBySlots = new HashSet<>(); + Set validSlots = new HashSet<>(); for (Expression expression : agg.getGroupByExpressions()) { groupBySlots.add(expression.getInputSlots()); + validSlots.addAll(expression.getInputSlots()); } DataTrait dataTrait = child.getLogicalProperties().getTrait(); - FuncDeps funcDeps = dataTrait.getAllValidFuncDeps(Sets.union(foreign.getOutputSet(), primary.getOutputSet())); + FuncDeps funcDeps = dataTrait.getAllValidFuncDeps(validSlots); Set foreignOutput = Sets.intersection(agg.getOutputSet(), foreign.getOutputSet()); Set> minGroupBySlots = funcDeps.eliminateDeps(groupBySlots, foreignOutput); - List minGroupBySlotList = minGroupBySlots.stream().flatMap(Set::stream).collect(Collectors.toList()); + Set removeExpression = new HashSet<>(); + for (Set slots : groupBySlots) { 
+ if (!minGroupBySlots.contains(slots) && !foreignOutput.containsAll(slots)) { + removeExpression.add(slots.iterator().next()); + } + } + List minGroupBySlotList = new ArrayList<>(); + for (Expression expression : agg.getGroupByExpressions()) { + if (!removeExpression.contains(expression)) { + minGroupBySlotList.add(expression); + } + } // Secondly, put bijective slot into map: {primary_table_pk : foreign_table_fk} // Bijective slots are mutually interchangeable within GROUP BY keys. @@ -151,8 +163,9 @@ private LogicalAggregate eliminatePrimaryOutput(LogicalAggregate agg, Plan Set primaryOutputSet = primary.getOutputSet(); Set primarySlots = Sets.intersection(aggInputs, primaryOutputSet); HashMap primaryToForeignDeps = new HashMap<>(); + FuncDeps funcDepsForJoin = dataTrait.getAllValidFuncDeps(Sets.union(primaryOutputSet, foreign.getOutputSet())); for (Slot slot : primarySlots) { - Set> replacedSlotSets = funcDeps.findBijectionSlots(ImmutableSet.of(slot)); + Set> replacedSlotSets = funcDepsForJoin.findBijectionSlots(ImmutableSet.of(slot)); for (Set replacedSlots : replacedSlotSets) { if (primaryOutputSet.stream().noneMatch(replacedSlots::contains) && replacedSlots.size() == 1) { @@ -166,7 +179,7 @@ private LogicalAggregate eliminatePrimaryOutput(LogicalAggregate agg, Plan Set newGroupBySlots = constructNewGroupBy(minGroupBySlotList, primaryOutputSet, primaryToForeignDeps); List newOutput = constructNewOutput( - agg, primaryOutputSet, primaryToForeignDeps, funcDeps, primary); + agg, primaryOutputSet, primaryToForeignDeps, funcDepsForJoin, primary); if (newGroupBySlots == null || newOutput == null) { return null; } @@ -213,9 +226,7 @@ private LogicalAggregate eliminatePrimaryOutput(LogicalAggregate agg, Plan && expression.child(0).child(0) instanceof Slot) { // count(slot) can be rewritten by circle deps Slot slot = (Slot) expression.child(0).child(0); - if (primaryToForeignDeps.containsKey(slot) - && funcDeps.isCircleDeps( - ImmutableSet.of(slot), 
ImmutableSet.of(primaryToForeignDeps.get(slot)))) { + if (primaryToForeignDeps.containsKey(slot)) { expression = (NamedExpression) expression.rewriteUp(e -> e instanceof Slot ? primaryToForeignDeps.getOrDefault((Slot) e, (Slot) e) From 930f030b2237862b8865c514906e0206276fa5aa Mon Sep 17 00:00:00 2001 From: feiniaofeiafei Date: Mon, 5 Jan 2026 22:22:33 +0800 Subject: [PATCH 4/7] fix --- .../doris/nereids/properties/FuncDeps.java | 1 - .../rules/rewrite/EliminateGroupByKey.java | 43 +++++++++++-------- .../rewrite/PushDownAggThroughJoinOnPkFk.java | 36 ++++++---------- 3 files changed, 40 insertions(+), 40 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java index 928586ef4590ef..879b2de9fe6468 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java @@ -184,7 +184,6 @@ public boolean isFuncDeps(Set dominate, Set dependency) { return items.contains(new FuncDepsItem(dominate, dependency)); } - // 这个也是判断是否为双射的 public boolean isCircleDeps(Set dominate, Set dependency) { return items.contains(new FuncDepsItem(dominate, dependency)) && items.contains(new FuncDepsItem(dependency, dominate)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKey.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKey.java index fbe0988daff5bc..4e1b3117ab53ff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKey.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKey.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.rules.rewrite; import org.apache.doris.nereids.annotation.DependsRules; +import org.apache.doris.nereids.properties.DataTrait; import org.apache.doris.nereids.properties.FuncDeps; 
import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; @@ -78,6 +79,28 @@ public List buildRules() { } LogicalAggregate eliminateGroupByKey(LogicalAggregate agg, Set requireOutput) { + Set removeExpression = findCanBeRemovedExpressions(agg, requireOutput, + agg.child().getLogicalProperties().getTrait()); + List newGroupExpression = new ArrayList<>(); + for (Expression expression : agg.getGroupByExpressions()) { + if (!removeExpression.contains(expression)) { + newGroupExpression.add(expression); + } + } + List newOutput = new ArrayList<>(); + for (NamedExpression expression : agg.getOutputExpressions()) { + if (!removeExpression.contains(expression)) { + newOutput.add(expression); + } + } + return agg.withGroupByAndOutput(newGroupExpression, newOutput); + } + + /** + * return removeExpression + */ + public static Set findCanBeRemovedExpressions(LogicalAggregate agg, + Set requireOutput, DataTrait dataTrait) { Map> groupBySlots = new HashMap<>(); Set validSlots = new HashSet<>(); for (Expression expression : agg.getGroupByExpressions()) { @@ -85,10 +108,9 @@ LogicalAggregate eliminateGroupByKey(LogicalAggregate agg, validSlots.addAll(expression.getInputSlots()); } - FuncDeps funcDeps = agg.child().getLogicalProperties() - .getTrait().getAllValidFuncDeps(validSlots); + FuncDeps funcDeps = dataTrait.getAllValidFuncDeps(validSlots); if (funcDeps.isEmpty()) { - return null; + return new HashSet<>(); } Set> minGroupBySlots = funcDeps.eliminateDeps(new HashSet<>(groupBySlots.values()), requireOutput); @@ -99,19 +121,6 @@ LogicalAggregate eliminateGroupByKey(LogicalAggregate agg, removeExpression.add(entry.getKey()); } } - - List newGroupExpression = new ArrayList<>(); - for (Expression expression : agg.getGroupByExpressions()) { - if (!removeExpression.contains(expression)) { - newGroupExpression.add(expression); - } - } - List newOutput = new ArrayList<>(); - for (NamedExpression expression : agg.getOutputExpressions()) { - if 
(!removeExpression.contains(expression)) { - newOutput.add(expression); - } - } - return agg.withGroupByAndOutput(newGroupExpression, newOutput); + return removeExpression; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java index 1aba1c79ae159a..28dcc005ce41bb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java @@ -18,7 +18,6 @@ package org.apache.doris.nereids.rules.rewrite; import org.apache.doris.common.Pair; -import org.apache.doris.nereids.properties.DataTrait; import org.apache.doris.nereids.properties.FuncDeps; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; @@ -125,6 +124,9 @@ public List buildRules() { } // eliminate the slot of primary plan in agg + // e.g. + // select primary_table_pk, primary_table_other from primary_table join foreign_table on pk = fk + // group by pk, primary_table_other_cols; private LogicalAggregate eliminatePrimaryOutput(LogicalAggregate agg, Plan child, Plan primary, Plan foreign) { Set aggInputs = agg.getInputSlots(); @@ -132,24 +134,11 @@ private LogicalAggregate eliminatePrimaryOutput(LogicalAggregate agg, Plan return agg; } // Firstly, using fd to eliminate group by key. 
- // group by primary_table_pk, primary_table_other - // -> group by primary_table_pk - Set> groupBySlots = new HashSet<>(); - Set validSlots = new HashSet<>(); - for (Expression expression : agg.getGroupByExpressions()) { - groupBySlots.add(expression.getInputSlots()); - validSlots.addAll(expression.getInputSlots()); - } - DataTrait dataTrait = child.getLogicalProperties().getTrait(); - FuncDeps funcDeps = dataTrait.getAllValidFuncDeps(validSlots); - Set foreignOutput = Sets.intersection(agg.getOutputSet(), foreign.getOutputSet()); - Set> minGroupBySlots = funcDeps.eliminateDeps(groupBySlots, foreignOutput); - Set removeExpression = new HashSet<>(); - for (Set slots : groupBySlots) { - if (!minGroupBySlots.contains(slots) && !foreignOutput.containsAll(slots)) { - removeExpression.add(slots.iterator().next()); - } - } + // group by pk, primary_table_other_cols; + // -> group by pk; + Set removeExpression = EliminateGroupByKey.findCanBeRemovedExpressions(agg, + Sets.intersection(agg.getOutputSet(), foreign.getOutputSet()), + child.getLogicalProperties().getTrait()); List minGroupBySlotList = new ArrayList<>(); for (Expression expression : agg.getGroupByExpressions()) { if (!removeExpression.contains(expression)) { @@ -157,13 +146,14 @@ private LogicalAggregate eliminatePrimaryOutput(LogicalAggregate agg, Plan } } - // Secondly, put bijective slot into map: {primary_table_pk : foreign_table_fk} + // Secondly, put bijective slot into map: {pk : fk} // Bijective slots are mutually interchangeable within GROUP BY keys. 
- // group by primary_table_pk equals group by foreign_table_fk + // group by pk -> group by fk Set primaryOutputSet = primary.getOutputSet(); Set primarySlots = Sets.intersection(aggInputs, primaryOutputSet); HashMap primaryToForeignDeps = new HashMap<>(); - FuncDeps funcDepsForJoin = dataTrait.getAllValidFuncDeps(Sets.union(primaryOutputSet, foreign.getOutputSet())); + FuncDeps funcDepsForJoin = child.getLogicalProperties().getTrait() + .getAllValidFuncDeps(Sets.union(primaryOutputSet, foreign.getOutputSet())); for (Slot slot : primarySlots) { Set> replacedSlotSets = funcDepsForJoin.findBijectionSlots(ImmutableSet.of(slot)); for (Set replacedSlots : replacedSlotSets) { @@ -176,6 +166,8 @@ private LogicalAggregate eliminatePrimaryOutput(LogicalAggregate agg, Plan } // Thirdly, construct new Agg below join. + // For the pk-fk join, the foreign table side will not expand rows. + // As a result, executing agg(group by fk) before join is the same as executing agg(group by fk) after join. Set newGroupBySlots = constructNewGroupBy(minGroupBySlotList, primaryOutputSet, primaryToForeignDeps); List newOutput = constructNewOutput( From c67ddf6b22d5b1254a32ef6aa6888c407182a520 Mon Sep 17 00:00:00 2001 From: feiniaofeiafei Date: Tue, 6 Jan 2026 10:41:50 +0800 Subject: [PATCH 5/7] fix shape --- .../tpcds_sf100/noStatsRfPrune/query38.out | 81 +++++++++---------- .../tpcds_sf100/noStatsRfPrune/query87.out | 81 +++++++++---------- .../tpcds_sf100/no_stats_shape/query38.out | 81 +++++++++---------- .../tpcds_sf100/no_stats_shape/query87.out | 81 +++++++++---------- .../tpcds_sf100/rf_prune/query38.out | 81 +++++++++---------- .../tpcds_sf100/rf_prune/query87.out | 81 +++++++++---------- .../shape_check/tpcds_sf100/shape/query38.out | 81 +++++++++---------- .../shape_check/tpcds_sf100/shape/query87.out | 81 +++++++++---------- .../shape_check/tpcds_sf1000/hint/query38.out | 81 +++++++++---------- .../shape_check/tpcds_sf1000/hint/query87.out | 81 +++++++++----------
.../tpcds_sf1000/shape/query38.out | 81 +++++++++---------- .../tpcds_sf1000/shape/query87.out | 81 +++++++++---------- 12 files changed, 468 insertions(+), 504 deletions(-) diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query38.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query38.out index d4857e3c0cd68b..1008fac09b2f11 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query38.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query38.out @@ -8,49 +8,46 @@ PhysicalResultSink ----------hashAgg[LOCAL] ------------PhysicalProject --------------PhysicalIntersect RFV2: RF6[c_last_name->c_last_name] RF7[c_last_name->c_last_name] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) -------------------------------------PhysicalOlapScan[date_dim] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] -----------------PhysicalDistribute[DistributionSpecHash] 
-------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) -------------------------------------PhysicalOlapScan[date_dim] +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1 +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[customer] +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) +------------------------------PhysicalOlapScan[date_dim] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] RFV2: RF6 
-----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) -------------------------------------PhysicalOlapScan[date_dim] +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3 +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[customer] RFV2: RF6 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) +------------------------------PhysicalOlapScan[date_dim] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject 
-------------------------PhysicalOlapScan[customer] RFV2: RF7 +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ss_sold_date_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[customer] RFV2: RF7 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) +------------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query87.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query87.out index 0af75be03b5a96..f7c070bd942411 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query87.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query87.out @@ -6,49 +6,46 @@ PhysicalResultSink ------hashAgg[LOCAL] --------PhysicalProject ----------PhysicalExcept -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -----------------------------PhysicalProject 
-------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) ---------------------------------PhysicalOlapScan[date_dim] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) ---------------------------------PhysicalOlapScan[date_dim] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] 
+----------------------PhysicalProject +------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) +--------------------------PhysicalOlapScan[date_dim] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) ---------------------------------PhysicalOlapScan[date_dim] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3 +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] +----------------------PhysicalProject +------------------------filter((date_dim.d_month_seq <= 1195) 
and (date_dim.d_month_seq >= 1184)) +--------------------------PhysicalOlapScan[date_dim] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[web_sales] apply RFs: RF5 +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] +----------------------PhysicalProject +------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) +--------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query38.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query38.out index fa43188e97d919..a56a536123e54f 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query38.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query38.out @@ -8,49 +8,46 @@ PhysicalResultSink ----------hashAgg[LOCAL] ------------PhysicalProject --------------PhysicalIntersect RFV2: RF6[c_last_name->c_last_name] RF7[c_last_name->c_last_name] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ws_bill_customer_sk] -----------------------hashAgg[GLOBAL] 
-------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) -------------------------------------PhysicalOlapScan[date_dim] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) -------------------------------------PhysicalOlapScan[date_dim] +------------------------hashJoin[INNER_JOIN 
broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF0 c_customer_sk->[ws_bill_customer_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[customer] +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) +------------------------------PhysicalOlapScan[date_dim] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] RFV2: RF6 -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk] -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 
1183)) -------------------------------------PhysicalOlapScan[date_dim] +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[cs_bill_customer_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[customer] RFV2: RF6 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) +------------------------------PhysicalOlapScan[date_dim] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] RFV2: RF7 +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ss_sold_date_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 c_customer_sk->[ss_customer_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[customer] RFV2: RF7 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) 
+------------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query87.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query87.out index b67550c0227918..a31c5dc9e114cf 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query87.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query87.out @@ -6,49 +6,46 @@ PhysicalResultSink ------hashAgg[LOCAL] --------PhysicalProject ----------PhysicalExcept -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ss_customer_sk] -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) ---------------------------------PhysicalOlapScan[date_dim] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] 
-------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) ---------------------------------PhysicalOlapScan[date_dim] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF0 c_customer_sk->[ss_customer_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] +----------------------PhysicalProject +------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) +--------------------------PhysicalOlapScan[date_dim] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ws_bill_customer_sk] 
-------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) ---------------------------------PhysicalOlapScan[date_dim] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[cs_bill_customer_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] +----------------------PhysicalProject +------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) +--------------------------PhysicalOlapScan[date_dim] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN 
broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 c_customer_sk->[ws_bill_customer_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] +----------------------PhysicalProject +------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) +--------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query38.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query38.out index d4857e3c0cd68b..7534ddcc8c2579 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query38.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query38.out @@ -8,49 +8,46 @@ PhysicalResultSink ----------hashAgg[LOCAL] ------------PhysicalProject --------------PhysicalIntersect RFV2: RF6[c_last_name->c_last_name] RF7[c_last_name->c_last_name] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) 
-------------------------------------PhysicalOlapScan[date_dim] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) -------------------------------------PhysicalOlapScan[date_dim] +------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) +----------------------------------PhysicalOlapScan[date_dim] 
+--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] RFV2: RF6 -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) -------------------------------------PhysicalOlapScan[date_dim] +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) 
+----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] RFV2: RF6 +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] RFV2: RF7 +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] RFV2: RF7 diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query87.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query87.out index 00d730cfd4cb68..32c3855f30b3fa 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query87.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query87.out @@ -6,49 +6,46 @@ PhysicalResultSink ------hashAgg[LOCAL] --------PhysicalProject ----------PhysicalExcept RFV2: RF6[c_last_name->c_last_name] RF7[c_last_name->c_last_name] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() -------------------hashAgg[GLOBAL] 
---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) ---------------------------------PhysicalOlapScan[date_dim] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) ---------------------------------PhysicalOlapScan[date_dim] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() +----------------------PhysicalProject 
+------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] RFV2: RF6 -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) ---------------------------------PhysicalOlapScan[date_dim] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] RFV2: RF6 +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] RFV2: RF7 +--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] RFV2: RF7 diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query38.out b/regression-test/data/shape_check/tpcds_sf100/shape/query38.out index fa43188e97d919..e55825e7e76457 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query38.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query38.out @@ -8,49 +8,46 @@ PhysicalResultSink ----------hashAgg[LOCAL] ------------PhysicalProject --------------PhysicalIntersect RFV2: 
RF6[c_last_name->c_last_name] RF7[c_last_name->c_last_name] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ws_bill_customer_sk] -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) -------------------------------------PhysicalOlapScan[date_dim] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] 
---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) -------------------------------------PhysicalOlapScan[date_dim] +------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ws_bill_customer_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] RFV2: RF6 -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk] -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) -------------------------------------PhysicalOlapScan[date_dim] +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] RFV2: RF6 +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] RFV2: RF7 +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 
d_date_sk->[ss_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] RFV2: RF7 diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query87.out b/regression-test/data/shape_check/tpcds_sf100/shape/query87.out index cfd1a87f7da01a..247dfbbbc89a90 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query87.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query87.out @@ -6,49 +6,46 @@ PhysicalResultSink ------hashAgg[LOCAL] --------PhysicalProject ----------PhysicalExcept RFV2: RF6[c_last_name->c_last_name] RF7[c_last_name->c_last_name] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ss_customer_sk] -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) ---------------------------------PhysicalOlapScan[date_dim] +------------hashAgg[GLOBAL] 
+--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) ---------------------------------PhysicalOlapScan[date_dim] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ss_customer_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] 
+------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] RFV2: RF6 -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ws_bill_customer_sk] -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) ---------------------------------PhysicalOlapScan[date_dim] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject 
+------------------------PhysicalOlapScan[customer] RFV2: RF6 +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] RFV2: RF7 +--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ws_bill_customer_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] RFV2: RF7 diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query38.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query38.out index 1a49401e891319..fc988652648dc2 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/hint/query38.out +++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query38.out @@ -8,49 +8,46 @@ PhysicalResultSink ----------hashAgg[LOCAL] ------------PhysicalProject --------------PhysicalIntersect -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ws_bill_customer_sk] -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] 
-----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189)) -------------------------------------PhysicalOlapScan[date_dim] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189)) -------------------------------------PhysicalOlapScan[date_dim] +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build 
RFs:RF1 c_customer_sk->[ws_bill_customer_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk] -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189)) -------------------------------------PhysicalOlapScan[date_dim] +------------------------hashJoin[INNER_JOIN 
broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query87.out 
b/regression-test/data/shape_check/tpcds_sf1000/hint/query87.out index 4f102613865865..9f5547c4459a45 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/hint/query87.out +++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query87.out @@ -6,49 +6,46 @@ PhysicalResultSink ------hashAgg[LOCAL] --------PhysicalProject ----------PhysicalExcept -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ss_customer_sk] -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202)) ---------------------------------PhysicalOlapScan[date_dim] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject 
---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202)) ---------------------------------PhysicalOlapScan[date_dim] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ss_customer_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ws_bill_customer_sk] -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] 
-------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202)) ---------------------------------PhysicalOlapScan[date_dim] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ws_bill_customer_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 
d_date_sk->[ws_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query38.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query38.out index 1a49401e891319..fc988652648dc2 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query38.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query38.out @@ -8,49 +8,46 @@ PhysicalResultSink ----------hashAgg[LOCAL] ------------PhysicalProject --------------PhysicalIntersect -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ws_bill_customer_sk] -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189)) -------------------------------------PhysicalOlapScan[date_dim] +----------------hashAgg[GLOBAL] 
+------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189)) -------------------------------------PhysicalOlapScan[date_dim] +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ws_bill_customer_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189)) +----------------------------------PhysicalOlapScan[date_dim] 
+--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk] -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189)) -------------------------------------PhysicalOlapScan[date_dim] +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 +------------------------------PhysicalProject 
+--------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query87.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query87.out index 4f102613865865..9f5547c4459a45 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query87.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query87.out @@ -6,49 +6,46 @@ PhysicalResultSink ------hashAgg[LOCAL] --------PhysicalProject ----------PhysicalExcept -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) 
otherCondition=() build RFs:RF1 c_customer_sk->[ss_customer_sk] -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202)) ---------------------------------PhysicalOlapScan[date_dim] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202)) ---------------------------------PhysicalOlapScan[date_dim] +--------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ss_customer_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ws_bill_customer_sk] -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 -----------------------------PhysicalProject -------------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202)) ---------------------------------PhysicalOlapScan[date_dim] +--------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ws_bill_customer_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] From 88551ea2a3b621a955d4c75d2a8570aa417eda94 Mon Sep 17 00:00:00 2001 From: feiniaofeiafei Date: Tue, 6 Jan 2026 15:38:42 +0800 Subject: [PATCH 6/7] add test --- 
.../agg_join_pkfk/agg_join_pkfk.groovy | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 regression-test/suites/nereids_rules_p0/agg_join_pkfk/agg_join_pkfk.groovy diff --git a/regression-test/suites/nereids_rules_p0/agg_join_pkfk/agg_join_pkfk.groovy b/regression-test/suites/nereids_rules_p0/agg_join_pkfk/agg_join_pkfk.groovy new file mode 100644 index 00000000000000..cdda1babad70a9 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/agg_join_pkfk/agg_join_pkfk.groovy @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+suite("agg_join_pkfk") { + multi_sql """ + SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'; + set disable_join_reorder=true; + drop table if exists customer_test; + CREATE TABLE customer_test ( + c_customer_sk INT not null , + c_first_name VARCHAR(50), + c_last_name VARCHAR(50) + ); + drop table if exists store_sales_test; + CREATE TABLE store_sales_test ( + ss_customer_sk INT, + d_date DATE + ); + + INSERT INTO customer_test VALUES (1, 'John', 'Smith'); + INSERT INTO customer_test VALUES (2, 'John', 'Smith'); + + INSERT INTO store_sales_test VALUES (1, '2024-01-01'); + INSERT INTO store_sales_test VALUES (2, '2024-01-01'); + + alter table customer_test add constraint c_pk primary key (c_customer_sk); + alter table store_sales_test add constraint ss_c_fk foreign key (ss_customer_sk) references customer_test(c_customer_sk); + """ + explainAndOrderResult 'not_push_down', """ + SELECT DISTINCT c_last_name, c_first_name, d_date + FROM store_sales_test inner join customer_test + on store_sales_test.ss_customer_sk = customer_test.c_customer_sk; + """ + + explainAndOrderResult 'push_down', """ + SELECT DISTINCT c_first_name,c_customer_sk, d_date + FROM store_sales_test inner join customer_test + on store_sales_test.ss_customer_sk = customer_test.c_customer_sk; + """ + + explainAndOrderResult 'push_down_with_count', """ + SELECT c_first_name,c_customer_sk, d_date,count(c_customer_sk) from ( + select * + FROM store_sales_test inner join customer_test + on store_sales_test.ss_customer_sk = customer_test.c_customer_sk + ) t + group by c_first_name,c_customer_sk, d_date; + """ +} \ No newline at end of file From 3fd07b24ba3887e3a9519435205a4c4eb96bb393 Mon Sep 17 00:00:00 2001 From: feiniaofeiafei Date: Tue, 6 Jan 2026 17:36:28 +0800 Subject: [PATCH 7/7] fix --- .../suites/nereids_rules_p0/agg_join_pkfk/agg_join_pkfk.groovy | 1 + 1 file changed, 1 insertion(+) diff --git a/regression-test/suites/nereids_rules_p0/agg_join_pkfk/agg_join_pkfk.groovy 
b/regression-test/suites/nereids_rules_p0/agg_join_pkfk/agg_join_pkfk.groovy index cdda1babad70a9..778547830c1656 100644 --- a/regression-test/suites/nereids_rules_p0/agg_join_pkfk/agg_join_pkfk.groovy +++ b/regression-test/suites/nereids_rules_p0/agg_join_pkfk/agg_join_pkfk.groovy @@ -17,6 +17,7 @@ suite("agg_join_pkfk") { multi_sql """ SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'; + set runtime_filter_mode=OFF; set disable_join_reorder=true; drop table if exists customer_test; CREATE TABLE customer_test (