object JoinSelection extends Strategy with PredicateHelper with JoinSelectionHelper
Select the proper physical plan for join based on join strategy hints, the availability of equi-join keys and the sizes of joining relations. Below are the existing join strategies, their characteristics and their limitations.
- Broadcast hash join (BHJ): Only supported for equi-joins, while the join keys do not need to be sortable. Supported for all join types except full outer joins. BHJ usually performs faster than the other join algorithms when the broadcast side is small. However, broadcasting tables is a network-intensive operation and it could cause OOM or perform badly in some cases, especially when the build/broadcast side is big.
- Shuffle hash join: Only supported for equi-joins, while the join keys do not need to be sortable. Supported for all join types. Building hash map from table is a memory-intensive operation and it could cause OOM when the build side is big.
- Shuffle sort merge join (SMJ): Only supported for equi-joins and the join keys have to be sortable. Supported for all join types.
- Broadcast nested loop join (BNLJ): Supports both equi-joins and non-equi-joins. Supports all the join types, but the implementation is optimized for: 1) broadcasting the left side in a right outer join; 2) broadcasting the right side in a left outer, left semi, left anti or existence join; 3) broadcasting either side in an inner-like join. For other cases, we need to scan the data multiple times, which can be rather slow.
- Shuffle-and-replicate nested loop join (a.k.a. cartesian product join): Supports both equi-joins and non-equi-joins. Supports only inner like joins.
- Alphabetic
- By Inheritance
- JoinSelection
- JoinSelectionHelper
- PredicateHelper
- AliasHelper
- SparkStrategy
- GenericStrategy
- Logging
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
apply(plan: LogicalPlan): Seq[SparkPlan]
- Definition Classes
- JoinSelection → GenericStrategy
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
buildBalancedPredicate(expressions: Seq[Expression], op: (Expression, Expression) ⇒ Expression): Expression
- Attributes
- protected
- Definition Classes
- PredicateHelper
-
def
canBroadcastBySize(plan: LogicalPlan, conf: SQLConf): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
canBuildBroadcastLeft(joinType: JoinType): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
canBuildBroadcastRight(joinType: JoinType): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
canBuildShuffledHashJoinLeft(joinType: JoinType): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
canBuildShuffledHashJoinRight(joinType: JoinType): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
canEvaluate(expr: Expression, plan: LogicalPlan): Boolean
- Attributes
- protected
- Definition Classes
- PredicateHelper
-
def
canEvaluateWithinJoin(expr: Expression): Boolean
- Attributes
- protected
- Definition Classes
- PredicateHelper
-
def
canPlanAsBroadcastHashJoin(join: Join, conf: SQLConf): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
extractPredicatesWithinOutputSet(condition: Expression, outputSet: AttributeSet): Option[Expression]
- Attributes
- protected
- Definition Classes
- PredicateHelper
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
findExpressionAndTrackLineageDown(exp: Expression, plan: LogicalPlan): Option[(Expression, LogicalPlan)]
- Definition Classes
- PredicateHelper
-
def
getAliasMap(exprs: Seq[NamedExpression]): AttributeMap[Alias]
- Attributes
- protected
- Definition Classes
- AliasHelper
-
def
getAliasMap(plan: Aggregate): AttributeMap[Alias]
- Attributes
- protected
- Definition Classes
- AliasHelper
-
def
getAliasMap(plan: Project): AttributeMap[Alias]
- Attributes
- protected
- Definition Classes
- AliasHelper
-
def
getBroadcastBuildSide(left: LogicalPlan, right: LogicalPlan, joinType: JoinType, hint: JoinHint, hintOnly: Boolean, conf: SQLConf): Option[BuildSide]
- Definition Classes
- JoinSelectionHelper
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getShuffleHashJoinBuildSide(left: LogicalPlan, right: LogicalPlan, joinType: JoinType, hint: JoinHint, hintOnly: Boolean, conf: SQLConf): Option[BuildSide]
- Definition Classes
- JoinSelectionHelper
-
def
getSmallerSide(left: LogicalPlan, right: LogicalPlan): BuildSide
- Definition Classes
- JoinSelectionHelper
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
hintToBroadcastLeft(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
hintToBroadcastRight(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
hintToNotBroadcastLeft(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
hintToNotBroadcastRight(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
hintToPreferShuffleHashJoinLeft(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
hintToPreferShuffleHashJoinRight(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
hintToShuffleHashJoinLeft(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
hintToShuffleHashJoinRight(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
hintToShuffleReplicateNL(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
hintToSortMergeJoin(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
-
def
initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
def
isNullIntolerant(expr: Expression): Boolean
- Attributes
- protected
- Definition Classes
- PredicateHelper
-
def
isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
log: Logger
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logName: String
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
outputWithNullability(output: Seq[Attribute], nonNullAttrExprIds: Seq[ExprId]): Seq[Attribute]
- Attributes
- protected
- Definition Classes
- PredicateHelper
-
def
planLater(plan: LogicalPlan): SparkPlan
- Attributes
- protected
- Definition Classes
- SparkStrategy → GenericStrategy
-
def
replaceAlias(expr: Expression, aliasMap: AttributeMap[Alias]): Expression
- Attributes
- protected
- Definition Classes
- AliasHelper
-
def
replaceAliasButKeepName(expr: NamedExpression, aliasMap: AttributeMap[Alias]): NamedExpression
- Attributes
- protected
- Definition Classes
- AliasHelper
-
def
splitConjunctivePredicates(condition: Expression): Seq[Expression]
- Attributes
- protected
- Definition Classes
- PredicateHelper
-
def
splitDisjunctivePredicates(condition: Expression): Seq[Expression]
- Attributes
- protected
- Definition Classes
- PredicateHelper
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
def
trimAliases(e: Expression): Expression
- Attributes
- protected
- Definition Classes
- AliasHelper
-
def
trimNonTopLevelAliases[T <: Expression](e: T): T
- Attributes
- protected
- Definition Classes
- AliasHelper
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()