object JoinSelection extends Strategy with PredicateHelper with JoinSelectionHelper
Selects the proper physical plan for a join based on join strategy hints, the availability of equi-join keys, and the sizes of the joining relations. Below are the existing join strategies, their characteristics, and their limitations.
- Broadcast hash join (BHJ): Only supported for equi-joins, while the join keys do not need to be sortable. Supported for all join types except full outer joins. BHJ usually performs faster than the other join algorithms when the broadcast side is small. However, broadcasting tables is a network-intensive operation and it could cause OOM or perform badly in some cases, especially when the build/broadcast side is big.
- Shuffle hash join: Only supported for equi-joins, while the join keys do not need to be sortable. Supported for all join types. Building a hash map from a table is a memory-intensive operation and it could cause OOM when the build side is big.
- Shuffle sort merge join (SMJ): Only supported for equi-joins and the join keys have to be sortable. Supported for all join types.
- Broadcast nested loop join (BNLJ): Supports both equi-joins and non-equi-joins. Supports all the join types, but the implementation is optimized for: 1) broadcasting the left side in a right outer join; 2) broadcasting the right side in a left outer, left semi, left anti or existence join; 3) broadcasting either side in an inner-like join. For other cases, we need to scan the data multiple times, which can be rather slow.
- Shuffle-and-replicate nested loop join (a.k.a. cartesian product join): Supports both equi-joins and non-equi-joins. Supports only inner-like joins.
- Alphabetic
- By Inheritance
- JoinSelection
- JoinSelectionHelper
- PredicateHelper
- AliasHelper
- SparkStrategy
- GenericStrategy
- Logging
- AnyRef
- Any
- Hide All
- Show All
- Public
- Protected
Value Members
- final def !=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- final def ##: Int
- Definition Classes
- AnyRef → Any
- final def ==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- def apply(plan: LogicalPlan): Seq[SparkPlan]
- Definition Classes
- JoinSelection → GenericStrategy
- final def asInstanceOf[T0]: T0
- Definition Classes
- Any
- def buildBalancedPredicate(expressions: Seq[Expression], op: (Expression, Expression) => Expression): Expression
- Attributes
- protected
- Definition Classes
- PredicateHelper
- def canBroadcastBySize(plan: LogicalPlan, conf: SQLConf): Boolean
- Definition Classes
- JoinSelectionHelper
- def canBuildBroadcastLeft(joinType: JoinType): Boolean
- Definition Classes
- JoinSelectionHelper
- def canBuildBroadcastRight(joinType: JoinType): Boolean
- Definition Classes
- JoinSelectionHelper
- def canBuildShuffledHashJoinLeft(joinType: JoinType): Boolean
- Definition Classes
- JoinSelectionHelper
- def canBuildShuffledHashJoinRight(joinType: JoinType): Boolean
- Definition Classes
- JoinSelectionHelper
- def canEvaluate(expr: Expression, plan: LogicalPlan): Boolean
- Attributes
- protected
- Definition Classes
- PredicateHelper
- def canEvaluateWithinJoin(expr: Expression): Boolean
- Attributes
- protected
- Definition Classes
- PredicateHelper
- def canPlanAsBroadcastHashJoin(join: Join, conf: SQLConf): Boolean
- Definition Classes
- JoinSelectionHelper
- def canPruneLeft(joinType: JoinType): Boolean
- Definition Classes
- JoinSelectionHelper
- def canPruneRight(joinType: JoinType): Boolean
- Definition Classes
- JoinSelectionHelper
- def clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.CloneNotSupportedException]) @native()
- final def eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- def equals(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef → Any
- def extractPredicatesWithinOutputSet(condition: Expression, outputSet: AttributeSet): Option[Expression]
- Attributes
- protected
- Definition Classes
- PredicateHelper
- def finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.Throwable])
- def findExpressionAndTrackLineageDown(exp: Expression, plan: LogicalPlan): Option[(Expression, LogicalPlan)]
- Definition Classes
- PredicateHelper
- def getAliasMap(exprs: Seq[NamedExpression]): AttributeMap[Alias]
- Attributes
- protected
- Definition Classes
- AliasHelper
- def getAliasMap(plan: Aggregate): AttributeMap[Alias]
- Attributes
- protected
- Definition Classes
- AliasHelper
- def getAliasMap(plan: Project): AttributeMap[Alias]
- Attributes
- protected
- Definition Classes
- AliasHelper
- def getBroadcastBuildSide(left: LogicalPlan, right: LogicalPlan, joinType: JoinType, hint: JoinHint, hintOnly: Boolean, conf: SQLConf): Option[BuildSide]
- Definition Classes
- JoinSelectionHelper
- final def getClass(): Class[_ <: AnyRef]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- def getShuffleHashJoinBuildSide(left: LogicalPlan, right: LogicalPlan, joinType: JoinType, hint: JoinHint, hintOnly: Boolean, conf: SQLConf): Option[BuildSide]
- Definition Classes
- JoinSelectionHelper
- def getSmallerSide(left: LogicalPlan, right: LogicalPlan): BuildSide
- Definition Classes
- JoinSelectionHelper
- def hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- def hintToBroadcastLeft(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
- def hintToBroadcastRight(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
- def hintToNotBroadcastLeft(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
- def hintToNotBroadcastRight(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
- def hintToPreferShuffleHashJoin(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
- def hintToPreferShuffleHashJoinLeft(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
- def hintToPreferShuffleHashJoinRight(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
- def hintToShuffleHashJoin(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
- def hintToShuffleHashJoinLeft(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
- def hintToShuffleHashJoinRight(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
- def hintToShuffleReplicateNL(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
- def hintToSortMergeJoin(hint: JoinHint): Boolean
- Definition Classes
- JoinSelectionHelper
- def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
- def initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
- final def isInstanceOf[T0]: Boolean
- Definition Classes
- Any
- def isLikelySelective(e: Expression): Boolean
- Definition Classes
- PredicateHelper
- def isNullIntolerant(expr: Expression): Boolean
- Attributes
- protected
- Definition Classes
- PredicateHelper
- def isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
- def log: Logger
- Attributes
- protected
- Definition Classes
- Logging
- def logDebug(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logDebug(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logError(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logError(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logInfo(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logInfo(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logName: String
- Attributes
- protected
- Definition Classes
- Logging
- def logTrace(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logTrace(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logWarning(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logWarning(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- final def ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- final def notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- final def notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- def outputWithNullability(output: Seq[Attribute], nonNullAttrExprIds: Seq[ExprId]): Seq[Attribute]
- Attributes
- protected
- Definition Classes
- PredicateHelper
- def planLater(plan: LogicalPlan): SparkPlan
- Attributes
- protected
- Definition Classes
- SparkStrategy → GenericStrategy
- def replaceAlias(expr: Expression, aliasMap: AttributeMap[Alias]): Expression
- Attributes
- protected
- Definition Classes
- AliasHelper
- def replaceAliasButKeepName(expr: NamedExpression, aliasMap: AttributeMap[Alias]): NamedExpression
- Attributes
- protected
- Definition Classes
- AliasHelper
- def splitConjunctivePredicates(condition: Expression): Seq[Expression]
- Attributes
- protected
- Definition Classes
- PredicateHelper
- def splitDisjunctivePredicates(condition: Expression): Seq[Expression]
- Attributes
- protected
- Definition Classes
- PredicateHelper
- final def synchronized[T0](arg0: => T0): T0
- Definition Classes
- AnyRef
- def toString(): String
- Definition Classes
- AnyRef → Any
- def trimAliases(e: Expression): Expression
- Attributes
- protected
- Definition Classes
- AliasHelper
- def trimNonTopLevelAliases[T <: Expression](e: T): T
- Attributes
- protected
- Definition Classes
- AliasHelper
- final def wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException])
- final def wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException])
- final def wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException]) @native()