object KyuubiArrowConverters extends SQLConfHelper with Logging
Inheritance
- KyuubiArrowConverters
- Logging
- SQLConfHelper
- AnyRef
- Any
Type Members
- type Batch = (Array[Byte], Long)
Value Members
- final def !=(arg0: Any): Boolean
  Definition Classes: AnyRef → Any
- final def ##(): Int
  Definition Classes: AnyRef → Any
- final def ==(arg0: Any): Boolean
  Definition Classes: AnyRef → Any
- final def asInstanceOf[T0]: T0
  Definition Classes: Any
- def clone(): AnyRef
  Attributes: protected[lang]
  Definition Classes: AnyRef
  Annotations: @throws( ... ) @native()
- def conf: SQLConf
  Definition Classes: SQLConfHelper
- final def eq(arg0: AnyRef): Boolean
  Definition Classes: AnyRef
- def equals(arg0: Any): Boolean
  Definition Classes: AnyRef → Any
- def finalize(): Unit
  Attributes: protected[lang]
  Definition Classes: AnyRef
  Annotations: @throws( classOf[java.lang.Throwable] )
- final def getClass(): Class[_]
  Definition Classes: AnyRef → Any
  Annotations: @native()
- def hashCode(): Int
  Definition Classes: AnyRef → Any
  Annotations: @native()
- def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
  Attributes: protected
  Definition Classes: Logging
- def initializeLogIfNecessary(isInterpreter: Boolean): Unit
  Attributes: protected
  Definition Classes: Logging
- final def isInstanceOf[T0]: Boolean
  Definition Classes: Any
- def isTraceEnabled(): Boolean
  Attributes: protected
  Definition Classes: Logging
- def log: Logger
  Attributes: protected
  Definition Classes: Logging
- def logDebug(msg: ⇒ String, throwable: Throwable): Unit
  Attributes: protected
  Definition Classes: Logging
- def logDebug(msg: ⇒ String): Unit
  Attributes: protected
  Definition Classes: Logging
- def logError(msg: ⇒ String, throwable: Throwable): Unit
  Attributes: protected
  Definition Classes: Logging
- def logError(msg: ⇒ String): Unit
  Attributes: protected
  Definition Classes: Logging
- def logInfo(msg: ⇒ String, throwable: Throwable): Unit
  Attributes: protected
  Definition Classes: Logging
- def logInfo(msg: ⇒ String): Unit
  Attributes: protected
  Definition Classes: Logging
- def logName: String
  Attributes: protected
  Definition Classes: Logging
- def logTrace(msg: ⇒ String, throwable: Throwable): Unit
  Attributes: protected
  Definition Classes: Logging
- def logTrace(msg: ⇒ String): Unit
  Attributes: protected
  Definition Classes: Logging
- def logWarning(msg: ⇒ String, throwable: Throwable): Unit
  Attributes: protected
  Definition Classes: Logging
- def logWarning(msg: ⇒ String): Unit
  Attributes: protected
  Definition Classes: Logging
- final def ne(arg0: AnyRef): Boolean
  Definition Classes: AnyRef
- final def notify(): Unit
  Definition Classes: AnyRef
  Annotations: @native()
- final def notifyAll(): Unit
  Definition Classes: AnyRef
  Annotations: @native()
- def slice(schema: StructType, timeZoneId: String, bytes: Array[Byte], start: Int, length: Int): Array[Byte]
  Slices the input Arrow record batch byte array bytes, starting at index start and taking length elements. A usage sketch follows this entry.
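  A minimal usage sketch, not taken from the Kyuubi sources: the schema, the UTC time zone, and the 0/10 slice bounds are illustrative assumptions, and batchBytes is left as a placeholder for a single serialized Arrow record batch matching that schema (for example, one element produced by toBatchIterator further down). The import of KyuubiArrowConverters is omitted because its package is not shown on this page.

    import org.apache.spark.sql.types.{LongType, StructField, StructType}

    val schema = StructType(Seq(StructField("id", LongType, nullable = false)))
    // Placeholder: obtain one serialized Arrow batch for `schema`, e.g. from toBatchIterator below.
    val batchBytes: Array[Byte] = ???
    // Keep only rows [0, 10) of that batch; the result is again a serialized Arrow batch byte array.
    val firstTen: Array[Byte] =
      KyuubiArrowConverters.slice(schema, "UTC", batchBytes, start = 0, length = 10)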
- final def synchronized[T0](arg0: ⇒ T0): T0
  Definition Classes: AnyRef
- def takeAsArrowBatches(collectLimitExec: CollectLimitExec, maxRecordsPerBatch: Long, maxEstimatedBatchSize: Long, timeZoneId: String): Array[Batch]
  Forked from org.apache.spark.sql.execution.SparkPlan#executeTake(). The algorithm can be summarized in the following steps:
  1. If the limit specified in the CollectLimitExec object is 0, return an empty array of batches.
  2. Otherwise, execute the child query plan of the CollectLimitExec object to obtain an RDD of data to collect.
  3. Collect data iteratively in batches until the specified limit is reached. In each iteration, select a subset of the RDD's partitions to scan and try to collect data from them.
  4. For each partition subset, use the runJob method of the Spark context to execute a closure that scans the partition data and converts it to Arrow batches.
  5. Check whether the collected data has reached the specified limit. If not, select another subset of partitions to scan and repeat until the limit is reached or all partitions have been scanned.
  6. Return an array of all the collected Arrow batches.
  Note that:
  1. The total row count of the returned Arrow batches is >= limit when the input DataFrame has more than limit rows.
  2. The takeFromEnd logic is not implemented.
  A usage sketch follows this entry.
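  A hedged usage sketch, not from the Kyuubi sources: the local SparkSession, the range/limit query, the batch-size limits, and the UTC time zone are illustrative assumptions. It further assumes that KyuubiArrowConverters is accessible on the classpath (its import is omitted because the package is not shown on this page) and that df.limit(100) is planned as a CollectLimitExec, which may not hold on every Spark version, e.g. under adaptive query execution.

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.execution.CollectLimitExec

    val spark = SparkSession.builder().master("local[*]").appName("take-as-arrow").getOrCreate()
    val df = spark.range(0, 10000).toDF("id").limit(100)

    df.queryExecution.executedPlan match {
      case limitExec: CollectLimitExec =>
        val batches: Array[KyuubiArrowConverters.Batch] =
          KyuubiArrowConverters.takeAsArrowBatches(
            collectLimitExec = limitExec,
            maxRecordsPerBatch = 1000L,               // at most 1000 rows per Arrow batch
            maxEstimatedBatchSize = 4L * 1024 * 1024, // ~4 MiB estimated size cap per batch
            timeZoneId = "UTC")
        // Assuming the Long in each Batch tuple is that batch's row count,
        // the total is >= the limit because batches are cut at batch boundaries.
        val totalRows = batches.map(_._2).sum
        println(s"collected ${batches.length} batches, $totalRows rows")
      case other =>
        println(s"expected CollectLimitExec, got ${other.getClass.getSimpleName}")
    }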
- def toBatchIterator(rowIter: Iterator[InternalRow], schema: StructType, maxRecordsPerBatch: Long, maxEstimatedBatchSize: Long, limit: Long, timeZoneId: String): ArrowBatchIterator
  Different from org.apache.spark.sql.execution.arrow.ArrowConverters.toBatchIterator in that each output Arrow batch also carries its own row count. A usage sketch follows this entry.
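  A minimal sketch under stated assumptions, not from the Kyuubi sources: the single-column schema, the row iterator, the size limits, and the UTC time zone are illustrative, and the final loop assumes that the returned ArrowBatchIterator yields each serialized batch as an Array[Byte]. The import of KyuubiArrowConverters is omitted because its package is not shown on this page.

    import org.apache.spark.sql.catalyst.InternalRow
    import org.apache.spark.sql.types.{LongType, StructField, StructType}

    val schema = StructType(Seq(StructField("id", LongType, nullable = false)))
    val rows: Iterator[InternalRow] = (0L until 10L).iterator.map(i => InternalRow(i))

    val arrowBatchIter = KyuubiArrowConverters.toBatchIterator(
      rowIter = rows,
      schema = schema,
      maxRecordsPerBatch = 4L,          // at most 4 rows per Arrow batch
      maxEstimatedBatchSize = 1L << 20, // ~1 MiB estimated size cap per batch
      limit = 10L,                      // stop after 10 rows in total
      timeZoneId = "UTC")

    // Assumed element type: one serialized Arrow record batch per element,
    // each batch carrying its own row count as described above.
    arrowBatchIter.foreach(batchBytes => println(s"batch of ${batchBytes.length} bytes"))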
- def toString(): String
  Definition Classes: AnyRef → Any
- final def wait(): Unit
  Definition Classes: AnyRef
  Annotations: @throws( ... )
- final def wait(arg0: Long, arg1: Int): Unit
  Definition Classes: AnyRef
  Annotations: @throws( ... )
- final def wait(arg0: Long): Unit
  Definition Classes: AnyRef
  Annotations: @throws( ... ) @native()