com.memsql.spark.connector

DataFrameFunctions

class DataFrameFunctions extends Serializable

Linear Supertypes
Serializable (scala.Serializable), Serializable (java.io.Serializable), AnyRef, Any

Instance Constructors

  1. new DataFrameFunctions(df: DataFrame)
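
    This class is normally not constructed directly. A minimal sketch (Spark 1.x era, e.g. in spark-shell; the wildcard import providing the implicit DataFrame wrapper is an assumption about this connector's package object):

      import org.apache.spark.{SparkConf, SparkContext}
      import org.apache.spark.sql.{DataFrame, SQLContext}
      import com.memsql.spark.connector._  // assumed: brings the DataFrame-to-DataFrameFunctions implicit into scope

      val sc = new SparkContext(new SparkConf().setAppName("example").setMaster("local[2]"))
      val sqlContext = new SQLContext(sc)
      val df: DataFrame = sqlContext.createDataFrame(Seq((1, "a"), (2, "b"))).toDF("id", "value")

      // Explicit construction, equivalent to what the implicit conversion provides:
      val fns = new DataFrameFunctions(df)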

Value Members

  1. final def !=(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  2. final def !=(arg0: Any): Boolean

    Definition Classes
    Any
  3. final def ##(): Int

    Definition Classes
    AnyRef → Any
  4. final def ==(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  5. final def ==(arg0: Any): Boolean

    Definition Classes
    Any
  6. final def asInstanceOf[T0]: T0

    Definition Classes
    Any
  7. def clone(): AnyRef

    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  8. def createMemSQLTableAs(dbName: String, tableName: String, dbHost: String = null, dbPort: Int = -1, user: String = null, password: String = null, ifNotExists: Boolean = false, keys: List[MemSQLKey] = List(), extraCols: List[MemSQLExtraColumn] = List(), useKeylessShardedOptimization: Boolean = false): DataFrame

    Creates a MemSQL table with a schema matching the provided org.apache.spark.sql.DataFrame and loads the data into it.

    If dbHost, dbPort, user, and password are not specified, the com.memsql.spark.context.MemSQLContext determines where each partition's data is sent. If the Spark executors are colocated with writable MemSQL nodes, each Spark partition inserts into a randomly chosen colocated writable MemSQL node. Otherwise, Spark partitions insert into writable MemSQL nodes in round-robin fashion.

    dbName: The name of the database.

    tableName: The name of the table.

    dbHost: The host of the database.

    dbPort: The port of the database.

    user: The user for the database.

    password: The password for the database.

    ifNotExists: Use CREATE TABLE IF NOT EXISTS.

    keys: A scala.List of com.memsql.spark.connector.dataframe.MemSQLKey specifications to add to the CREATE TABLE statement.

    extraCols: A scala.List of com.memsql.spark.connector.dataframe.MemSQLExtraColumn specifications to add to the CREATE TABLE statement.

    useKeylessShardedOptimization: If set, data is loaded directly into leaf partitions. This can increase performance at the expense of higher-variance sharding.

    returns: An org.apache.spark.sql.DataFrame containing the schema and inserted rows in MemSQL.
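
    A minimal usage sketch (database, table, and connection details are hypothetical; assumes df and the imports from the constructor example above):

      // Create table "events" in database "analytics" with df's schema and load df into it.
      val created: DataFrame = df.createMemSQLTableAs(
        dbName = "analytics",
        tableName = "events",
        dbHost = "127.0.0.1",  // hypothetical master aggregator address
        dbPort = 3306,
        user = "root",
        password = "",
        ifNotExists = true
      )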

  9. def createMemSQLTableFromSchema(dbName: String, tableName: String, dbHost: String = null, dbPort: Int = -1, user: String = null, password: String = null, ifNotExists: Boolean = false, keys: List[MemSQLKey] = List(), extraCols: List[MemSQLExtraColumn] = List()): DataFrame

    Creates a MemSQL table with a schema matching the provided org.apache.spark.sql.DataFrame.

    dbName: The name of the database.

    tableName: The name of the table.

    dbHost: The master aggregator host.

    dbPort: The master aggregator port.

    user: The user for the database.

    password: The password for the database.

    ifNotExists: Use CREATE TABLE IF NOT EXISTS.

    keys: A scala.List of com.memsql.spark.connector.dataframe.MemSQLKey specifications to add to the CREATE TABLE statement.

    extraCols: A scala.List of com.memsql.spark.connector.dataframe.MemSQLExtraColumn specifications to add to the CREATE TABLE statement.

    returns: An org.apache.spark.sql.DataFrame with the schema of the newly created (empty) MemSQL table.
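
    A minimal sketch (hypothetical names; this creates the table but loads no rows):

      // Create an empty table whose columns mirror df's schema.
      val emptyTable: DataFrame = df.createMemSQLTableFromSchema(
        dbName = "analytics",
        tableName = "events_schema_only",
        dbHost = "127.0.0.1",  // hypothetical master aggregator address
        dbPort = 3306,
        user = "root",
        password = "",
        ifNotExists = true
      )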

  10. final def eq(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  11. def equals(arg0: Any): Boolean

    Definition Classes
    AnyRef → Any
  12. def finalize(): Unit

    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  13. final def getClass(): Class[_]

    Definition Classes
    AnyRef → Any
  14. def hashCode(): Int

    Definition Classes
    AnyRef → Any
  15. final def isInstanceOf[T0]: Boolean

    Definition Classes
    Any
  16. final def ne(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  17. final def notify(): Unit

    Definition Classes
    AnyRef
  18. final def notifyAll(): Unit

    Definition Classes
    AnyRef
  19. def saveToMemSQL(dbName: String, tableName: String, dbHost: String = null, dbPort: Int = -1, user: String = null, password: String = null, onDuplicateKeyBehavior: Option[OnDupKeyBehavior] = None, onDuplicateKeySql: String = "", upsertBatchSize: Int = ..., useKeylessShardedOptimization: Boolean = false): Long

    Saves a Spark org.apache.spark.sql.DataFrame to a MemSQL table with the same column names.

    If dbHost, dbPort, user, and password are not specified, the com.memsql.spark.context.MemSQLContext determines where each partition's data is sent. If the Spark executors are colocated with writable MemSQL nodes, each Spark partition inserts into a randomly chosen colocated writable MemSQL node. Otherwise, Spark partitions insert into writable MemSQL nodes in round-robin fashion.

    dbName: The name of the database.

    tableName: The name of the table.

    dbHost: The host of the database.

    dbPort: The port of the database.

    user: The user for the database.

    password: The password for the database.

    onDuplicateKeyBehavior: How to handle duplicate key errors when inserting rows. If this is OnDupKeyBehavior.Replace, existing rows are replaced with the ones in the DataFrame. If this is OnDupKeyBehavior.Ignore, existing rows are left as they are. If this is OnDupKeyBehavior.Update, the SQL code in onDuplicateKeySql is used. If this is None, any duplicate key error raises an exception.

    onDuplicateKeySql: Optional SQL to include in the "ON DUPLICATE KEY UPDATE" clause of the generated INSERT queries. If this is a non-empty string, onDuplicateKeyBehavior must be OnDupKeyBehavior.Update.

    upsertBatchSize: How many rows to insert per INSERT query. Has no effect if onDuplicateKeySql is not specified.

    useKeylessShardedOptimization: If set, data is loaded directly into leaf partitions. This can increase performance at the expense of higher-variance sharding.

    returns: The number of rows inserted into MemSQL.
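
    A minimal sketch (hypothetical names and connection details; the import location of OnDupKeyBehavior is an assumption about this connector's packaging):

      import com.memsql.spark.connector.OnDupKeyBehavior  // assumed import path

      // Insert df's rows, replacing any existing rows that collide on a key.
      val rowsInserted: Long = df.saveToMemSQL(
        dbName = "analytics",
        tableName = "events",
        dbHost = "127.0.0.1",  // hypothetical
        dbPort = 3306,
        user = "root",
        password = "",
        onDuplicateKeyBehavior = Some(OnDupKeyBehavior.Replace)
      )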

  20. final def synchronized[T0](arg0: ⇒ T0): T0

    Definition Classes
    AnyRef
  21. def toString(): String

    Definition Classes
    AnyRef → Any
  22. final def wait(): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  23. final def wait(arg0: Long, arg1: Int): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  24. final def wait(arg0: Long): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
