com.ebiznext.comet.job.index.bqload

BigQuerySparkJob

Related Docs: object BigQuerySparkJob | package bqload

class BigQuerySparkJob extends SparkJob with BigQueryJobBase

Linear Supertypes
BigQueryJobBase, SparkJob, JobBase, StrictLogging, AnyRef, Any

Instance Constructors

  1. new BigQuerySparkJob(cliConfig: BigQueryLoadConfig, maybeSchema: Option[Schema] = None)(implicit settings: Settings)

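    A minimal usage sketch (not from the library docs): it assumes Settings can be built from a Typesafe Config, and the BigQueryLoadConfig field names shown are hypothetical; check the case class for the real parameters.

    import com.typesafe.config.ConfigFactory
    import com.ebiznext.comet.config.Settings
    import com.ebiznext.comet.job.index.bqload.{BigQueryLoadConfig, BigQuerySparkJob}

    // Assumption: Settings exposes an apply(Config) factory; adapt to however
    // settings are materialised in your deployment.
    implicit val settings: Settings = Settings(ConfigFactory.load())

    // Field names below are illustrative assumptions, not the actual case class.
    val config = BigQueryLoadConfig(
      outputDataset = "analytics",
      outputTable = "orders"
    )

    new BigQuerySparkJob(config, maybeSchema = None).run() match {
      case scala.util.Success(result) => println(s"BigQuery load succeeded: $result")
      case scala.util.Failure(error)  => error.printStackTrace()
    }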

Value Members

  1. final def !=(arg0: Any): Boolean

    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Definition Classes
    AnyRef → Any
  4. def analyze(fullTableName: String): Any

    Attributes
    protected
    Definition Classes
    SparkJob
  5. def appendToFile(storageHandler: StorageHandler, dataToSave: DataFrame, path: Path): Unit

    Saves a dataset. If the path is empty (the first time we compute metrics on the schema), we can write directly.

    If Parquet files are already stored there, create a temporary directory to compute on, then flush the path to move the updated metrics into it.

    dataToSave

    : dataset to be saved

    path

    : path to save the file at

    Attributes
    protected
    Definition Classes
    SparkJob
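
    A sketch of the append strategy described above, assuming Hadoop FileSystem semantics; the temporary-directory name and the union-based merge are assumptions, not the actual implementation.

    import org.apache.hadoop.fs.Path
    import org.apache.spark.sql.{DataFrame, SparkSession}

    def appendToFileSketch(session: SparkSession, dataToSave: DataFrame, path: Path): Unit = {
      val fs = path.getFileSystem(session.sparkContext.hadoopConfiguration)
      if (!fs.exists(path)) {
        // First write: nothing to merge, write directly.
        dataToSave.write.mode("overwrite").parquet(path.toString)
      } else {
        // Parquet files already present: merge with the existing data in a
        // temporary directory, then swap it in place of the original path.
        // (Assumes both sides share the same schema.)
        val tmp = new Path(path.getParent, path.getName + ".tmp")
        val existing = session.read.parquet(path.toString)
        existing.union(dataToSave).write.mode("overwrite").parquet(tmp.toString)
        fs.delete(path, true)
        fs.rename(tmp, path)
      }
    }
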
  6. def applyTableIamPolicy(tableId: TableId, rls: RowLevelSecurity): Policy

    To set access control on a table or view, we can use an Identity and Access Management (IAM) policy. After you create a table or view, you can set its policy with a set-iam-policy call. For each call, we compare the existing policy with the one defined in the YAML file: if they are equal, we do nothing; otherwise we update the table policy.

    Definition Classes
    BigQueryJobBase
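
    A hedged sketch of the compare-and-set flow described above, written directly against the BigQuery Java client; the role and identity are examples, and etag/condition handling is omitted.

    import com.google.cloud.{Identity, Policy, Role}
    import com.google.cloud.bigquery.{BigQuery, BigQueryOptions, TableId}

    val bq: BigQuery = BigQueryOptions.getDefaultInstance.getService
    val table = TableId.of("my-project", "my_dataset", "my_table")

    // Desired policy (in Comet this is derived from the YAML rls definition).
    val desired: Policy = Policy.newBuilder()
      .addIdentity(Role.of("roles/bigquery.dataViewer"), Identity.user("analyst@example.com"))
      .build()

    // Only issue the set-iam-policy call when the bindings actually differ.
    val existing: Policy = bq.getIamPolicy(table)
    if (existing.getBindings != desired.getBindings)
      bq.setIamPolicy(table, desired)
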
  7. final def asInstanceOf[T0]: T0

    Definition Classes
    Any
  8. val bigquery: BigQuery

    Definition Classes
    BigQueryJobBase
  9. val bqTable: String

    Definition Classes
    BigQueryJobBase
  10. val bucket: String

  11. val cliConfig: BigQueryLoadConfig

    Definition Classes
    BigQuerySparkJob → BigQueryJobBase
  12. def clone(): AnyRef

    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  13. val conf: Configuration

  14. def createViews(views: Views, sqlParameters: Map[String, String]): Unit

    Attributes
    protected
    Definition Classes
    SparkJob
  15. val datasetId: DatasetId

    Definition Classes
    BigQueryJobBase
  16. final def eq(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  17. def equals(arg0: Any): Boolean

    Definition Classes
    AnyRef → Any
  18. def finalize(): Unit

    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  19. final def getClass(): Class[_]

    Definition Classes
    AnyRef → Any
  20. def getOrCreateDataset(): Dataset

    Definition Classes
    BigQueryJobBase
  21. def getOrCreateTable(dataFrame: Option[DataFrame], maybeSchema: Option[Schema]): (Table, StandardTableDefinition)

  22. def hashCode(): Int

    Definition Classes
    AnyRef → Any
  23. final def isInstanceOf[T0]: Boolean

    Definition Classes
    Any
  24. val logger: Logger

    Attributes
    protected
    Definition Classes
    StrictLogging
  25. def name: String

    Definition Classes
    BigQuerySparkJob → JobBase
  26. final def ne(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  27. final def notify(): Unit

    Definition Classes
    AnyRef
  28. final def notifyAll(): Unit

    Definition Classes
    AnyRef
  29. def partitionDataset(dataset: DataFrame, partition: List[String]): DataFrame

    Attributes
    protected
    Definition Classes
    SparkJob
  30. def partitionedDatasetWriter(dataset: DataFrame, partition: List[String]): DataFrameWriter[Row]

    Partition a dataset using dataset columns. To partition the dataset using the ingestion time, use the reserved column names:

    • comet_date
    • comet_year
    • comet_month
    • comet_day
    • comet_hour
    • comet_minute

    These columns are renamed to "date", "year", "month", "day", "hour", "minute" in the dataset, and their values are set to the current date/time.

    dataset

    : Input dataset

    partition

    : list of columns to use for partitioning.

    returns

    A DataFrameWriter partitioned by the given columns

    Attributes
    protected
    Definition Classes
    SparkJob
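
    A short illustration of what the returned writer amounts to in plain Spark, assuming ingestion-time columns are materialised as described above; the column choices are examples, not the actual implementation.

    import org.apache.spark.sql.DataFrame
    import org.apache.spark.sql.functions.{current_date, dayofmonth, month, year}

    def writePartitioned(df: DataFrame, path: String): Unit =
      df.withColumn("year", year(current_date()))
        .withColumn("month", month(current_date()))
        .withColumn("day", dayofmonth(current_date()))
        .write
        .partitionBy("year", "month", "day") // one directory level per column
        .mode("append")
        .parquet(path)
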
  31. def prepareConf(): Configuration

  32. def prepareRLS(): List[String]

    Definition Classes
    BigQueryJobBase
  33. val projectId: String

    Definition Classes
    BigQuerySparkJob → BigQueryJobBase
  34. def registerUdf(udf: String): Unit

    Attributes
    protected
    Definition Classes
    SparkJob
  35. def run(): Try[JobResult]

    Just to force any Spark job to implement its entry point within the "run" method.

    returns

    : the result of the job, wrapped in a Try

    Definition Classes
    BigQuerySparkJob → JobBase
  36. def runJob(statement: String, location: String): Job

    Definition Classes
    BigQueryJobBase
  37. def runSparkConnector(): Try[SparkJobResult]

  38. lazy val session: SparkSession

    Definition Classes
    SparkJob
  39. implicit val settings: Settings

    Definition Classes
    BigQuerySparkJob → JobBase
  40. lazy val sparkEnv: SparkEnv

    Definition Classes
    SparkJob
  41. final def synchronized[T0](arg0: ⇒ T0): T0

    Definition Classes
    AnyRef
  42. val tableId: TableId

    Definition Classes
    BigQueryJobBase
  43. def timePartitioning(partitionField: String, days: Option[Int] = None, requirePartitionFilter: Boolean): Builder

    Definition Classes
    BigQueryJobBase
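
    A sketch of the equivalent builder call with the BigQuery Java client, e.g. for timePartitioning("ts", days = Some(7), requirePartitionFilter = true); the field name and expiration are examples.

    import com.google.cloud.bigquery.TimePartitioning

    val partitioning: TimePartitioning.Builder =
      TimePartitioning.newBuilder(TimePartitioning.Type.DAY)
        .setField("ts")                            // partition column
        .setExpirationMs(7L * 24 * 60 * 60 * 1000) // days converted to milliseconds
        .setRequirePartitionFilter(true)           // queries must filter on the partition
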
  44. def toString(): String

    Definition Classes
    AnyRef → Any
  45. final def wait(): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  46. final def wait(arg0: Long, arg1: Int): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  47. final def wait(arg0: Long): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
