Load an ODPS table into an org.apache.spark.sql.DataFrame.
val sqlContext = ...
val odpsOps = ...
val odpstableDF = odpsOps.loadOdpsTable(sqlContext, "odps-project", "odps-table", Array(0, 2, 3), 2)
A Spark SQL context.
The name of the ODPS project.
The name of the table the job is reading.
The zero-based indexes of the columns to load, e.g. Array(0, 2, 3).
The number of RDD partitions, which determines the read concurrency on the ODPS table.
A DataFrame containing the selected records of the ODPS table.
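The column-index argument simply picks columns by zero-based position. As a hypothetical illustration (a plain list stands in for one table row; the row values are made up), Array(0, 2, 3) keeps the first, third, and fourth columns:

```java
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class ColumnSelection {
    // Select columns by zero-based index, mirroring the Array(0, 2, 3)
    // argument of loadOdpsTable. The List<String> stands in for one table
    // row; the values here are hypothetical.
    static List<String> selectColumns(List<String> row, int[] indexes) {
        return Arrays.stream(indexes)
                     .mapToObj(row::get)
                     .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        List<String> row = List.of("1001", "alice", "beijing", "30");
        System.out.println(selectColumns(row, new int[]{0, 2, 3}));
        // prints [1001, beijing, 30]
    }
}
```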
Load a partition of an ODPS table into an org.apache.spark.sql.DataFrame.
val sqlContext = ...
val odpsOps = ...
val odpstableDF = odpsOps.loadOdpsTable(sqlContext, "odps-project", "odps-table", "odps-partition", Array(0, 2, 3), 2)
A Spark SQL context.
The name of the ODPS project.
The name of the table the job is reading.
The name of the partition, used when the job reads a Partitioned Table, e.g. pt='xxx',ds='yyy'.
The zero-based indexes of the columns to load, e.g. Array(0, 2, 3).
The number of RDD partitions, which determines the read concurrency on the ODPS table.
A DataFrame containing the selected records of the ODPS table partition.
Read an ODPS table into an org.apache.spark.rdd.RDD.
val odpsOps = ...
val odpsTable = odpsOps.readTable("odps-project", "odps-table", readFunc, 2)

def readFunc(record: Record, schema: TableSchema): Array[Long] = {
  val ret = new Array[Long](schema.getColumns.size())
  for (i <- 0 until schema.getColumns.size()) {
    ret(i) = record.getString(i).toLong
  }
  ret
}
The name of the ODPS project.
The name of the table the job is reading.
A function that converts an ODPS record into an element of the resulting org.apache.spark.rdd.RDD. The function is applied to every com.aliyun.odps.data.Record of the table.
The number of RDD partitions, which determines the read concurrency on the ODPS table.
An RDD containing all records of the ODPS table.
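The readFunc above parses every column of each record as a Long. The per-record step can be sketched in plain Java; the String[] here is a stand-in for com.aliyun.odps.data.Record, which is only available with the ODPS SDK:

```java
import java.util.Arrays;

public class ReadFuncSketch {
    // Mirror of readFunc: parse every string column of a record as a long.
    // The String[] stands in for com.aliyun.odps.data.Record; a real readFunc
    // would call record.getString(i) for each of the schema's columns.
    static long[] recordToLongs(String[] fields) {
        long[] ret = new long[fields.length];
        for (int i = 0; i < fields.length; i++) {
            ret[i] = Long.parseLong(fields[i]);
        }
        return ret;
    }

    public static void main(String[] args) {
        System.out.println(Arrays.toString(recordToLongs(new String[]{"1", "2", "3"})));
        // prints [1, 2, 3]
    }
}
```

Note that Long.parseLong throws NumberFormatException on non-numeric columns, so this transformation assumes every selected column holds a numeric string.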
Read a partition of an ODPS table into an org.apache.spark.rdd.RDD.
val odpsOps = ...
val odpsTable = odpsOps.readTable("odps-project", "odps-table", "odps-partition", readFunc, 2)

def readFunc(record: Record, schema: TableSchema): Array[Long] = {
  val ret = new Array[Long](schema.getColumns.size())
  for (i <- 0 until schema.getColumns.size()) {
    ret(i) = record.getString(i).toLong
  }
  ret
}
The name of the ODPS project.
The name of the table the job is reading.
The name of the partition, used when the job reads a Partitioned Table, e.g. pt='xxx',ds='yyy'.
A function that converts an ODPS record into an element of the resulting org.apache.spark.rdd.RDD. The function is applied to every com.aliyun.odps.data.Record of the partition.
The number of RDD partitions, which determines the read concurrency on the ODPS table.
An RDD containing all records of the ODPS table partition.
Read an ODPS table into an org.apache.spark.api.java.JavaRDD.
OdpsOps odpsOps = ...

static class RecordToLongs implements Function2<Record, TableSchema, List<Long>> {
  @Override
  public List<Long> call(Record record, TableSchema schema) throws Exception {
    List<Long> ret = new ArrayList<Long>();
    for (int i = 0; i < schema.getColumns().size(); i++) {
      ret.add(Long.valueOf(record.getString(i)));
    }
    return ret;
  }
}

JavaRDD<List<Long>> readData = odpsOps.readTableWithJava("odps-project", "odps-table", new RecordToLongs(), 2);
The name of the ODPS project.
The name of the table the job is reading.
A function that converts an ODPS record into an element of the resulting org.apache.spark.api.java.JavaRDD. The function is applied to every com.aliyun.odps.data.Record of the table.
The number of RDD partitions, which determines the read concurrency on the ODPS table.
A JavaRDD containing all records of the ODPS table.
Read a partition of an ODPS table into an org.apache.spark.api.java.JavaRDD.
OdpsOps odpsOps = ...

static class RecordToLongs implements Function2<Record, TableSchema, List<Long>> {
  @Override
  public List<Long> call(Record record, TableSchema schema) throws Exception {
    List<Long> ret = new ArrayList<Long>();
    for (int i = 0; i < schema.getColumns().size(); i++) {
      ret.add(Long.valueOf(record.getString(i)));
    }
    return ret;
  }
}

JavaRDD<List<Long>> readData = odpsOps.readTableWithJava("odps-project", "odps-table", "odps-partition", new RecordToLongs(), 2);
The name of the ODPS project.
The name of the table the job is reading.
The name of the partition, used when the job reads a Partitioned Table, e.g. pt='xxx',ds='yyy'.
A function that converts an ODPS record into an element of the resulting org.apache.spark.api.java.JavaRDD. The function is applied to every com.aliyun.odps.data.Record of the partition.
The number of RDD partitions, which determines the read concurrency on the ODPS table.
A JavaRDD containing all records of the ODPS table partition.
Save an RDD to an ODPS table.
val odpsOps = ...
val data: RDD[Array[Long]] = ...
odpsOps.saveToTable("odps-project", "odps-table", data, writeFunc)

def writeFunc(kv: Array[Long], record: Record, schema: TableSchema) {
  for (i <- 0 until schema.getColumns.size()) {
    record.setString(i, kv(i).toString)
  }
}
The name of the ODPS project.
The name of the table the job is writing.
An org.apache.spark.rdd.RDD that will be written into the ODPS table.
A function that writes an element of the RDD into an ODPS record. The function is applied to every element of the RDD.
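writeFunc performs the inverse of readFunc: each RDD element is rendered into the string columns of an output record. A plain-Java sketch of that per-element step, with a String[] standing in for com.aliyun.odps.data.Record (a real writeFunc would call record.setString(i, ...) instead):

```java
public class WriteFuncSketch {
    // Mirror of writeFunc: render each long field of an RDD element as the
    // string value of the corresponding record column. The String[] stands
    // in for com.aliyun.odps.data.Record.
    static String[] longsToRecord(long[] kv) {
        String[] record = new String[kv.length];
        for (int i = 0; i < kv.length; i++) {
            record[i] = Long.toString(kv[i]);
        }
        return record;
    }

    public static void main(String[] args) {
        System.out.println(String.join(",", longsToRecord(new long[]{7, 8, 9})));
        // prints 7,8,9
    }
}
```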
Save an RDD to a partition of an ODPS table.
val odpsOps = ...
val data: RDD[Array[Long]] = ...
odpsOps.saveToTable("odps-project", "odps-table", "odps-partition", data, writeFunc, false, false)

def writeFunc(kv: Array[Long], record: Record, schema: TableSchema) {
  for (i <- 0 until schema.getColumns.size()) {
    record.setString(i, kv(i).toString)
  }
}
The name of the ODPS project.
The name of the table the job is writing.
The name of the partition, used when the job writes to a Partitioned Table, e.g. pt='xxx',ds='yyy'.
An org.apache.spark.rdd.RDD that will be written into the ODPS table.
A function that writes an element of the RDD into an ODPS record. The function is applied to every element of the RDD.
Whether to create the table partition if it does not exist.
Whether to overwrite the partition if it exists. NOTE: only overwriting a partition is supported, not a whole table.
Save an RDD to a partition of an ODPS table.
val odpsOps = ...
val data: RDD[Array[Long]] = ...
odpsOps.saveToTable("odps-project", "odps-table", "odps-partition", data, writeFunc, false)

def writeFunc(kv: Array[Long], record: Record, schema: TableSchema) {
  for (i <- 0 until schema.getColumns.size()) {
    record.setString(i, kv(i).toString)
  }
}
The name of the ODPS project.
The name of the table the job is writing.
The name of the partition, used when the job writes to a Partitioned Table, e.g. pt='xxx',ds='yyy'.
An org.apache.spark.rdd.RDD that will be written into the ODPS table.
A function that writes an element of the RDD into an ODPS record. The function is applied to every element of the RDD.
Whether to create the table partition if it does not exist.
Save an RDD to a partition of an ODPS table.
val odpsOps = ...
val data: RDD[Array[Long]] = ...
odpsOps.saveToTable("odps-project", "odps-table", "odps-partition", data, writeFunc)

def writeFunc(kv: Array[Long], record: Record, schema: TableSchema) {
  for (i <- 0 until schema.getColumns.size()) {
    record.setString(i, kv(i).toString)
  }
}
The name of the ODPS project.
The name of the table the job is writing.
The name of the partition, used when the job writes to a Partitioned Table, e.g. pt='xxx',ds='yyy'.
An org.apache.spark.rdd.RDD that will be written into the ODPS table.
A function that writes an element of the RDD into an ODPS record. The function is applied to every element of the RDD.
Save a JavaRDD to an ODPS table.
OdpsOps odpsOps = ...
JavaRDD<List<Long>> data = ...

static class SaveRecord implements Function3<List<Long>, Record, TableSchema, BoxedUnit> {
  @Override
  public BoxedUnit call(List<Long> data, Record record, TableSchema schema) throws Exception {
    for (int i = 0; i < schema.getColumns().size(); i++) {
      record.setString(i, data.get(i).toString());
    }
    return null;
  }
}

odpsOps.saveToTableWithJava("odps-project", "odps-table", data, new SaveRecord());
The name of the ODPS project.
The name of the table the job is writing.
An org.apache.spark.api.java.JavaRDD that will be written into the ODPS table.
A function that writes an element of the JavaRDD into an ODPS record. The function is applied to every element of the JavaRDD.
Save a JavaRDD to a partition of an ODPS table.
OdpsOps odpsOps = ...
JavaRDD<List<Long>> data = ...

static class SaveRecord implements Function3<List<Long>, Record, TableSchema, BoxedUnit> {
  @Override
  public BoxedUnit call(List<Long> data, Record record, TableSchema schema) throws Exception {
    for (int i = 0; i < schema.getColumns().size(); i++) {
      record.setString(i, data.get(i).toString());
    }
    return null;
  }
}

odpsOps.saveToTableWithJava("odps-project", "odps-table", "odps-partition", data, new SaveRecord(), false, false);
The name of the ODPS project.
The name of the table the job is writing.
The name of the partition, used when the job writes to a Partitioned Table, e.g. pt='xxx',ds='yyy'.
An org.apache.spark.api.java.JavaRDD that will be written into the ODPS table.
A function that writes an element of the JavaRDD into an ODPS record. The function is applied to every element of the JavaRDD.
Whether to create the table partition if it does not exist.
Whether to overwrite the partition if it exists. NOTE: only overwriting a partition is supported, not a whole table.
Save a JavaRDD to a partition of an ODPS table.
OdpsOps odpsOps = ...
JavaRDD<List<Long>> data = ...

static class SaveRecord implements Function3<List<Long>, Record, TableSchema, BoxedUnit> {
  @Override
  public BoxedUnit call(List<Long> data, Record record, TableSchema schema) throws Exception {
    for (int i = 0; i < schema.getColumns().size(); i++) {
      record.setString(i, data.get(i).toString());
    }
    return null;
  }
}

odpsOps.saveToTableWithJava("odps-project", "odps-table", "odps-partition", data, new SaveRecord(), false);
The name of the ODPS project.
The name of the table the job is writing.
The name of the partition, used when the job writes to a Partitioned Table, e.g. pt='xxx',ds='yyy'.
An org.apache.spark.api.java.JavaRDD that will be written into the ODPS table.
A function that writes an element of the JavaRDD into an ODPS record. The function is applied to every element of the JavaRDD.
Whether to create the table partition if it does not exist.
Save a JavaRDD to a partition of an ODPS table.
OdpsOps odpsOps = ...
JavaRDD<List<Long>> data = ...

static class SaveRecord implements Function3<List<Long>, Record, TableSchema, BoxedUnit> {
  @Override
  public BoxedUnit call(List<Long> data, Record record, TableSchema schema) throws Exception {
    for (int i = 0; i < schema.getColumns().size(); i++) {
      record.setString(i, data.get(i).toString());
    }
    return null;
  }
}

odpsOps.saveToTableWithJava("odps-project", "odps-table", "odps-partition", data, new SaveRecord());
The name of the ODPS project.
The name of the table the job is writing.
The name of the partition, used when the job writes to a Partitioned Table, e.g. pt='xxx',ds='yyy'.
An org.apache.spark.api.java.JavaRDD that will be written into the ODPS table.
A function that writes an element of the JavaRDD into an ODPS record. The function is applied to every element of the JavaRDD.