public final class ApiV1Kt
-
Field Summary
private final Map<KClass<?>, Encoder<?>> ENCODERS
private final SparkContext sparkContext
-
Method Summary
final Map<KClass<?>, Encoder<?>> getENCODERS()
final SparkContext getSparkContext()
final <T extends Any> Dataset<T> toDS(SparkSession $self, List<T> list)
    Utility method to create a dataset from a list.
final <T extends Any> Dataset<T> toDS(List<T> $self, SparkSession spark)
    Utility method to create a dataset from a list.
final <T extends Any> Dataset<T> dsOf(SparkSession $self, Array<out T> t)
    Utility method to create a dataset from varargs.
final <T extends Any> Encoder<T> encoder()
    Main method of the API, which gives you seamless integration with Spark: it creates an encoder for any given supported type T. Supported types are data classes, primitives, and Lists, Maps, and Arrays containing them.
final <T extends Any> Encoder<T> generateEncoder(KType type, KClass<?> cls)
final <T extends Any, R extends Any> Dataset<R> map(Dataset<T> $self, Function1<T, R> func)
final <T extends Any, R extends Any> Dataset<R> flatMap(Dataset<T> $self, Function1<T, Iterator<R>> func)
final <T extends Any, I extends Iterable<T>> Dataset<T> flatten(Dataset<I> $self)
final <T extends Any, R extends Any> KeyValueGroupedDataset<R, T> groupByKey(Dataset<T> $self, Function1<T, R> func)
final <T extends Any, R extends Any> Dataset<R> mapPartitions(Dataset<T> $self, Function1<Iterator<T>, Iterator<R>> func)
final <T extends Any> Dataset<T> filterNotNull(Dataset<T> $self)
final <KEY extends Any, VALUE extends Any, R extends Any> KeyValueGroupedDataset<KEY, R> mapValues(KeyValueGroupedDataset<KEY, VALUE> $self, Function1<VALUE, R> func)
final <KEY extends Any, VALUE extends Any, R extends Any> Dataset<R> mapGroups(KeyValueGroupedDataset<KEY, VALUE> $self, Function2<KEY, Iterator<VALUE>, R> func)
final <KEY extends Any, VALUE extends Any> Dataset<Pair<KEY, VALUE>> reduceGroups(KeyValueGroupedDataset<KEY, VALUE> $self, Function2<VALUE, VALUE, VALUE> func)
final <T extends Any, R extends Any> Dataset<R> downcast(Dataset<T> $self)
final <R extends Any> Dataset<R> as(Dataset<?> $self)
final <R extends Any> Dataset<R> to(Dataset<?> $self)
final <T extends Any> Unit forEach(Dataset<T> $self, Function1<T, Unit> func)
final <T extends Any> Dataset<T> debugCodegen(Dataset<T> $self)
    It is hard to call Dataset.debugCodegen from Kotlin, so here is a utility for that.
final <T extends Any> Dataset<T> debug(Dataset<T> $self)
    It is hard to call Dataset.debug from Kotlin, so here is a utility for that.
final Column eq(Column $self, Column c)
final Column ==(Column $self, Column c)
final Column &&(Column $self, Column c)
final Column lit(Object a)
final <L extends Any, R extends Any> Dataset<Pair<L, R>> leftJoin(Dataset<L> $self, Dataset<R> right, Column col)
    Alias for Dataset.joinWith which passes the "left" argument and respects the fact that in the result of a left join the right relation is nullable.
final <L extends Any, R extends Any> Dataset<Pair<L, R>> rightJoin(Dataset<L> $self, Dataset<R> right, Column col)
    Alias for Dataset.joinWith which passes the "right" argument and respects the fact that in the result of a right join the left relation is nullable.
final <L extends Any, R extends Any> Dataset<Pair<L, R>> innerJoin(Dataset<L> $self, Dataset<R> right, Column col)
    Alias for Dataset.joinWith which passes the "inner" argument.
final <L extends Any, R extends Any> Dataset<Pair<L, R>> fullJoin(Dataset<L> $self, Dataset<R> right, Column col)
    Alias for Dataset.joinWith which passes the "full" argument and respects the fact that in the result of a full join either element of the resulting tuple is nullable.
final <T extends Any> Dataset<T> sort(Dataset<T> $self, Function1<Dataset<T>, Array<Column>> columns)
    Alias for Dataset.sort which forces the user to provide the sort columns from the source dataset.
final <T extends Any, R extends Any> R withCached(Dataset<T> $self, Boolean blockingUnpersist, Function1<Dataset<T>, R> executeOnCached)
    Opens a block in which one can run further computations on the already cached dataset; the data is unpersisted automatically at the end of the computation.
final <T extends Any> List<T> toList(Dataset<Row> $self)
final <R extends Any> Array<R> toArray(Dataset<?> $self)
final <T extends Any> Dataset<T> showDS(Dataset<T> $self, Integer numRows, Boolean truncate)
    Alternative to Dataset.show which returns the source dataset.
final DataType schema(KType type, Map<String, KType> map)
final Unit setLogLevel(SparkContext $self, SparkLogLevel level)
-
Method Detail
-
getENCODERS
final Map<KClass<?>, Encoder<?>> getENCODERS()
-
getSparkContext
final SparkContext getSparkContext()
-
toDS
final <T extends Any> Dataset<T> toDS(SparkSession $self, List<T> list)
Utility method to create a dataset from a list.
-
toDS
final <T extends Any> Dataset<T> toDS(List<T> $self, SparkSession spark)
Utility method to create a dataset from a list.
-
dsOf
final <T extends Any> Dataset<T> dsOf(SparkSession $self, Array<out T> t)
Utility method to create a dataset from varargs.
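For illustration, a minimal sketch of the three creation helpers, assuming a SparkSession named spark is in scope and a hypothetical data class Person(val name: String, val age: Int):

    val fromList = spark.toDS(listOf(Person("Anna", 31), Person("Boris", 24)))     // SparkSession receiver
    val fromReceiver = listOf(Person("Anna", 31), Person("Boris", 24)).toDS(spark) // List receiver
    val fromVarargs = spark.dsOf(Person("Anna", 31), Person("Boris", 24))          // vararg form

All three yield a Dataset<Person> built on the encoders described under encoder() below.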
-
encoder
final <T extends Any> Encoder<T> encoder()
Main method of the API, which gives you seamless integration with Spark: it creates an encoder for any given supported type T. Supported types are data classes, primitives, and Lists, Maps, and Arrays containing them.
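A hedged sketch of requesting an encoder explicitly; since encoder() takes no arguments, T is assumed to be a reified type parameter inferred at the call site (Person is the hypothetical data class from the example above):

    val personEncoder: Encoder<Person> = encoder()
    val ds = spark.createDataset(listOf(Person("Anna", 31)), personEncoder)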
-
flatMap
final <T extends Any, R extends Any> Dataset<R> flatMap(Dataset<T> $self, Function1<T, Iterator<R>> func)
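A minimal sketch, assuming a Dataset<String> named lines; note that the lambda returns an Iterator, matching the signature above:

    val words: Dataset<String> = lines.flatMap { it.split(" ").iterator() }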
-
groupByKey
final <T extends Any, R extends Any> KeyValueGroupedDataset<R, T> groupByKey(Dataset<T> $self, Function1<T, R> func)
-
mapPartitions
final <T extends Any, R extends Any> Dataset<R> mapPartitions(Dataset<T> $self, Function1<Iterator<T>, Iterator<R>> func)
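A minimal sketch on the hypothetical Dataset<Person> named people; the lambda sees a whole partition as an Iterator, so per-partition setup can happen once:

    val ages: Dataset<Int> = people.mapPartitions { rows ->
        // any expensive per-partition setup would go here
        rows.asSequence().map { it.age }.iterator()
    }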
-
mapValues
final <KEY extends Any, VALUE extends Any, R extends Any> KeyValueGroupedDataset<KEY, R> mapValues(KeyValueGroupedDataset<KEY, VALUE> $self, Function1<VALUE, R> func)
-
mapGroups
final <KEY extends Any, VALUE extends Any, R extends Any> Dataset<R> mapGroups(KeyValueGroupedDataset<KEY, VALUE> $self, Function2<KEY, Iterator<VALUE>, R> func)
-
reduceGroups
final <KEY extends Any, VALUE extends Any> Dataset<Pair<KEY, VALUE>> reduceGroups(KeyValueGroupedDataset<KEY, VALUE> $self, Function2<VALUE, VALUE, VALUE> func)
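A hedged sketch chaining the grouping operators above on the hypothetical Dataset<Person> named people:

    val byAge: KeyValueGroupedDataset<Int, Person> = people.groupByKey { it.age }
    val namesByAge = byAge.mapValues { it.name }                      // KeyValueGroupedDataset<Int, String>
    val countsByAge = byAge.mapGroups { age, ps -> age to ps.asSequence().count() }
    val firstByName = byAge.reduceGroups { a, b -> if (a.name <= b.name) a else b }  // Dataset<Pair<Int, Person>>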
-
debugCodegen
final <T extends Any> Dataset<T> debugCodegen(Dataset<T> $self)
It is hard to call Dataset.debugCodegen from Kotlin, so here is a utility for that.
-
debug
final <T extends Any> Dataset<T> debug(Dataset<T> $self)
It is hard to call Dataset.debug from Kotlin, so here is a utility for that.
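Because both helpers return the source dataset, they can be dropped into a call chain for inspection; a minimal sketch:

    people
        .map { it.age }
        .debugCodegen()   // calls Dataset.debugCodegen, then hands the dataset on unchanged
        .debug()          // calls Dataset.debug, likewise returning the dataset
        .show()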
-
leftJoin
final <L extends Any, R extends Any> Dataset<Pair<L, R>> leftJoin(Dataset<L> $self, Dataset<R> right, Column col)
Alias for Dataset.joinWith which passes "left" argument and respects the fact that in result of left join right relation is nullable- Parameters:
right
- right datasetcol
- join condition
-
rightJoin
final <L extends Any, R extends Any> Dataset<Pair<L, R>> rightJoin(Dataset<L> $self, Dataset<R> right, Column col)
Alias for Dataset.joinWith which passes "right" argument and respects the fact that in result of right join left relation is nullable- Parameters:
right
- right datasetcol
- join condition
-
innerJoin
final <L extends Any, R extends Any> Dataset<Pair<L, R>> innerJoin(Dataset<L> $self, Dataset<R> right, Column col)
Alias for Dataset.joinWith which passes "inner" argument- Parameters:
right
- right datasetcol
- join condition
-
fullJoin
final <L extends Any, R extends Any> Dataset<Pair<L, R>> fullJoin(Dataset<L> $self, Dataset<R> right, Column col)
Alias for Dataset.joinWith which passes "full" argument and respects the fact that in result of join any element of resulting tuple is nullable- Parameters:
right
- right datasetcol
- join condition
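A hedged sketch of the typed joins, using two hypothetical data classes; per the descriptions above, the right element of each resulting pair is assumed to be typed as nullable after leftJoin (and both elements after fullJoin):

    data class Customer(val id: Int, val name: String)
    data class Order(val customerId: Int, val amount: Double)

    val customers = spark.dsOf(Customer(1, "Anna"), Customer(2, "Boris"))
    val orders = spark.dsOf(Order(1, 20.0), Order(1, 30.0))

    customers
        .leftJoin(orders, customers.col("id").eq(orders.col("customerId")))
        .forEach { (customer, order) -> println("${customer.name}: ${order?.amount}") }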
-
sort
final <T extends Any> Dataset<T> sort(Dataset<T> $self, Function1<Dataset<T>, Array<Column>> columns)
Alias for Dataset.sort which forces the user to provide the sort columns from the source dataset.
Parameters:
columns - producer of the sort columns
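A minimal sketch; the lambda receives the source dataset, so the sort columns can only come from it:

    val sorted = people.sort { arrayOf(it.col("age"), it.col("name")) }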
-
withCached
final <T extends Any, R extends Any> R withCached(Dataset<T> $self, Boolean blockingUnpersist, Function1<Dataset<T>, R> executeOnCached)
This function opens a block in which one can run further computations on the already cached dataset. The data will be unpersisted automatically at the end of the computation. It may be useful in many situations, for example, when one needs to write data to several targets:
ds.withCached {
    write()
        .also { it.orc("First destination") }
        .also { it.avro("Second destination") }
}
Parameters:
blockingUnpersist - whether execution should block until everything persisted has been deleted
executeOnCached - the block to execute on the cached dataset
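A second hedged sketch, computing two aggregates over a single cache pass; as in the example above, calls inside the block are assumed to dispatch to the cached dataset:

    val (total, distinctAges) = people.withCached(blockingUnpersist = false) {
        count() to map { it.age }.distinct().count()
    }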
-
showDS
final <T extends Any> Dataset<T> showDS(Dataset<T> $self, Integer numRows, Boolean truncate)
Alternative to Dataset.show which returns the source dataset.
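Because the source dataset is returned, showDS can be used to peek mid-chain; a minimal sketch:

    val ages = people
        .map { it.age }
        .showDS(10, false)   // prints up to 10 rows without truncating, then hands the dataset on
        .map { it + 1 }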
-
setLogLevel
final Unit setLogLevel(SparkContext $self, SparkLogLevel level)
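A minimal sketch, assuming SparkLogLevel exposes an ERROR constant and using the top-level sparkContext property from the Field Summary:

    sparkContext.setLogLevel(SparkLogLevel.ERROR)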