Class/Object

org.zouzias.spark.lucenerdd

LuceneRDD

Related Docs: object LuceneRDD | package lucenerdd

Permalink

class LuceneRDD[T] extends RDD[T] with LuceneRDDConfigurable

Spark RDD with Lucene's query capabilities (term, prefix, fuzzy, phrase query)

Linear Supertypes
LuceneRDDConfigurable, Configurable, RDD[T], Logging, Serializable, Serializable, AnyRef, Any
Known Subclasses
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. LuceneRDD
  2. LuceneRDDConfigurable
  3. Configurable
  4. RDD
  5. Logging
  6. Serializable
  7. Serializable
  8. AnyRef
  9. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new LuceneRDD(partitionsRDD: RDD[AbstractLuceneRDDPartition[T]])(implicit arg0: ClassTag[T])

    Permalink

Value Members

  1. final def !=(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  3. def ++(other: RDD[T]): RDD[T]

    Permalink
    Definition Classes
    RDD
  4. final def ==(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  5. val DefaultFacetNum: Int

    Permalink
    Attributes
    protected
    Definition Classes
    LuceneRDDConfigurable
  6. val DefaultTopK: Int

    Permalink

    Default value for topK queries

    Default value for topK queries

    Attributes
    protected
    Definition Classes
    LuceneRDDConfigurable
  7. val MaxDefaultTopKValue: Int

    Permalink
    Attributes
    protected
    Definition Classes
    LuceneRDDConfigurable
  8. def aggregate[U](zeroValue: U)(seqOp: (U, T) ⇒ U, combOp: (U, U) ⇒ U)(implicit arg0: ClassTag[U]): U

    Permalink
    Definition Classes
    RDD
  9. final def asInstanceOf[T0]: T0

    Permalink
    Definition Classes
    Any
  10. def cache(): LuceneRDD.this.type

    Permalink
    Definition Classes
    LuceneRDD → RDD
  11. def cartesian[U](other: RDD[U])(implicit arg0: ClassTag[U]): RDD[(T, U)]

    Permalink
    Definition Classes
    RDD
  12. def checkpoint(): Unit

    Permalink
    Definition Classes
    RDD
  13. def clearDependencies(): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    RDD
  14. def clone(): AnyRef

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  15. def close(): Unit

    Permalink
  16. def coalesce(numPartitions: Int, shuffle: Boolean, partitionCoalescer: Option[PartitionCoalescer])(implicit ord: Ordering[T]): RDD[T]

    Permalink
    Definition Classes
    RDD
  17. def collect[U](f: PartialFunction[T, U])(implicit arg0: ClassTag[U]): RDD[U]

    Permalink
    Definition Classes
    RDD
  18. def collect(): Array[T]

    Permalink
    Definition Classes
    RDD
  19. def compute(part: Partition, context: TaskContext): Iterator[T]

    Permalink

    RDD compute method.

    RDD compute method.

    Definition Classes
    LuceneRDD → RDD
  20. val config: Config

    Permalink
    Definition Classes
    Configurable
  21. def context: SparkContext

    Permalink
    Definition Classes
    RDD
  22. def count(): Long

    Permalink
    Definition Classes
    LuceneRDD → RDD
  23. def countApprox(timeout: Long, confidence: Double): PartialResult[BoundedDouble]

    Permalink
    Definition Classes
    RDD
  24. def countApproxDistinct(relativeSD: Double): Long

    Permalink
    Definition Classes
    RDD
  25. def countApproxDistinct(p: Int, sp: Int): Long

    Permalink
    Definition Classes
    RDD
  26. def countByValue()(implicit ord: Ordering[T]): Map[T, Long]

    Permalink
    Definition Classes
    RDD
  27. def countByValueApprox(timeout: Long, confidence: Double)(implicit ord: Ordering[T]): PartialResult[Map[T, BoundedDouble]]

    Permalink
    Definition Classes
    RDD
  28. def dedup[T1](searchQueryGen: (T1) ⇒ String, topK: Int = DefaultTopK)(implicit arg0: ClassTag[T1]): RDD[(T1, Array[SparkScoreDoc])]

    Permalink

    Deduplication of self

    Deduplication of self

    searchQueryGen

    Search query mapper function

    topK

    Number of results to return for each deduplication query

  29. final def dependencies: Seq[Dependency[_]]

    Permalink
    Definition Classes
    RDD
  30. def distinct(): RDD[T]

    Permalink
    Definition Classes
    RDD
  31. def distinct(numPartitions: Int)(implicit ord: Ordering[T]): RDD[T]

    Permalink
    Definition Classes
    RDD
  32. final def eq(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  33. def equals(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  34. def exists(elem: T): Boolean

    Permalink
  35. def exists(doc: Map[String, String]): Boolean

    Permalink

    Lucene generic query

  36. def fields(): Set[String]

    Permalink

    Return all document fields

  37. def filter(pred: (T) ⇒ Boolean): LuceneRDD[T]

    Permalink
    Definition Classes
    LuceneRDD → RDD
  38. def finalize(): Unit

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  39. def first(): T

    Permalink
    Definition Classes
    RDD
  40. def firstParent[U](implicit arg0: ClassTag[U]): RDD[U]

    Permalink
    Attributes
    protected[org.apache.spark]
    Definition Classes
    RDD
  41. def flatMap[U](f: (T) ⇒ TraversableOnce[U])(implicit arg0: ClassTag[U]): RDD[U]

    Permalink
    Definition Classes
    RDD
  42. def fold(zeroValue: T)(op: (T, T) ⇒ T): T

    Permalink
    Definition Classes
    RDD
  43. def foreach(f: (T) ⇒ Unit): Unit

    Permalink
    Definition Classes
    RDD
  44. def foreachPartition(f: (Iterator[T]) ⇒ Unit): Unit

    Permalink
    Definition Classes
    RDD
  45. def fuzzyQuery(fieldName: String, query: String, maxEdits: Int, topK: Int = DefaultTopK): LuceneRDDResponse

    Permalink

    Lucene fuzzy query

    Lucene fuzzy query

    fieldName

    Name of field

    query

    Query text

    maxEdits

    Fuzziness, edit distance

    topK

    Number of documents to return

  46. def getCheckpointFile: Option[String]

    Permalink
    Definition Classes
    RDD
  47. final def getClass(): Class[_]

    Permalink
    Definition Classes
    AnyRef → Any
  48. def getDependencies: Seq[Dependency[_]]

    Permalink
    Attributes
    protected
    Definition Classes
    RDD
  49. final def getNumPartitions: Int

    Permalink
    Definition Classes
    RDD
    Annotations
    @Since( "1.6.0" )
  50. def getPartitions: Array[Partition]

    Permalink
    Attributes
    protected
    Definition Classes
    LuceneRDD → RDD
  51. def getPreferredLocations(s: Partition): Seq[String]

    Permalink
    Attributes
    protected
    Definition Classes
    LuceneRDD → RDD
  52. def getStorageLevel: StorageLevel

    Permalink
    Definition Classes
    RDD
  53. def glom(): RDD[Array[T]]

    Permalink
    Definition Classes
    RDD
  54. def groupBy[K](f: (T) ⇒ K, p: Partitioner)(implicit kt: ClassTag[K], ord: Ordering[K]): RDD[(K, Iterable[T])]

    Permalink
    Definition Classes
    RDD
  55. def groupBy[K](f: (T) ⇒ K, numPartitions: Int)(implicit kt: ClassTag[K]): RDD[(K, Iterable[T])]

    Permalink
    Definition Classes
    RDD
  56. def groupBy[K](f: (T) ⇒ K)(implicit kt: ClassTag[K]): RDD[(K, Iterable[T])]

    Permalink
    Definition Classes
    RDD
  57. def hashCode(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  58. val id: Int

    Permalink
    Definition Classes
    RDD
  59. def initializeLogIfNecessary(isInterpreter: Boolean): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  60. def intersection(other: RDD[T], numPartitions: Int): RDD[T]

    Permalink
    Definition Classes
    RDD
  61. def intersection(other: RDD[T], partitioner: Partitioner)(implicit ord: Ordering[T]): RDD[T]

    Permalink
    Definition Classes
    RDD
  62. def intersection(other: RDD[T]): RDD[T]

    Permalink
    Definition Classes
    RDD
  63. def isCheckpointed: Boolean

    Permalink
    Definition Classes
    RDD
  64. def isEmpty(): Boolean

    Permalink
    Definition Classes
    RDD
  65. final def isInstanceOf[T0]: Boolean

    Permalink
    Definition Classes
    Any
  66. def isTraceEnabled(): Boolean

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  67. final def iterator(split: Partition, context: TaskContext): Iterator[T]

    Permalink
    Definition Classes
    RDD
  68. def keyBy[K](f: (T) ⇒ K): RDD[(K, T)]

    Permalink
    Definition Classes
    RDD
  69. def link[T1](other: RDD[T1], searchQueryGen: (T1) ⇒ String, topK: Int = DefaultTopK)(implicit arg0: ClassTag[T1]): RDD[(T1, Array[SparkScoreDoc])]

    Permalink

    Entity linkage via Lucene query over all elements of an RDD.

    Entity linkage via Lucene query over all elements of an RDD.

    T1

    Type of the elements of other

    other

    RDD to be linked

    searchQueryGen

    Function that generates a search query for each element of other

    returns

    an RDD of Tuple2 that contains the linked search Lucene documents in the second position. Note: Currently the query strings of the other RDD are collected to the driver and broadcast to the workers.

  70. def linkByQuery[T1](other: RDD[T1], searchQueryGen: (T1) ⇒ Query, topK: Int = DefaultTopK)(implicit arg0: ClassTag[T1]): RDD[(T1, Array[SparkScoreDoc])]

    Permalink

    Entity linkage via Lucene query over all elements of an RDD.

    Entity linkage via Lucene query over all elements of an RDD.

    T1

    Type of the elements of other

    other

    RDD to be linked

    searchQueryGen

    Function that generates a Lucene Query object for each element of other

    returns

    an RDD of Tuple2 that contains the linked search Lucene Document in the second position

  71. def linkDataFrame(other: DataFrame, searchQueryGen: (Row) ⇒ String, topK: Int = DefaultTopK): RDD[(Row, Array[SparkScoreDoc])]

    Permalink

    Entity linkage via Lucene query over all elements of an RDD.

    Entity linkage via Lucene query over all elements of an RDD.

    other

    DataFrame to be linked

    searchQueryGen

    Function that generates a search query for each element of other

    returns

    an RDD of Tuple2 that contains the linked search Lucene documents in the second position

  72. def localCheckpoint(): LuceneRDD.this.type

    Permalink
    Definition Classes
    RDD
  73. def log: Logger

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  74. def logDebug(msg: ⇒ String, throwable: Throwable): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  75. def logDebug(msg: ⇒ String): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  76. def logError(msg: ⇒ String, throwable: Throwable): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  77. def logError(msg: ⇒ String): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  78. def logInfo(msg: ⇒ String, throwable: Throwable): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  79. def logInfo(msg: ⇒ String): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  80. def logName: String

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  81. def logTrace(msg: ⇒ String, throwable: Throwable): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  82. def logTrace(msg: ⇒ String): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  83. def logWarning(msg: ⇒ String, throwable: Throwable): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  84. def logWarning(msg: ⇒ String): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  85. def map[U](f: (T) ⇒ U)(implicit arg0: ClassTag[U]): RDD[U]

    Permalink
    Definition Classes
    RDD
  86. def mapPartitions[U](f: (Iterator[T]) ⇒ Iterator[U], preservesPartitioning: Boolean)(implicit arg0: ClassTag[U]): RDD[U]

    Permalink
    Definition Classes
    RDD
  87. def mapPartitionsWithIndex[U](f: (Int, Iterator[T]) ⇒ Iterator[U], preservesPartitioning: Boolean)(implicit arg0: ClassTag[U]): RDD[U]

    Permalink
    Definition Classes
    RDD
  88. def max()(implicit ord: Ordering[T]): T

    Permalink
    Definition Classes
    RDD
  89. def min()(implicit ord: Ordering[T]): T

    Permalink
    Definition Classes
    RDD
  90. def moreLikeThis(fieldName: String, query: String, minTermFreq: Int, minDocFreq: Int, topK: Int = DefaultTopK): LuceneRDDResponse

    Permalink

    Lucene's More Like This (MLT) functionality

    Lucene's More Like This (MLT) functionality

    fieldName

    Field name

    query

    Query text

    minTermFreq

    Minimum term frequency

    minDocFreq

    Minimum document frequency

    topK

    Number of returned documents

  91. var name: String

    Permalink
    Definition Classes
    RDD
  92. final def ne(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  93. final def notify(): Unit

    Permalink
    Definition Classes
    AnyRef
  94. final def notifyAll(): Unit

    Permalink
    Definition Classes
    AnyRef
  95. def parent[U](j: Int)(implicit arg0: ClassTag[U]): RDD[U]

    Permalink
    Attributes
    protected[org.apache.spark]
    Definition Classes
    RDD
  96. def partitionMapper(f: (AbstractLuceneRDDPartition[T]) ⇒ LuceneRDDResponsePartition, k: Int): LuceneRDDResponse

    Permalink

    Maps partition results

    Maps partition results

    f

    Function to apply on each partition / distributed index

    k

    number of documents to return

    Attributes
    protected
  97. val partitioner: Option[Partitioner]

    Permalink
    Definition Classes
    RDD
  98. final def partitions: Array[Partition]

    Permalink
    Definition Classes
    RDD
  99. val partitionsRDD: RDD[AbstractLuceneRDDPartition[T]]

    Permalink
    Attributes
    protected
  100. def persist(newLevel: StorageLevel): LuceneRDD.this.type

    Permalink
    Definition Classes
    LuceneRDD → RDD
  101. def persist(): LuceneRDD.this.type

    Permalink
    Definition Classes
    RDD
  102. def phraseQuery(fieldName: String, query: String, topK: Int = DefaultTopK): LuceneRDDResponse

    Permalink

    Lucene phrase Query

    Lucene phrase Query

    fieldName

    Name of field

    query

    Query text

    topK

    Number of documents to return

  103. def pipe(command: Seq[String], env: Map[String, String], printPipeContext: ((String) ⇒ Unit) ⇒ Unit, printRDDElement: (T, (String) ⇒ Unit) ⇒ Unit, separateWorkingDir: Boolean, bufferSize: Int, encoding: String): RDD[String]

    Permalink
    Definition Classes
    RDD
  104. def pipe(command: String, env: Map[String, String]): RDD[String]

    Permalink
    Definition Classes
    RDD
  105. def pipe(command: String): RDD[String]

    Permalink
    Definition Classes
    RDD
  106. final def preferredLocations(split: Partition): Seq[String]

    Permalink
    Definition Classes
    RDD
  107. def prefixQuery(fieldName: String, query: String, topK: Int = DefaultTopK): LuceneRDDResponse

    Permalink

    Lucene prefix query

    Lucene prefix query

    fieldName

    Name of field

    query

    Prefix query text

    topK

    Number of documents to return

  108. def query(searchString: String, topK: Int = DefaultTopK): LuceneRDDResponse

    Permalink

    Generic query using Lucene's query parser

    Generic query using Lucene's query parser

    searchString

    Query String

  109. def randomSplit(weights: Array[Double], seed: Long): Array[RDD[T]]

    Permalink
    Definition Classes
    RDD
  110. def reduce(f: (T, T) ⇒ T): T

    Permalink
    Definition Classes
    RDD
  111. def repartition(numPartitions: Int)(implicit ord: Ordering[T]): RDD[T]

    Permalink
    Definition Classes
    RDD
  112. def sample(withReplacement: Boolean, fraction: Double, seed: Long): RDD[T]

    Permalink
    Definition Classes
    RDD
  113. def saveAsObjectFile(path: String): Unit

    Permalink
    Definition Classes
    RDD
  114. def saveAsTextFile(path: String, codec: Class[_ <: CompressionCodec]): Unit

    Permalink
    Definition Classes
    RDD
  115. def saveAsTextFile(path: String): Unit

    Permalink
    Definition Classes
    RDD
  116. def setName(_name: String): LuceneRDD.this.type

    Permalink

    Set the name for the RDD; By default set to "LuceneRDD"

    Set the name for the RDD; By default set to "LuceneRDD"

    Definition Classes
    LuceneRDD → RDD
  117. def sortBy[K](f: (T) ⇒ K, ascending: Boolean, numPartitions: Int)(implicit ord: Ordering[K], ctag: ClassTag[K]): RDD[T]

    Permalink
    Definition Classes
    RDD
  118. def sparkContext: SparkContext

    Permalink
    Definition Classes
    RDD
  119. def subtract(other: RDD[T], p: Partitioner)(implicit ord: Ordering[T]): RDD[T]

    Permalink
    Definition Classes
    RDD
  120. def subtract(other: RDD[T], numPartitions: Int): RDD[T]

    Permalink
    Definition Classes
    RDD
  121. def subtract(other: RDD[T]): RDD[T]

    Permalink
    Definition Classes
    RDD
  122. final def synchronized[T0](arg0: ⇒ T0): T0

    Permalink
    Definition Classes
    AnyRef
  123. def take(num: Int): Array[T]

    Permalink
    Definition Classes
    RDD
  124. def takeOrdered(num: Int)(implicit ord: Ordering[T]): Array[T]

    Permalink
    Definition Classes
    RDD
  125. def takeSample(withReplacement: Boolean, num: Int, seed: Long): Array[T]

    Permalink
    Definition Classes
    RDD
  126. def termQuery(fieldName: String, query: String, topK: Int = DefaultTopK): LuceneRDDResponse

    Permalink

    Lucene term query

    Lucene term query

    fieldName

    Name of field

    query

    Term to search on

    topK

    Number of documents to return

  127. def toDebugString: String

    Permalink
    Definition Classes
    RDD
  128. def toJavaRDD(): JavaRDD[T]

    Permalink
    Definition Classes
    RDD
  129. def toLocalIterator: Iterator[T]

    Permalink
    Definition Classes
    RDD
  130. def toString(): String

    Permalink
    Definition Classes
    RDD → AnyRef → Any
  131. def top(num: Int)(implicit ord: Ordering[T]): Array[T]

    Permalink
    Definition Classes
    RDD
  132. def treeAggregate[U](zeroValue: U)(seqOp: (U, T) ⇒ U, combOp: (U, U) ⇒ U, depth: Int)(implicit arg0: ClassTag[U]): U

    Permalink
    Definition Classes
    RDD
  133. def treeReduce(f: (T, T) ⇒ T, depth: Int): T

    Permalink
    Definition Classes
    RDD
  134. def union(other: RDD[T]): RDD[T]

    Permalink
    Definition Classes
    RDD
  135. def unpersist(blocking: Boolean = true): LuceneRDD.this.type

    Permalink
    Definition Classes
    LuceneRDD → RDD
  136. final def wait(): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  137. final def wait(arg0: Long, arg1: Int): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  138. final def wait(arg0: Long): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  139. def zip[U](other: RDD[U])(implicit arg0: ClassTag[U]): RDD[(T, U)]

    Permalink
    Definition Classes
    RDD
  140. def zipPartitions[B, C, D, V](rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D])(f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[D], arg3: ClassTag[V]): RDD[V]

    Permalink
    Definition Classes
    RDD
  141. def zipPartitions[B, C, D, V](rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D], preservesPartitioning: Boolean)(f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[D], arg3: ClassTag[V]): RDD[V]

    Permalink
    Definition Classes
    RDD
  142. def zipPartitions[B, C, V](rdd2: RDD[B], rdd3: RDD[C])(f: (Iterator[T], Iterator[B], Iterator[C]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[V]): RDD[V]

    Permalink
    Definition Classes
    RDD
  143. def zipPartitions[B, C, V](rdd2: RDD[B], rdd3: RDD[C], preservesPartitioning: Boolean)(f: (Iterator[T], Iterator[B], Iterator[C]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[V]): RDD[V]

    Permalink
    Definition Classes
    RDD
  144. def zipPartitions[B, V](rdd2: RDD[B])(f: (Iterator[T], Iterator[B]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[V]): RDD[V]

    Permalink
    Definition Classes
    RDD
  145. def zipPartitions[B, V](rdd2: RDD[B], preservesPartitioning: Boolean)(f: (Iterator[T], Iterator[B]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[V]): RDD[V]

    Permalink
    Definition Classes
    RDD
  146. def zipWithIndex(): RDD[(T, Long)]

    Permalink
    Definition Classes
    RDD
  147. def zipWithUniqueId(): RDD[(T, Long)]

    Permalink
    Definition Classes
    RDD

Inherited from LuceneRDDConfigurable

Inherited from Configurable

Inherited from RDD[T]

Inherited from Logging

Inherited from Serializable

Inherited from Serializable

Inherited from AnyRef

Inherited from Any

Ungrouped