object ResourceHelper
One-stop helper for IO management. Streams, sources, and external input should be handled from here
- Alphabetic
- By Inheritance
- ResourceHelper
- AnyRef
- Any
- Hide All
- Show All
- Public
- Protected
Type Members
- case class SourceStream(resource: String) extends Product with Serializable
Structure for a SourceStream coming from compiled content
Value Members
- final def !=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- final def ##: Int
- Definition Classes
- AnyRef → Any
- final def ==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- final def asInstanceOf[T0]: T0
- Definition Classes
- Any
- def clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.CloneNotSupportedException]) @HotSpotIntrinsicCandidate() @native()
- def copyToLocal(path: String): String
Copies the remote resource to a local temporary folder and returns its absolute path.
Copies the remote resource to a local temporary folder and returns its absolute path.
Currently, file:/, s3:/, hdfs:/ and dbfs:/ are supported.
If the file is already on the local file system just the absolute path will be returned instead.
- path
Path to the resource
- returns
Absolute path to the temporary or local folder of the resource
- final def eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- def equals(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef → Any
- def fileSystemFromPath(path: String): FileSystem
Get the Hadoop FileSystem from a given path
Get the Hadoop FileSystem from a given path
- path
Path to the resource
- returns
Hadoop FileSystem
- def flattenRevertValuesAsKeys(er: ExternalResource): Map[String, String]
For keys that map to multiple values, this optimizer flattens all values per key to provide constant-time access
- def getActiveSparkSession: SparkSession
- final def getClass(): Class[_ <: AnyRef]
- Definition Classes
- AnyRef → Any
- Annotations
- @HotSpotIntrinsicCandidate() @native()
- def getFileFromPath(pathToFile: String): File
- def getFilesContentBuffer(externalResource: ExternalResource): Seq[Iterator[String]]
- def getResourceFile(path: String): URL
- def getResourceStream(path: String): InputStream
NOT thread safe.
NOT thread safe. Do not call from executors.
- def getSparkSessionWithS3(awsAccessKeyId: String, awsSecretAccessKey: String, hadoopAwsVersion: String = ConfigHelper.hadoopAwsVersion, AwsJavaSdkVersion: String = ConfigHelper.awsJavaSdkVersion, region: String = "us-east-1", s3Impl: String = "org.apache.hadoop.fs.s3a.S3AFileSystem", pathStyleAccess: Boolean = true, credentialsProvider: String = "TemporaryAWSCredentialsProvider", awsSessionToken: Option[String] = None): SparkSession
- def getWordCount(externalResource: ExternalResource, wordCount: Map[String, Long] = MMap.empty[String, Long].withDefaultValue(0), pipeline: Option[PipelineModel] = None): Map[String, Long]
- def hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @HotSpotIntrinsicCandidate() @native()
- def isHTTPProtocol(urlStr: String): Boolean
- final def isInstanceOf[T0]: Boolean
- Definition Classes
- Any
- def isValidURL(url: String): Boolean
- def listLocalFiles(path: String): List[File]
- def listResourceDirectory(path: String): Seq[String]
- final def ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- final def notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @HotSpotIntrinsicCandidate() @native()
- final def notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @HotSpotIntrinsicCandidate() @native()
- def parseKeyArrayValues(externalResource: ExternalResource): Map[String, Array[Float]]
- def parseKeyListValues(externalResource: ExternalResource): Map[String, List[String]]
- def parseKeyValueText(er: ExternalResource): Map[String, String]
General-purpose key-value parser from a source. Currently reads only text files
- def parseLines(er: ExternalResource): Array[String]
General-purpose line parser from a source. Currently reads only text files
- def parseLinesIterator(er: ExternalResource): Seq[Iterator[String]]
General-purpose line parser from a source. Currently reads only text files
- def parseTupleSentences(er: ExternalResource): Array[TaggedSentence]
General-purpose tuple parser from a source. Currently reads only text files
- def parseTupleSentencesDS(er: ExternalResource): Dataset[TaggedSentence]
- def parseTupleText(er: ExternalResource): Array[(String, String)]
General-purpose tuple parser from a source. Currently reads only text files
- def readSparkDataFrame(er: ExternalResource): DataFrame
General-purpose reader of saved Parquet data. Currently reads only the Parquet format
- def resolvePath(folder: String): String
Resolves the given path to its absolute form, handling different file systems.
Resolves the given path to its absolute form, handling different file systems.
- folder
The input path to resolve.
- returns
The resolved absolute path as a string.
- lazy val spark: SparkSession
- final def synchronized[T0](arg0: => T0): T0
- Definition Classes
- AnyRef
- def toString(): String
- Definition Classes
- AnyRef → Any
- def validFile(path: String): Boolean
- final def wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException])
- final def wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException]) @native()
- final def wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException])
Deprecated Value Members
- def finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.Throwable]) @Deprecated
- Deprecated
(Since version 9)