ADAMContext

Instance Constructors

new ADAMContext(sc: SparkContext)

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
def findFiles(path: Path, regex: String): Seq[Path]

Searches a path recursively, returning the names of all directories in the tree whose name matches the given regex.
Searches a path recursively, returning the names of all directories in the tree whose name matches the given regex.
path
The path to begin the search at
regex
A regular expression
returns
A sequence of Path objects corresponding to the identified directories.
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def hashCode(): Int

Definition Classes
AnyRef → Any
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def isTraceEnabled(): Boolean

Attributes
protected
Definition Classes
Logging
def loadAlignments(filePath: String, projection: Option[Schema] = None, filePath2Opt: Option[String] = None, recordGroupOpt: Option[String] = None, stringency: ValidationStringency = ValidationStringency.STRICT): AlignmentRecordRDD

Loads alignments from a given path, and infers the input type.
Loads alignments from a given path, and infers the input type.
This method can load:
* AlignmentRecords via Parquet (default)
* SAM/BAM (.sam, .bam)
* FASTQ (interleaved, single end, paired end) (.ifq, .fq/.fastq)
* FASTA (.fa, .fasta)
* NucleotideContigFragments via Parquet (.contig.adam)
As hinted above, the input type is inferred from the file path extension.
filePath
Path to load data from.
projection
The fields to project; ignored if not Parquet.
filePath2Opt
The path to load a second end of FASTQ data from. Ignored if not FASTQ.
recordGroupOpt
Optional record group name to set if loading FASTQ.
stringency
Validation stringency used on FASTQ import/merging.
returns
Returns an AlignmentRecordRDD which wraps the RDD of reads, sequence dictionary representing the contigs these reads are aligned to if the reads are aligned, and the record group dictionary for the reads if one is available.

See also
loadFasta
loadFastq
loadInterleavedFastq
loadParquetAlignments
loadBam
def loadAlignmentsFromPaths(paths: Seq[Path]): AlignmentRecordRDD

Takes a sequence of Path objects and loads alignments using those paths.
Takes a sequence of Path objects and loads alignments using those paths.
This infers the type of each path, and thus can be used to load a mixture of different files from disk. I.e., if you want to load 2 BAM files and 3 Parquet files, this is the method you are looking for!
The RDDs obtained from loading each file are simply unioned together, while the record group dictionaries are naively merged. The sequence dictionaries are merged in a way that dedupes the sequence records in each dictionary.
paths
The locations of the files to load.
returns
Returns an AlignmentRecordRDD which wraps the RDD of reads, sequence dictionary representing the contigs these reads are aligned to if the reads are aligned, and the record group dictionary for the reads if one is available.

See also
loadAlignments
def loadBam(filePath: String, validationStringency: ValidationStringency = ValidationStringency.STRICT): AlignmentRecordRDD

Loads a SAM/BAM file.
Loads a SAM/BAM file.
This reads the sequence and record group dictionaries from the SAM/BAM file header. SAMRecords are read from the file and converted to the AlignmentRecord schema.
filePath
Path to the file on disk.
returns
Returns an AlignmentRecordRDD which wraps the RDD of reads, sequence dictionary representing the contigs these reads are aligned to if the reads are aligned, and the record group dictionary for the reads if one is available.

See also
loadAlignments
def loadBed(filePath: String, minPartitions: Option[Int] = None): FeatureRDD
def loadCoverage(filePath: String): CoverageRDD

Loads Parquet file of Features to a CoverageRDD.
Loads Parquet file of Features to a CoverageRDD. Coverage is stored in the score attribute of Feature.
filePath
File path to load coverage from
returns
CoverageRDD containing an RDD of Coverage
def loadDictionary[T](filePath: String)(implicit ev1: (T) ⇒ SpecificRecord, ev2: Manifest[T]): SequenceDictionary

This method should create a new SequenceDictionary from any parquet file which contains records that have the requisite reference{Name,Id,Length,Url} fields.
This method should create a new SequenceDictionary from any parquet file which contains records that have the requisite reference{Name,Id,Length,Url} fields.
(If the path is a BAM or SAM file, and the implicit type is a Read, then it just defaults to reading the SequenceDictionary out of the BAM header in the normal way.)
T
The type of records to return
filePath
The path to the input data
returns
A SequenceDictionary containing the names and indices of all the sequences to which the records in the corresponding file are aligned.
def loadFasta(filePath: String, fragmentLength: Long): NucleotideContigFragmentRDD
def loadFastq(filePath1: String, filePath2Opt: Option[String], recordGroupOpt: Option[String] = None, stringency: ValidationStringency = ValidationStringency.STRICT): AlignmentRecordRDD
def loadFeatures(filePath: String, projection: Option[Schema] = None, minPartitions: Option[Int] = None): FeatureRDD
def loadFeatures(filePath: String, projection: Option[Schema], minPartitions: Int): FeatureRDD
def loadFragments(filePath: String): FragmentRDD
def loadGenes(filePath: String, projection: Option[Schema] = None): GeneRDD
def loadGenotypes(filePath: String, projection: Option[Schema] = None): GenotypeRDD
def loadGff3(filePath: String, minPartitions: Option[Int] = None): FeatureRDD
def loadGtf(filePath: String, minPartitions: Option[Int] = None): FeatureRDD
def loadIndexedBam(filePath: String, viewRegions: Iterable[ReferenceRegion])(implicit s: DummyImplicit): AlignmentRecordRDD

Functions like loadBam, but uses bam index files to look at fewer blocks, and only returns records within the specified ReferenceRegions.
Functions like loadBam, but uses bam index files to look at fewer blocks, and only returns records within the specified ReferenceRegions. Bam index file required.
filePath
The path to the input data. Currently this path must correspond to a single Bam file. The associated bam index file needs to have the same name.
viewRegions
Iterable of ReferenceRegions we are filtering on
def loadIndexedBam(filePath: String, viewRegion: ReferenceRegion): AlignmentRecordRDD

Functions like loadBam, but uses bam index files to look at fewer blocks, and only returns records within a specified ReferenceRegion.
Functions like loadBam, but uses bam index files to look at fewer blocks, and only returns records within a specified ReferenceRegion. Bam index file required.
filePath
The path to the input data. Currently this path must correspond to a single Bam file. The associated bam index file needs to have the same name.
viewRegion
The ReferenceRegion we are filtering on
def loadIndexedVcf(filePath: String, viewRegions: Iterable[ReferenceRegion])(implicit s: DummyImplicit): VariantContextRDD

Loads a VCF file indexed by a tabix (tbi) file into an RDD.
Loads a VCF file indexed by a tabix (tbi) file into an RDD.
filePath
The file to load.
viewRegions
Iterable of ReferenceRegions we are filtering on.
returns
Returns a VariantContextRDD.
def loadIndexedVcf(filePath: String, viewRegion: ReferenceRegion): VariantContextRDD

Loads a VCF file indexed by a tabix (tbi) file into an RDD.
Loads a VCF file indexed by a tabix (tbi) file into an RDD.
filePath
The file to load.
viewRegion
The ReferenceRegion we are filtering on.
returns
Returns a VariantContextRDD.
def loadInterleavedFastq(filePath: String): AlignmentRecordRDD
def loadInterleavedFastqAsFragments(filePath: String): FragmentRDD
def loadIntervalList(filePath: String, minPartitions: Option[Int] = None): FeatureRDD
def loadNarrowPeak(filePath: String, minPartitions: Option[Int] = None): FeatureRDD
def loadPairedFastq(filePath1: String, filePath2: String, recordGroupOpt: Option[String], stringency: ValidationStringency): AlignmentRecordRDD
def loadParquet[T](filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None)(implicit ev1: (T) ⇒ SpecificRecord, ev2: Manifest[T]): RDD[T]

This method will create a new RDD.
This method will create a new RDD.
T
The type of records to return
filePath
The path to the input data
predicate
An optional pushdown predicate to use when reading the data
projection
An optional projection schema to use when reading the data
returns
An RDD with records of the specified type
def loadParquetAlignments(filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None): AlignmentRecordRDD

Loads alignment data from a Parquet file.
Loads alignment data from a Parquet file.
filePath
The path of the file to load.
predicate
An optional predicate to push down into the file.
projection
An optional schema designating the fields to project.
returns
Returns an AlignmentRecordRDD which wraps the RDD of reads, sequence dictionary representing the contigs these reads are aligned to if the reads are aligned, and the record group dictionary for the reads if one is available.

Note
The sequence dictionary is read from an Avro file stored at filePath/_seqdict.avro and the record group dictionary is read from an Avro file stored at filePath/_rgdict.avro. These files are pure Avro, not Parquet.
See also
loadAlignments
def loadParquetContigFragments(filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None): NucleotideContigFragmentRDD
def loadParquetFeatures(filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None): FeatureRDD
def loadParquetFragments(filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None): FragmentRDD
def loadParquetGenotypes(filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None): GenotypeRDD
def loadParquetVariantAnnotations(filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None): DatabaseVariantAnnotationRDD
def loadParquetVariants(filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None): VariantRDD
def loadReferenceFile(filePath: String, fragmentLength: Long): ReferenceFile
def loadSequences(filePath: String, projection: Option[Schema] = None, fragmentLength: Long = 10000): NucleotideContigFragmentRDD
def loadUnpairedFastq(filePath: String, recordGroupOpt: Option[String] = None, setFirstOfPair: Boolean = false, setSecondOfPair: Boolean = false, stringency: ValidationStringency = ValidationStringency.STRICT): AlignmentRecordRDD
def loadVariantAnnotations(filePath: String, projection: Option[Schema] = None): DatabaseVariantAnnotationRDD
def loadVariants(filePath: String, projection: Option[Schema] = None): VariantRDD
def loadVcf(filePath: String): VariantContextRDD

Loads a VCF file into an RDD.
Loads a VCF file into an RDD.
filePath
The file to load.
returns
Returns a VariantContextRDD.
def loadVcfAnnotations(filePath: String): DatabaseVariantAnnotationRDD
def log: Logger

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logName: String

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
val sc: SparkContext
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

Related Docs: object ADAMContext | package rdd

class ADAMContext extends Serializable with Logging

Instance Constructors

new ADAMContext(sc: SparkContext)

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def clone(): AnyRef

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

def findFiles(path: Path, regex: String): Seq[Path]

final def getClass(): Class[_]

def hashCode(): Int

final def isInstanceOf[T0]: Boolean

def isTraceEnabled(): Boolean

def loadAlignments(filePath: String, projection: Option[Schema] = None, filePath2Opt: Option[String] = None, recordGroupOpt: Option[String] = None, stringency: ValidationStringency = ValidationStringency.STRICT): AlignmentRecordRDD

def loadAlignmentsFromPaths(paths: Seq[Path]): AlignmentRecordRDD

def loadBam(filePath: String, validationStringency: ValidationStringency = ValidationStringency.STRICT): AlignmentRecordRDD

def loadBed(filePath: String, minPartitions: Option[Int] = None): FeatureRDD

def loadCoverage(filePath: String): CoverageRDD

def loadDictionary[T](filePath: String)(implicit ev1: (T) ⇒ SpecificRecord, ev2: Manifest[T]): SequenceDictionary

def loadFasta(filePath: String, fragmentLength: Long): NucleotideContigFragmentRDD

def loadFastq(filePath1: String, filePath2Opt: Option[String], recordGroupOpt: Option[String] = None, stringency: ValidationStringency = ValidationStringency.STRICT): AlignmentRecordRDD

def loadFeatures(filePath: String, projection: Option[Schema] = None, minPartitions: Option[Int] = None): FeatureRDD

def loadFeatures(filePath: String, projection: Option[Schema], minPartitions: Int): FeatureRDD

def loadFragments(filePath: String): FragmentRDD

def loadGenes(filePath: String, projection: Option[Schema] = None): GeneRDD

def loadGenotypes(filePath: String, projection: Option[Schema] = None): GenotypeRDD

def loadGff3(filePath: String, minPartitions: Option[Int] = None): FeatureRDD

def loadGtf(filePath: String, minPartitions: Option[Int] = None): FeatureRDD

def loadIndexedBam(filePath: String, viewRegions: Iterable[ReferenceRegion])(implicit s: DummyImplicit): AlignmentRecordRDD

def loadIndexedBam(filePath: String, viewRegion: ReferenceRegion): AlignmentRecordRDD

def loadIndexedVcf(filePath: String, viewRegions: Iterable[ReferenceRegion])(implicit s: DummyImplicit): VariantContextRDD

def loadIndexedVcf(filePath: String, viewRegion: ReferenceRegion): VariantContextRDD

def loadInterleavedFastq(filePath: String): AlignmentRecordRDD

def loadInterleavedFastqAsFragments(filePath: String): FragmentRDD

def loadIntervalList(filePath: String, minPartitions: Option[Int] = None): FeatureRDD

def loadNarrowPeak(filePath: String, minPartitions: Option[Int] = None): FeatureRDD

def loadPairedFastq(filePath1: String, filePath2: String, recordGroupOpt: Option[String], stringency: ValidationStringency): AlignmentRecordRDD

def loadParquet[T](filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None)(implicit ev1: (T) ⇒ SpecificRecord, ev2: Manifest[T]): RDD[T]

def loadParquetAlignments(filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None): AlignmentRecordRDD

def loadParquetContigFragments(filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None): NucleotideContigFragmentRDD

def loadParquetFeatures(filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None): FeatureRDD

def loadParquetFragments(filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None): FragmentRDD

def loadParquetGenotypes(filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None): GenotypeRDD

def loadParquetVariantAnnotations(filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None): DatabaseVariantAnnotationRDD

def loadParquetVariants(filePath: String, predicate: Option[FilterPredicate] = None, projection: Option[Schema] = None): VariantRDD

def loadReferenceFile(filePath: String, fragmentLength: Long): ReferenceFile

def loadSequences(filePath: String, projection: Option[Schema] = None, fragmentLength: Long = 10000): NucleotideContigFragmentRDD

def loadUnpairedFastq(filePath: String, recordGroupOpt: Option[String] = None, setFirstOfPair: Boolean = false, setSecondOfPair: Boolean = false, stringency: ValidationStringency = ValidationStringency.STRICT): AlignmentRecordRDD

def loadVariantAnnotations(filePath: String, projection: Option[Schema] = None): DatabaseVariantAnnotationRDD

def loadVariants(filePath: String, projection: Option[Schema] = None): VariantRDD

def loadVcf(filePath: String): VariantContextRDD

def loadVcfAnnotations(filePath: String): DatabaseVariantAnnotationRDD

def log: Logger

def logDebug(msg: ⇒ String, throwable: Throwable): Unit

def logDebug(msg: ⇒ String): Unit

def logError(msg: ⇒ String, throwable: Throwable): Unit

def logError(msg: ⇒ String): Unit

def logInfo(msg: ⇒ String, throwable: Throwable): Unit

def logInfo(msg: ⇒ String): Unit

def logName: String

def logTrace(msg: ⇒ String, throwable: Throwable): Unit

def logTrace(msg: ⇒ String): Unit

def logWarning(msg: ⇒ String, throwable: Throwable): Unit

def logWarning(msg: ⇒ String): Unit

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

val sc: SparkContext

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from Logging