Elaborates out a directory/glob/plain path.
Path to elaborate.
The underlying file system that this path is on.
Returns an array of Paths to load.
if the path does not match any files.
getFsAndFiles
Elaborates out a directory/glob/plain path.
Path to elaborate.
Returns an array of Paths to load.
if the path does not match any files.
getFiles
Elaborates out a directory/glob/plain path name.
Path name to elaborate.
Filter to discard paths.
Returns an array of Paths to load.
if the path does not match any files.
getFiles
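The elaboration step above can be sketched in plain Scala. This is an illustrative sketch only: the real implementation resolves globs through the Hadoop FileSystem API, and `elaboratePaths` is a hypothetical name, not ADAM API.

```scala
import java.nio.file.{FileSystems, Paths}

// Hypothetical sketch of glob elaboration: expand a glob against candidate
// paths, apply a caller-supplied filter, and fail loudly when nothing
// matches, mirroring the documented "does not match any files" behavior.
def elaboratePaths(pattern: String,
                   candidates: Seq[String],
                   filter: String => Boolean = _ => true): Seq[String] = {
  val matcher = FileSystems.getDefault.getPathMatcher("glob:" + pattern)
  val matched = candidates.filter(c => matcher.matches(Paths.get(c)) && filter(c))
  if (matched.isEmpty) {
    throw new java.io.FileNotFoundException(s"Couldn't find any files matching $pattern")
  }
  matched
}
```

A plain path or directory falls out of the same machinery, since a glob with no wildcards matches only itself.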
Load alignment records into an AlignmentRecordRDD.
Loads path names ending in:
* .bam/.cram/.sam as BAM/CRAM/SAM format,
* .fa/.fasta as FASTA format,
* .fq/.fastq as FASTQ format, and
* .ifq as interleaved FASTQ format.
If none of these match, fall back to Parquet + Avro.
For FASTA, FASTQ, and interleaved FASTQ formats, compressed files are supported through compression codecs configured in Hadoop, which by default include .gz and .bz2, but can include more.
The path name to load alignment records from. Globs/directories are supported, although file extension must be present for BAM/CRAM/SAM, FASTA, and FASTQ formats.
The optional path name to load the second set of alignment records from, if loading paired FASTQ format. Globs/directories are supported, although file extension must be present. Defaults to None.
The optional record group name to associate to the alignment records. Defaults to None.
An optional pushdown predicate to use when reading Parquet + Avro. Defaults to None.
An optional projection schema to use when reading Parquet + Avro. Defaults to None.
The validation stringency to use when validating BAM/CRAM/SAM or FASTQ formats. Defaults to ValidationStringency.STRICT.
Returns an AlignmentRecordRDD which wraps the RDD of alignment records, sequence dictionary representing contigs the alignment records may be aligned to, and the record group dictionary for the alignment records if one is available.
loadParquetAlignments
loadInterleavedFastq
loadFasta
loadFastq
loadBam
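The format dispatch above can be sketched as a pure function. This is a simplification: `pickAlignmentLoader` is an illustrative name, not ADAM API, and compression suffixes such as .gz/.bz2 are ignored here for brevity.

```scala
// Simplified sketch of loadAlignments format dispatch by file extension.
def pickAlignmentLoader(pathName: String): String = {
  val p = pathName.toLowerCase
  if (p.endsWith(".bam") || p.endsWith(".cram") || p.endsWith(".sam")) "loadBam"
  else if (p.endsWith(".fa") || p.endsWith(".fasta")) "loadFasta"
  else if (p.endsWith(".fq") || p.endsWith(".fastq")) "loadFastq"
  else if (p.endsWith(".ifq")) "loadInterleavedFastq"
  else "loadParquetAlignments" // fall back to Parquet + Avro
}
```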
Load alignment records from BAM/CRAM/SAM into an AlignmentRecordRDD.
This reads the sequence and record group dictionaries from the BAM/CRAM/SAM file header. SAMRecords are read from the file and converted to the AlignmentRecord schema.
The path name to load BAM/CRAM/SAM formatted alignment records from. Globs/directories are supported.
The validation stringency to use when validating the BAM/CRAM/SAM format header. Defaults to ValidationStringency.STRICT.
Returns an AlignmentRecordRDD which wraps the RDD of alignment records, sequence dictionary representing contigs the alignment records may be aligned to, and the record group dictionary for the alignment records if one is available.
Load a path name in BED6/12 format into a FeatureRDD.
The path name to load features in BED6/12 format from. Globs/directories are supported.
Optional sequence dictionary. Defaults to None.
An optional minimum number of partitions to load. If not set, falls back to the configured Spark default parallelism. Defaults to None.
The validation stringency to use when validating BED6/12 format. Defaults to ValidationStringency.STRICT.
Returns a FeatureRDD.
Load nucleotide contig fragments into a NucleotideContigFragmentRDD.
If the path name has a .fa/.fasta extension, load as FASTA format. Else, fall back to Parquet + Avro.
For FASTA format, compressed files are supported through compression codecs configured in Hadoop, which by default include .gz and .bz2, but can include more.
The path name to load nucleotide contig fragments from. Globs/directories are supported, although file extension must be present for FASTA format.
Maximum fragment length. Defaults to 10000L. Values greater than 1e9 should be avoided.
An optional pushdown predicate to use when reading Parquet + Avro. Defaults to None.
An optional projection schema to use when reading Parquet + Avro. Defaults to None.
Returns a NucleotideContigFragmentRDD.
loadParquetContigFragments
loadFasta
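The effect of maximum fragment length can be sketched as slicing a contig sequence into fixed-size pieces. This is illustrative only; ADAM's NucleotideContigFragment records also carry contig metadata and fragment indices.

```scala
// Sketch: chop a contig sequence into fragments of at most
// maxFragmentLength bases; the final fragment may be shorter.
def fragmentSequence(sequence: String, maxFragmentLength: Int = 10000): Seq[String] =
  sequence.grouped(maxFragmentLength).toSeq
```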
Load features into a FeatureRDD and convert to a CoverageRDD. Coverage is stored in the score field of Feature.
Loads path names ending in:
* .bed as BED6/12 format,
* .gff3 as GFF3 format,
* .gtf/.gff as GTF/GFF2 format,
* .narrow[pP]eak as NarrowPeak format, and
* .interval_list as IntervalList format.
If none of these match, fall back to Parquet + Avro.
For BED6/12, GFF3, GTF/GFF2, NarrowPeak, and IntervalList formats, compressed files are supported through compression codecs configured in Hadoop, which by default include .gz and .bz2, but can include more.
The path name to load features from. Globs/directories are supported, although file extension must be present for BED6/12, GFF3, GTF/GFF2, NarrowPeak, or IntervalList formats.
Optional sequence dictionary. Defaults to None.
An optional minimum number of partitions to use. For textual formats, if this is None, fall back to the Spark default parallelism. Defaults to None.
An optional pushdown predicate to use when reading Parquet + Avro. Defaults to None.
An optional projection schema to use when reading Parquet + Avro. Defaults to None.
The validation stringency to use when validating BED6/12, GFF3, GTF/GFF2, NarrowPeak, or IntervalList formats. Defaults to ValidationStringency.STRICT.
Returns a FeatureRDD converted to a CoverageRDD.
loadParquetFeatures
loadIntervalList
loadNarrowPeak
loadGff3
loadGtf
loadBed
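The "coverage is stored in the score field" conversion can be sketched with simplified case classes. The field and type names here are illustrative stand-ins, not ADAM's Avro schema.

```scala
// Simplified stand-ins for ADAM's Feature and Coverage records.
case class SimpleFeature(contigName: String, start: Long, end: Long, score: Double)
case class SimpleCoverage(contigName: String, start: Long, end: Long, count: Double)

// Sketch of the Feature-to-Coverage conversion: the feature's score
// becomes the coverage count over the same interval.
def toCoverage(f: SimpleFeature): SimpleCoverage =
  SimpleCoverage(f.contigName, f.start, f.end, f.score)
```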
Load nucleotide contig fragments from FASTA into a NucleotideContigFragmentRDD.
The path name to load nucleotide contig fragments from. Globs/directories are supported.
Maximum fragment length. Defaults to 10000L. Values greater than 1e9 should be avoided.
Returns a NucleotideContigFragmentRDD.
Load unaligned alignment records from (possibly paired) FASTQ into an AlignmentRecordRDD.
The path name to load the first set of unaligned alignment records from. Globs/directories are supported.
The path name to load the second set of unaligned alignment records from, if provided. Globs/directories are supported.
The optional record group name to associate to the unaligned alignment records. Defaults to None.
The validation stringency to use when validating (possibly paired) FASTQ format. Defaults to ValidationStringency.STRICT.
Returns an unaligned AlignmentRecordRDD.
loadUnpairedFastq
loadPairedFastq
Load features into a FeatureRDD.
Loads path names ending in:
* .bed as BED6/12 format,
* .gff3 as GFF3 format,
* .gtf/.gff as GTF/GFF2 format,
* .narrow[pP]eak as NarrowPeak format, and
* .interval_list as IntervalList format.
If none of these match, fall back to Parquet + Avro.
For BED6/12, GFF3, GTF/GFF2, NarrowPeak, and IntervalList formats, compressed files are supported through compression codecs configured in Hadoop, which by default include .gz and .bz2, but can include more.
The path name to load features from. Globs/directories are supported, although file extension must be present for BED6/12, GFF3, GTF/GFF2, NarrowPeak, or IntervalList formats.
Optional sequence dictionary. Defaults to None.
An optional minimum number of partitions to use. For textual formats, if this is None, fall back to the Spark default parallelism. Defaults to None.
An optional pushdown predicate to use when reading Parquet + Avro. Defaults to None.
An optional projection schema to use when reading Parquet + Avro. Defaults to None.
The validation stringency to use when validating BED6/12, GFF3, GTF/GFF2, NarrowPeak, or IntervalList formats. Defaults to ValidationStringency.STRICT.
Returns a FeatureRDD.
loadParquetFeatures
loadIntervalList
loadNarrowPeak
loadGff3
loadGtf
loadBed
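The feature-format dispatch can be sketched the same way as the alignment case. Again, `pickFeatureLoader` is an illustrative name, not ADAM API, and compression suffixes are omitted for brevity.

```scala
// Simplified sketch of loadFeatures format dispatch by file extension.
def pickFeatureLoader(pathName: String): String = {
  val p = pathName.toLowerCase // also folds .narrowPeak to .narrowpeak
  if (p.endsWith(".bed")) "loadBed"
  else if (p.endsWith(".gff3")) "loadGff3"
  else if (p.endsWith(".gtf") || p.endsWith(".gff")) "loadGtf"
  else if (p.endsWith(".narrowpeak")) "loadNarrowPeak"
  else if (p.endsWith(".interval_list")) "loadIntervalList"
  else "loadParquetFeatures" // fall back to Parquet + Avro
}
```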
Load fragments into a FragmentRDD.
Loads path names ending in:
* .bam/.cram/.sam as BAM/CRAM/SAM format and
* .ifq as interleaved FASTQ format.
If none of these match, fall back to Parquet + Avro.
For interleaved FASTQ format, compressed files are supported through compression codecs configured in Hadoop, which by default include .gz and .bz2, but can include more.
The path name to load fragments from. Globs/directories are supported, although file extension must be present for BAM/CRAM/SAM and FASTQ formats.
An optional pushdown predicate to use when reading Parquet + Avro. Defaults to None.
An optional projection schema to use when reading Parquet + Avro. Defaults to None.
The validation stringency to use when validating BAM/CRAM/SAM or FASTQ formats. Defaults to ValidationStringency.STRICT.
Returns a FragmentRDD.
loadParquetFragments
loadInterleavedFastqAsFragments
loadAlignments
loadBam
Load genotypes into a GenotypeRDD.
If the path name has a .vcf/.vcf.gz/.vcf.bgz extension, load as VCF format. Else, fall back to Parquet + Avro.
The path name to load genotypes from. Globs/directories are supported, although file extension must be present for VCF format.
An optional pushdown predicate to use when reading Parquet + Avro. Defaults to None.
An optional projection schema to use when reading Parquet + Avro. Defaults to None.
The validation stringency to use when validating VCF format. Defaults to ValidationStringency.STRICT.
Returns a GenotypeRDD.
loadParquetGenotypes
loadVcf
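The VCF-or-Parquet decision can be sketched as below. Note that, unlike most formats here, the compressed .vcf.gz/.vcf.bgz extensions are matched explicitly rather than handled by a codec. The function names are illustrative, not ADAM API.

```scala
// Sketch: loadGenotypes (and loadVariants) pick VCF when the path carries
// a recognized VCF extension, and fall back to Parquet + Avro otherwise.
def isVcfPath(pathName: String): Boolean =
  pathName.endsWith(".vcf") || pathName.endsWith(".vcf.gz") || pathName.endsWith(".vcf.bgz")

def pickGenotypeLoader(pathName: String): String =
  if (isVcfPath(pathName)) "loadVcf" else "loadParquetGenotypes"
```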
Load a path name in GFF3 format into a FeatureRDD.
The path name to load features in GFF3 format from. Globs/directories are supported.
Optional sequence dictionary. Defaults to None.
An optional minimum number of partitions to load. If not set, falls back to the configured Spark default parallelism. Defaults to None.
The validation stringency to use when validating GFF3 format. Defaults to ValidationStringency.STRICT.
Returns a FeatureRDD.
Load a path name in GTF/GFF2 format into a FeatureRDD.
The path name to load features in GTF/GFF2 format from. Globs/directories are supported.
Optional sequence dictionary. Defaults to None.
An optional minimum number of partitions to load. If not set, falls back to the configured Spark default parallelism. Defaults to None.
The validation stringency to use when validating GTF/GFF2 format. Defaults to ValidationStringency.STRICT.
Returns a FeatureRDD.
Functions like loadBam, but uses BAM index files to look at fewer blocks, and only returns records within the specified ReferenceRegions. BAM index file required.
The path name to load indexed BAM formatted alignment records from. Globs/directories are supported.
Iterable of ReferenceRegion we are filtering on.
The validation stringency to use when validating the BAM/CRAM/SAM format header. Defaults to ValidationStringency.STRICT.
Returns an AlignmentRecordRDD which wraps the RDD of alignment records, sequence dictionary representing contigs the alignment records may be aligned to, and the record group dictionary for the alignment records if one is available.
Functions like loadBam, but uses BAM index files to look at fewer blocks, and only returns records within a specified ReferenceRegion. BAM index file required.
The path name to load indexed BAM formatted alignment records from. Globs/directories are supported.
The ReferenceRegion we are filtering on.
Returns an AlignmentRecordRDD which wraps the RDD of alignment records, sequence dictionary representing contigs the alignment records may be aligned to, and the record group dictionary for the alignment records if one is available.
Load variant context records from VCF indexed by tabix (tbi) into a VariantContextRDD.
The path name to load VCF variant context records from. Globs/directories are supported.
Iterator of ReferenceRegions we are filtering on.
The validation stringency to use when validating VCF format. Defaults to ValidationStringency.STRICT.
Returns a VariantContextRDD.
Load variant context records from VCF indexed by tabix (tbi) into a VariantContextRDD.
The path name to load VCF variant context records from. Globs/directories are supported.
ReferenceRegion we are filtering on.
Returns a VariantContextRDD.
Load unaligned alignment records from interleaved FASTQ into an AlignmentRecordRDD.
In interleaved FASTQ, the two reads from a paired sequencing protocol are interleaved in a single file. This is a zipped representation of the typical paired FASTQ.
The path name to load unaligned alignment records from. Globs/directories are supported.
Returns an unaligned AlignmentRecordRDD.
Load paired unaligned alignment records grouped by sequencing fragment from interleaved FASTQ into a FragmentRDD.
In interleaved FASTQ, the two reads from a paired sequencing protocol are interleaved in a single file. This is a zipped representation of the typical paired FASTQ.
Fragments represent all of the reads from a single sequenced fragment as a single object, which is a useful representation for some tasks.
The path name to load unaligned alignment records from. Globs/directories are supported.
Returns a FragmentRDD containing the paired reads grouped by sequencing fragment.
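The interleaved layout can be illustrated with a small sketch: consecutive records alternate first/second of pair, so grouping them two at a time recovers one fragment per sequenced insert. This is pure Scala for illustration, not ADAM API.

```scala
// Sketch: pair up consecutive reads from an interleaved stream; each pair
// corresponds to one sequenced fragment. A dangling unpaired read is
// silently dropped here; a real loader would raise a validation error.
def pairInterleaved[T](reads: Seq[T]): Seq[(T, T)] =
  reads.grouped(2).collect { case Seq(r1, r2) => (r1, r2) }.toSeq
```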
Load a path name in IntervalList format into a FeatureRDD.
The path name to load features in IntervalList format from. Globs/directories are supported.
An optional minimum number of partitions to load. If not set, falls back to the configured Spark default parallelism. Defaults to None.
The validation stringency to use when validating IntervalList format. Defaults to ValidationStringency.STRICT.
Returns a FeatureRDD.
Load a path name in NarrowPeak format into a FeatureRDD.
The path name to load features in NarrowPeak format from. Globs/directories are supported.
Optional sequence dictionary. Defaults to None.
An optional minimum number of partitions to load. If not set, falls back to the configured Spark default parallelism. Defaults to None.
The validation stringency to use when validating NarrowPeak format. Defaults to ValidationStringency.STRICT.
Returns a FeatureRDD.
Load unaligned alignment records from paired FASTQ into an AlignmentRecordRDD.
The path name to load the first set of unaligned alignment records from. Globs/directories are supported.
The path name to load the second set of unaligned alignment records from. Globs/directories are supported.
The optional record group name to associate to the unaligned alignment records. Defaults to None.
The validation stringency to use when validating paired FASTQ format. Defaults to ValidationStringency.STRICT.
Returns an unaligned AlignmentRecordRDD.
Load a path name in Parquet + Avro format into an RDD.
The type of records to return.
The path name to load Parquet + Avro formatted data from. Globs/directories are supported.
An optional pushdown predicate to use when reading Parquet + Avro. Defaults to None.
An optional projection schema to use when reading Parquet + Avro. Defaults to None.
An RDD with records of the specified type.
Load a path name in Parquet + Avro format into an AlignmentRecordRDD.
The path name to load alignment records from. Globs/directories are supported.
An optional pushdown predicate to use when reading Parquet + Avro. Defaults to None.
An optional projection schema to use when reading Parquet + Avro. Defaults to None.
Returns an AlignmentRecordRDD which wraps the RDD of alignment records, sequence dictionary representing contigs the alignment records may be aligned to, and the record group dictionary for the alignment records if one is available.
The sequence dictionary is read from an Avro file stored at pathName/_seqdict.avro and the record group dictionary is read from an Avro file stored at pathName/_rgdict.avro. These files are pure Avro, not Parquet + Avro.
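The sidecar layout described above amounts to fixed file names under the Parquet directory, which can be sketched trivially (helper names are illustrative):

```scala
// Sketch: metadata sidecar locations for a Parquet + Avro alignment
// directory; these files are pure Avro, not Parquet + Avro.
def seqDictPath(pathName: String): String = s"$pathName/_seqdict.avro"
def rgDictPath(pathName: String): String = s"$pathName/_rgdict.avro"
```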
Load a path name in Parquet + Avro format into a NucleotideContigFragmentRDD.
The path name to load nucleotide contig fragments from. Globs/directories are supported.
An optional pushdown predicate to use when reading Parquet + Avro. Defaults to None.
An optional projection schema to use when reading Parquet + Avro. Defaults to None.
Returns a NucleotideContigFragmentRDD.
Load a path name in Parquet + Avro format into a FeatureRDD and convert to a CoverageRDD. Coverage is stored in the score field of Feature.
The path name to load features from. Globs/directories are supported.
An optional pushdown predicate to use when reading Parquet + Avro. Defaults to None.
Forces loading the RDD.
Returns a FeatureRDD converted to a CoverageRDD.
Load a path name in Parquet + Avro format into a FeatureRDD.
The path name to load features from. Globs/directories are supported.
An optional pushdown predicate to use when reading Parquet + Avro. Defaults to None.
An optional projection schema to use when reading Parquet + Avro. Defaults to None.
Returns a FeatureRDD.
Load a path name in Parquet + Avro format into a FragmentRDD.
The path name to load fragments from. Globs/directories are supported.
An optional pushdown predicate to use when reading Parquet + Avro. Defaults to None.
An optional projection schema to use when reading Parquet + Avro. Defaults to None.
Returns a FragmentRDD.
Load a path name in Parquet + Avro format into a GenotypeRDD.
The path name to load genotypes from. Globs/directories are supported.
An optional pushdown predicate to use when reading Parquet + Avro. Defaults to None.
An optional projection schema to use when reading Parquet + Avro. Defaults to None.
Returns a GenotypeRDD.
Load a path name in Parquet + Avro format into a VariantRDD.
The path name to load variants from. Globs/directories are supported.
An optional pushdown predicate to use when reading Parquet + Avro. Defaults to None.
An optional projection schema to use when reading Parquet + Avro. Defaults to None.
Returns a VariantRDD.
Load reference sequences into a broadcastable ReferenceFile.
If the path name has a .2bit extension, loads a 2bit file. Else, uses loadContigFragments to load the reference as an RDD, which is then collected to the driver.
The path name to load reference sequences from. Globs/directories for 2bit format are not supported.
Maximum fragment length. Defaults to 10000L. Values greater than 1e9 should be avoided.
Returns a broadcastable ReferenceFile.
loadContigFragments
Load a sequence dictionary.
Loads path names ending in:
* .dict as HTSJDK sequence dictionary format,
* .genome as Bedtools genome file format, and
* .txt as UCSC Genome Browser chromInfo files.
Compressed files are supported through compression codecs configured in Hadoop, which by default include .gz and .bz2, but can include more.
The path name to load a sequence dictionary from.
Returns a sequence dictionary.
if the pathName file extension is not one of .dict, .genome, or .txt.
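The extension handling, including the documented failure mode, can be sketched as follows. `pickDictionaryLoader` and its result strings are illustrative, not ADAM API.

```scala
// Sketch of loadSequenceDictionary dispatch; unlike the loaders that fall
// back to Parquet + Avro, an unknown extension is rejected outright.
def pickDictionaryLoader(pathName: String): String =
  if (pathName.endsWith(".dict")) "HTSJDK sequence dictionary"
  else if (pathName.endsWith(".genome")) "Bedtools genome file"
  else if (pathName.endsWith(".txt")) "UCSC chromInfo"
  else throw new IllegalArgumentException(
    s"File extension must be one of .dict, .genome, or .txt: $pathName")
```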
Load unaligned alignment records from unpaired FASTQ into an AlignmentRecordRDD.
The path name to load unaligned alignment records from. Globs/directories are supported.
If true, sets the unaligned alignment record as the first read of the fragment. Defaults to false.
If true, sets the unaligned alignment record as the second read of the fragment. Defaults to false.
The optional record group name to associate to the unaligned alignment records. Defaults to None.
The validation stringency to use when validating unpaired FASTQ format. Defaults to ValidationStringency.STRICT.
Returns an unaligned AlignmentRecordRDD.
Load variants into a VariantRDD.
If the path name has a .vcf/.vcf.gz/.vcf.bgz extension, load as VCF format. Else, fall back to Parquet + Avro.
The path name to load variants from. Globs/directories are supported, although file extension must be present for VCF format.
An optional pushdown predicate to use when reading Parquet + Avro. Defaults to None.
An optional projection schema to use when reading Parquet + Avro. Defaults to None.
The validation stringency to use when validating VCF format. Defaults to ValidationStringency.STRICT.
Returns a VariantRDD.
loadParquetVariants
loadVcf
Load variant context records from VCF into a VariantContextRDD.
The path name to load VCF variant context records from. Globs/directories are supported.
The validation stringency to use when validating VCF format. Defaults to ValidationStringency.STRICT.
Returns a VariantContextRDD.
Load variant context records from VCF into a VariantContextRDD.
Only converts the core Genotype/Variant fields, and the fields set in the requested projection. Core variant fields include:
* Names (ID)
* Filters (FILTER)
Core genotype fields include:
* Allelic depth (AD)
* Read depth (DP)
* Min read depth (MIN_DP)
* Genotype quality (GQ)
* Genotype likelihoods (GL/PL)
* Strand bias components (SB)
* Phase info (PS, PQ)
The path name to load VCF variant context records from. Globs/directories are supported.
The info fields to include, in addition to the ID and FILTER attributes.
The format fields to include, in addition to the core fields listed above.
The validation stringency to use when validating VCF format. Defaults to ValidationStringency.STRICT.
Returns a VariantContextRDD.
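The projection semantics can be sketched as a set union of the core fields listed above with the requested extras. The field names come from the list above; the value and function names are illustrative, not ADAM API.

```scala
// Core FORMAT fields always converted (per the list above); the requested
// projection is added on top.
val coreFormatFields: Set[String] =
  Set("AD", "DP", "MIN_DP", "GQ", "GL", "PL", "SB", "PS", "PQ")

def formatFieldsToConvert(requested: Set[String]): Set[String] =
  coreFormatFields ++ requested
```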
The SparkContext to wrap.
The ADAMContext provides functions on top of a SparkContext for loading genomic data.
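The wrapping pattern can be sketched in miniature: an implicit conversion decorates a context object with load methods, so callers invoke them directly on the wrapped context. The class and method bodies below are simplified stand-ins, not ADAM's actual classes.

```scala
import scala.language.implicitConversions

// Miniature stand-in for SparkContext.
class MiniContext(val appName: String)

// Decorator in the style of ADAMContext: load methods live here,
// not on MiniContext itself.
class MiniAdamContext(val ctx: MiniContext) {
  def loadAlignments(pathName: String): String =
    s"${ctx.appName}: alignments from $pathName"
}

object MiniAdamContext {
  // The implicit conversion that makes ctx.loadAlignments(...) compile.
  implicit def contextToAdamContext(ctx: MiniContext): MiniAdamContext =
    new MiniAdamContext(ctx)
}
```

With `import MiniAdamContext._` in scope, `new MiniContext("demo").loadAlignments("sample.bam")` resolves through the implicit conversion, which mirrors how load calls are made against a plain SparkContext.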