Class/Object

com.acervera.osm4scala.spark

OsmPbfFormat

Related Docs: object OsmPbfFormat | package spark

Permalink

class OsmPbfFormat extends FileFormat with DataSourceRegister

FileFormat implementation to read OSM Pbf files.

Basically, it is implemented in three steps:

  1. Take every split file.
  2. Search for the first block of data in each split.
  3. Extract all entities that are present in the split, starting from the first block found and ending at the last block whose header is in the split.
Example:
  Its usage is like that of other Spark connectors:

    scala> spark.read.format("osm.pbf").load("<osm files path here>").select("id","latitude","longitude","tags").filter("type == 0 and size(tags) > 0").show(false)
    +------+------------------+------------------+-------------------------------------------------------+
    |id    |latitude          |longitude         |tags                                                   |
    +------+------------------+------------------+-------------------------------------------------------+
    |272629|39.01344329999999 |-77.03783400000007|{ref -> 31, highway -> motorway_junction}              |
    |278454|38.99543909999999 |-76.87885730000008|{noref -> yes, highway -> motorway_junction}           |
    |278495|39.09727710000001 |-76.8004246000001 |{noref -> yes, highway -> motorway_junction}           |
    |278499|39.10949280000001 |-76.7842974000001 |{noref -> yes, highway -> motorway_junction}           |
    |278665|39.13675140000001 |-76.75700080000009|{highway -> motorway_junction, noref -> yes}           |
    |278679|39.16433720000001 |-76.73629450000011|{noref -> yes, highway -> motorway_junction}           |
    |278702|39.20996720000001 |-76.68302190000011|{noref -> yes, highway -> motorway_junction}           |
    |281260|39.047928000000006|-77.15067590000012|{noref -> yes, highway -> motorway_junction}           |
    |281323|39.1811582        |-77.2515329000001 |{ref -> 15A, highway -> motorway_junction}             |
    |281359|39.152438         |-77.29614050000009|{highway -> traffic_signals}                           |
    |287905|38.843457699999995|-77.1106591000001 |{highway -> traffic_signals, traffic_signals -> signal}|
    |287913|38.85178319999999 |-77.13165830000011|{highway -> traffic_signals}                           |
    |287943|38.876419799999994|-77.05647270000011|{curve_geometry -> yes}                                |
    |390841|38.41534949999998 |-77.42648520000014|{ref -> 140, highway -> motorway_junction}             |
    |390920|38.333087899999974|-77.49828340000015|{ref -> 133, highway -> motorway_junction}             |
    |390955|38.29694369999998 |-77.50489810000018|{ref -> 130B, highway -> motorway_junction}            |
    |391002|38.24086209999997 |-77.50026980000017|{ref -> 126B, highway -> motorway_junction}            |
    |396346|37.97631839999997 |-77.49251700000009|{noref -> yes, highway -> motorway_junction}           |
    |396542|37.93273709999998 |-77.46779110000014|{ref -> 104, highway -> motorway_junction}             |
    |396693|37.84187889999999 |-77.45102540000016|{ref -> 98, highway -> motorway_junction}              |
    +------+------------------+------------------+-------------------------------------------------------+
    only showing top 20 rows
Note

The DataFrame schema used is:

root
 |-- id: long (nullable = true)
 |-- type: byte (nullable = true)
 |-- latitude: double (nullable = true)
 |-- longitude: double (nullable = true)
 |-- nodes: array (nullable = true)
 |    |-- element: long (containsNull = true)
 |-- relations: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- id: long (nullable = true)
 |    |    |-- relationType: byte (nullable = true)
 |    |    |-- role: string (nullable = true)
 |-- tags: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)
 |-- info: struct (nullable = true)
 |    |-- version: integer (nullable = true)
 |    |-- timestamp: timestamp (nullable = true)
 |    |-- changeset: long (nullable = true)
 |    |-- userId: integer (nullable = true)
 |    |-- userName: string (nullable = true)
 |    |-- visible: boolean (nullable = true)
Linear Supertypes
DataSourceRegister, FileFormat, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. OsmPbfFormat
  2. DataSourceRegister
  3. FileFormat
  4. AnyRef
  5. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new OsmPbfFormat()

    Permalink

Value Members

  1. final def !=(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0

    Permalink
    Definition Classes
    Any
  5. def buildReader(sparkSession: SparkSession, dataSchema: StructType, partitionSchema: StructType, requiredSchema: StructType, filters: Seq[Filter], options: Map[String, String], hadoopConf: Configuration): (PartitionedFile) ⇒ Iterator[InternalRow]

    Permalink
    Attributes
    protected
    Definition Classes
    OsmPbfFormat → FileFormat
  6. def buildReaderWithPartitionValues(sparkSession: SparkSession, dataSchema: StructType, partitionSchema: StructType, requiredSchema: StructType, filters: Seq[Filter], options: Map[String, String], hadoopConf: Configuration): (PartitionedFile) ⇒ Iterator[InternalRow]

    Permalink
    Definition Classes
    FileFormat
  7. def clone(): AnyRef

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  8. final def eq(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  9. def equals(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  10. def finalize(): Unit

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  11. final def getClass(): Class[_]

    Permalink
    Definition Classes
    AnyRef → Any
  12. def hashCode(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  13. def inferSchema(sparkSession: SparkSession, options: Map[String, String], files: Seq[FileStatus]): Option[StructType]

    Permalink
    Definition Classes
    OsmPbfFormat → FileFormat
  14. final def isInstanceOf[T0]: Boolean

    Permalink
    Definition Classes
    Any
  15. def isSplitable(sparkSession: SparkSession, options: Map[String, String], path: Path): Boolean

    Permalink
    Definition Classes
    OsmPbfFormat → FileFormat
  16. final def ne(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  17. final def notify(): Unit

    Permalink
    Definition Classes
    AnyRef
  18. final def notifyAll(): Unit

    Permalink
    Definition Classes
    AnyRef
  19. def prepareWrite(sparkSession: SparkSession, job: Job, options: Map[String, String], dataSchema: StructType): OutputWriterFactory

    Permalink
    Definition Classes
    OsmPbfFormat → FileFormat
  20. def shortName(): String

    Permalink
    Definition Classes
    OsmPbfFormat → DataSourceRegister
  21. def supportBatch(sparkSession: SparkSession, dataSchema: StructType): Boolean

    Permalink
    Definition Classes
    FileFormat
  22. def supportDataType(dataType: DataType, isReadPath: Boolean): Boolean

    Permalink
    Definition Classes
    FileFormat
  23. final def synchronized[T0](arg0: ⇒ T0): T0

    Permalink
    Definition Classes
    AnyRef
  24. def toString(): String

    Permalink
    Definition Classes
    AnyRef → Any
  25. def vectorTypes(requiredSchema: StructType, partitionSchema: StructType, sqlConf: SQLConf): Option[Seq[String]]

    Permalink
    Definition Classes
    FileFormat
  26. final def wait(): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  27. final def wait(arg0: Long, arg1: Int): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  28. final def wait(arg0: Long): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )

Inherited from DataSourceRegister

Inherited from FileFormat

Inherited from AnyRef

Inherited from Any

Ungrouped