Packages

class Asn1DataSource extends FileFormat with DataSourceRegister

Spark FileFormat data source for ASN.1-encoded files.

Usage:

spark.read
  .format("asn1")
  .option("asn1.schema", "/path/to/Schema.asn1")
  .option("asn1.type", "MyMessage")
  .option("asn1.encoding", "ber")  // ber | der | per-aligned | per-unaligned | xer
  .load("/data/messages.ber")

Splitting large files

BER/DER files become splittable once a sidecar index is present:

import io.github.sparkasn1.spark.asn1.util.Asn1Indexer
Asn1Indexer.buildIndex(new Path("/data/messages.ber"), spark.sparkContext.hadoopConfiguration)
// Now spark.read.format("asn1").option(...).load("/data/messages.ber") is read by multiple tasks

Fixed-length PER files are always splittable; no index is required.

Source
Asn1DataSource.scala
Linear Supertypes
DataSourceRegister, FileFormat, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. Asn1DataSource
  2. DataSourceRegister
  3. FileFormat
  4. AnyRef
  5. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. Protected

Instance Constructors

  1. new Asn1DataSource()

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##: Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  5. def buildReader(sparkSession: SparkSession, dataSchema: StructType, partitionSchema: StructType, requiredSchema: StructType, filters: Seq[Filter], options: Map[String, String], hadoopConf: Configuration): (PartitionedFile) => Iterator[InternalRow]
    Definition Classes
    Asn1DataSource → FileFormat
  6. def buildReaderWithPartitionValues(sparkSession: SparkSession, dataSchema: StructType, partitionSchema: StructType, requiredSchema: StructType, filters: Seq[Filter], options: Map[String, String], hadoopConf: Configuration): (PartitionedFile) => Iterator[InternalRow]
    Definition Classes
    FileFormat
  7. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.CloneNotSupportedException]) @native()
  8. def createFileMetadataCol(): AttributeReference
    Definition Classes
    FileFormat
  9. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  10. def equals(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef → Any
  11. def fileConstantMetadataExtractors: Map[String, (PartitionedFile) => Any]
    Definition Classes
    FileFormat
  12. final def getClass(): Class[_ <: AnyRef]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  13. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  14. def inferSchema(sparkSession: SparkSession, options: Map[String, String], files: Seq[FileStatus]): Option[StructType]
    Definition Classes
    Asn1DataSource → FileFormat
  15. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  16. def isSplitable(sparkSession: SparkSession, options: Map[String, String], path: Path): Boolean
    Definition Classes
    Asn1DataSource → FileFormat
  17. def metadataSchemaFields: Seq[StructField]
    Definition Classes
    FileFormat
  18. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  19. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  20. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  21. def prepareWrite(sparkSession: SparkSession, job: Job, options: Map[String, String], dataType: StructType): OutputWriterFactory
    Definition Classes
    Asn1DataSource → FileFormat
  22. def shortName(): String
    Definition Classes
    Asn1DataSource → DataSourceRegister
  23. def supportBatch(sparkSession: SparkSession, dataSchema: StructType): Boolean
    Definition Classes
    Asn1DataSource → FileFormat
  24. def supportDataType(dataType: DataType): Boolean
    Definition Classes
    FileFormat
  25. def supportFieldName(name: String): Boolean
    Definition Classes
    FileFormat
  26. final def synchronized[T0](arg0: => T0): T0
    Definition Classes
    AnyRef
  27. def toString(): String
    Definition Classes
    Asn1DataSource → AnyRef → Any
  28. def vectorTypes(requiredSchema: StructType, partitionSchema: StructType, sqlConf: SQLConf): Option[Seq[String]]
    Definition Classes
    FileFormat
  29. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.InterruptedException])
  30. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.InterruptedException])
  31. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.InterruptedException]) @native()

Deprecated Value Members

  1. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.Throwable]) @Deprecated
    Deprecated

    (Since version 9)

Inherited from DataSourceRegister

Inherited from FileFormat

Inherited from AnyRef

Inherited from Any

Ungrouped