Caches the underlying RDD in memory.
new MaRe object
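A minimal sketch of how this might be used, assuming the se.uu.it.mare package from the MaRe repository and a spark-shell session where sc is predefined; the method name cache is an assumption based on this documentation:

```scala
import se.uu.it.mare.MaRe

// Wrap an RDD and cache the underlying data in memory, so that
// later container-based map/reduce calls do not recompute it.
val mare = new MaRe(sc.textFile("dna.txt")).cache()
```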
:: Experimental :: First collects the data locally on disk, and then reduces and writes it to a local output path using a Docker container command. This is an experimental feature (use at your own risk).
mount point where the partitions are made available to the containers
mount point where the processed partition is read back into Spark
Docker image name
Docker command
local output path
if set to true, the Docker image is pulled even if it is already present locally
intermediate results storage level (default: MEMORY_AND_DISK)
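A hedged sketch of a call; the method name collectReduce, the TextFile mount-point type, and the parameter names are assumptions based on the MaRe repository and may differ between versions:

```scala
import org.apache.spark.storage.StorageLevel
import se.uu.it.mare.{ MaRe, TextFile }

// Reduce per-partition counts to a single sum and write it to a
// local file, keeping intermediate results on memory and disk.
new MaRe(sc.textFile("counts.txt")).collectReduce(
  inputMountPoint = TextFile("/counts"),
  outputMountPoint = TextFile("/sum"),
  imageName = "ubuntu:16.04",
  command = "awk '{s+=$1} END {print s}' /counts > /sum",
  localOutPath = "/tmp/sum.txt",
  forcePull = false,
  intermediateStorageLevel = StorageLevel.MEMORY_AND_DISK)
```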
Returns the number of partitions of the underlying RDD.
number of partitions of the underlying RDD
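For example, under the same assumptions as the sketches above:

```scala
import se.uu.it.mare.MaRe

val mare = new MaRe(sc.textFile("dna.txt"))
// Each partition is processed through its own container command in a
// map call, so this also indicates the degree of parallelism.
println(s"Number of partitions: ${mare.getNumPartitions}")
```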
Maps each RDD partition through a Docker container command.
mount point where the partitions are made available to the containers
mount point where the processed partition is read back into Spark
Docker image name
Docker command
if set to true, the Docker image is pulled even if it is already present locally
new MaRe object
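A sketch of a map call, loosely adapted from the nucleotide-counting example in the MaRe README; the TextFile mount-point type and parameter names are assumptions:

```scala
import se.uu.it.mare.{ MaRe, TextFile }

// Count occurrences of "G" in each partition of a DNA text file.
// Each partition is mounted at /dna inside an Ubuntu container, and
// the per-partition count is read back from /count.
val gCount = new MaRe(sc.textFile("dna.txt"))
  .map(
    inputMountPoint = TextFile("/dna"),
    outputMountPoint = TextFile("/count"),
    imageName = "ubuntu:16.04",
    command = "grep -o 'G' /dna | wc -l > /count")
```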
input RDD underlying the MaRe object
Reduces the data to a single partition using a Docker container command. The command is applied using a tree reduce strategy.
mount point where the partitions are made available to the containers
mount point where the processed partition is read back into Spark
Docker image name
Docker command
depth of the reduce tree (default: 2, must be greater than or equal to 2)
if set to true, the Docker image is pulled even if it is already present locally
new MaRe object
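Continuing the map sketch above, the per-partition counts can be summed with a tree reduce; the parameter names and the reduce command are assumptions:

```scala
import se.uu.it.mare.{ MaRe, TextFile }

// Sum the per-partition counts into a single record. The depth
// parameter controls the shape of the reduce tree (default: 2).
val total = gCount
  .reduce(
    inputMountPoint = TextFile("/counts"),
    outputMountPoint = TextFile("/sum"),
    imageName = "ubuntu:16.04",
    command = "awk '{s+=$1} END {print s}' /counts > /sum",
    depth = 2)
  .rdd.collect()(0)
```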
Repartitions the underlying RDD to the specified number of partitions.
number of partitions for the underlying RDD
new MaRe object
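For example (same assumptions as the sketches above):

```scala
import se.uu.it.mare.MaRe

// Repartition to 4 partitions, so that a subsequent map call
// processes the data through 4 container commands.
val coarse = new MaRe(sc.textFile("dna.txt")).repartition(4)
println(coarse.getNumPartitions) // 4
```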
Repartitions data according to keyBy and org.apache.spark.HashPartitioner.
function that, given a record, computes a key
number of partitions for the resulting RDD
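A hedged sketch; the record type (String) and the parameter names keyBy and numPartitions are assumptions:

```scala
import se.uu.it.mare.MaRe

// Hash-partition tab-separated records by their first field, so that
// records sharing a key land in the same partition (and container).
val byKey = new MaRe(sc.textFile("records.tsv"))
  .repartitionBy(
    keyBy = (record: String) => record.split("\t")(0),
    numPartitions = 16)
```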
Repartitions data according to keyBy and a custom partitioner.
function that, given a record, computes a key
custom partitioner
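A sketch of plugging in a custom org.apache.spark.Partitioner; the partitioner class below is hypothetical, and the parameter names are assumptions:

```scala
import org.apache.spark.Partitioner
import se.uu.it.mare.MaRe

// Hypothetical partitioner that assigns records to partitions by the
// length of their key, just to illustrate the custom-partitioner hook.
class KeyLengthPartitioner(override val numPartitions: Int) extends Partitioner {
  override def getPartition(key: Any): Int =
    key.toString.length % numPartitions
}

val custom = new MaRe(sc.textFile("records.tsv"))
  .repartitionBy(
    keyBy = (record: String) => record.split("\t")(0),
    partitioner = new KeyLengthPartitioner(8))
```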
MaRe API.