public class BulkDataSetWriter
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
protected DataSet |
dataSet |
protected long |
heapConglom |
protected static org.apache.log4j.Logger |
LOG |
protected OperationContext |
operationContext |
protected TxnView |
txn |
Constructor and Description |
---|
BulkDataSetWriter() |
BulkDataSetWriter(DataSet dataset,
OperationContext operationContext,
long heapConglom,
TxnView txn) |
Modifier and Type | Method and Description |
---|---|
protected void |
bulkLoad(java.util.List<BulkImportPartition> bulkImportPartitions,
java.lang.String bulkImportDirectory,
java.lang.String prefix) |
protected void |
dumpCutPoints(java.util.List<scala.Tuple2<java.lang.Long,byte[][]>> cutPointsList,
java.lang.String bulkImportDirectory)
Output cut points to files
|
protected void |
getAllConglomerates(java.util.List<java.lang.Long> allCongloms,
java.util.ArrayList<com.splicemachine.ddl.DDLMessage.TentativeIndex> tentativeIndexList) |
static java.util.List<BulkImportPartition> |
getBulkImportPartitions(java.util.List<java.lang.Long> congloms,
java.lang.String bulkImportDirectory)
Get actual partition boundaries for each table and index
|
protected java.util.List<scala.Tuple2<java.lang.Long,byte[][]>> |
getCutPoints(double sampleFraction,
java.util.List<scala.Tuple2<java.lang.Long,scala.Tuple2<java.lang.Double,ColumnStatisticsImpl>>> statistics)
Calculate cut points according to statistics.
|
static org.apache.hadoop.fs.Path |
getRandomFilename(org.apache.hadoop.fs.Path dir)
Generate a file name
|
protected java.util.Map<java.lang.Long,scala.Tuple2<java.lang.Double,ColumnStatisticsImpl>> |
mergeResults(java.util.List<scala.Tuple2<java.lang.Long,scala.Tuple2<java.lang.Double,ColumnStatisticsImpl>>> tuples)
Merge statistics from each RDD partition
|
protected void |
partitionUsingRDDSortUsingDataFrame(java.util.List<BulkImportPartition> bulkImportPartitions,
DataSet rowAndIndexes,
HFileGenerationFunction hfileGenerationFunction) |
protected void |
splitTables(java.util.List<scala.Tuple2<java.lang.Long,byte[][]>> cutPointsList)
Split a table using cut points
|
protected DataSet dataSet
protected OperationContext operationContext
protected long heapConglom
protected TxnView txn
protected static final org.apache.log4j.Logger LOG
public BulkDataSetWriter()
public BulkDataSetWriter(DataSet dataset, OperationContext operationContext, long heapConglom, TxnView txn)
public static java.util.List<BulkImportPartition> getBulkImportPartitions(java.util.List<java.lang.Long> congloms, java.lang.String bulkImportDirectory) throws StandardException
Parameters: congloms
Throws: StandardException
public static org.apache.hadoop.fs.Path getRandomFilename(org.apache.hadoop.fs.Path dir) throws java.io.IOException
Parameters: dir
Throws: java.io.IOException
protected void partitionUsingRDDSortUsingDataFrame(java.util.List<BulkImportPartition> bulkImportPartitions, DataSet rowAndIndexes, HFileGenerationFunction hfileGenerationFunction)
protected void bulkLoad(java.util.List<BulkImportPartition> bulkImportPartitions, java.lang.String bulkImportDirectory, java.lang.String prefix) throws StandardException
Parameters: bulkImportPartitions
Throws: StandardException
protected void getAllConglomerates(java.util.List<java.lang.Long> allCongloms, java.util.ArrayList<com.splicemachine.ddl.DDLMessage.TentativeIndex> tentativeIndexList) throws StandardException
Throws: StandardException
protected java.util.List<scala.Tuple2<java.lang.Long,byte[][]>> getCutPoints(double sampleFraction, java.util.List<scala.Tuple2<java.lang.Long,scala.Tuple2<java.lang.Double,ColumnStatisticsImpl>>> statistics) throws StandardException
Parameters: statistics
Throws: StandardException
protected void splitTables(java.util.List<scala.Tuple2<java.lang.Long,byte[][]>> cutPointsList) throws StandardException
Parameters: cutPointsList
Throws: StandardException
protected void dumpCutPoints(java.util.List<scala.Tuple2<java.lang.Long,byte[][]>> cutPointsList, java.lang.String bulkImportDirectory) throws StandardException
Parameters: cutPointsList
Throws: java.io.IOException, StandardException
protected java.util.Map<java.lang.Long,scala.Tuple2<java.lang.Double,ColumnStatisticsImpl>> mergeResults(java.util.List<scala.Tuple2<java.lang.Long,scala.Tuple2<java.lang.Double,ColumnStatisticsImpl>>> tuples) throws StandardException
Parameters: tuples
Throws: StandardException