public class SparkSortStrategy extends SortStrategy
Modifier and Type | Field and Description |
---|---|
static java.lang.String |
COMPRESSION_FACTOR
The number of shuffle partitions and consequently the number of output files created by the
Spark Sort is based on the size of the input data files used in this rewrite operation.
|
DELETE_FILE_THRESHOLD, DELETE_FILE_THRESHOLD_DEFAULT, MAX_FILE_SIZE_BYTES, MAX_FILE_SIZE_DEFAULT_RATIO, MIN_FILE_SIZE_BYTES, MIN_FILE_SIZE_DEFAULT_RATIO, MIN_INPUT_FILES, MIN_INPUT_FILES_DEFAULT, REWRITE_ALL, REWRITE_ALL_DEFAULT
Constructor and Description |
---|
SparkSortStrategy(Table table,
org.apache.spark.sql.SparkSession spark) |
Modifier and Type | Method and Description |
---|---|
protected FileScanTaskSetManager |
manager() |
RewriteStrategy |
options(java.util.Map<java.lang.String,java.lang.String> options)
Sets options to be used with this strategy
|
protected FileRewriteCoordinator |
rewriteCoordinator() |
java.util.Set<DataFile> |
rewriteFiles(java.util.List<FileScanTask> filesToRewrite)
Method which will rewrite files based on this particular RewriteStrategy's algorithm.
|
protected double |
sizeEstimateMultiple() |
protected org.apache.spark.sql.catalyst.plans.logical.LogicalPlan |
sortPlan(org.apache.spark.sql.connector.distributions.Distribution distribution,
org.apache.spark.sql.connector.expressions.SortOrder[] ordering,
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan plan,
org.apache.spark.sql.internal.SQLConf conf) |
protected org.apache.spark.sql.SparkSession |
spark() |
Table |
table()
Returns the table being modified by this rewrite strategy
|
protected SparkTableCache |
tableCache() |
java.util.Set<java.lang.String> |
validOptions()
Returns a set of options which this rewrite strategy can use.
|
name, sortOrder, sortOrder, validateOptions
inputFileSize, numOutputFiles, planFileGroups, selectFilesToRewrite, splitSize, targetFileSize, writeMaxFileSize
public static final java.lang.String COMPRESSION_FACTOR
public SparkSortStrategy(Table table, org.apache.spark.sql.SparkSession spark)
public Table table()
RewriteStrategy
public java.util.Set<java.lang.String> validOptions()
RewriteStrategy
validOptions
in interface RewriteStrategy
validOptions
in class SortStrategy
public RewriteStrategy options(java.util.Map<java.lang.String,java.lang.String> options)
RewriteStrategy
options
in interface RewriteStrategy
options
in class SortStrategy
public java.util.Set<DataFile> rewriteFiles(java.util.List<FileScanTask> filesToRewrite)
RewriteStrategy
filesToRewrite
- a group of files to be rewritten together
protected org.apache.spark.sql.SparkSession spark()
protected org.apache.spark.sql.catalyst.plans.logical.LogicalPlan sortPlan(org.apache.spark.sql.connector.distributions.Distribution distribution, org.apache.spark.sql.connector.expressions.SortOrder[] ordering, org.apache.spark.sql.catalyst.plans.logical.LogicalPlan plan, org.apache.spark.sql.internal.SQLConf conf)
protected double sizeEstimateMultiple()
protected SparkTableCache tableCache()
protected FileScanTaskSetManager manager()
protected FileRewriteCoordinator rewriteCoordinator()