public class BaseRewriteDataFilesSpark3Action
extends java.lang.Object
Nested classes/interfaces inherited from interface RewriteDataFiles:
RewriteDataFiles.FileGroupInfo, RewriteDataFiles.FileGroupRewriteResult, RewriteDataFiles.Result

Fields inherited from interface RewriteDataFiles:
MAX_CONCURRENT_FILE_GROUP_REWRITES, MAX_CONCURRENT_FILE_GROUP_REWRITES_DEFAULT, MAX_FILE_GROUP_SIZE_BYTES, MAX_FILE_GROUP_SIZE_BYTES_DEFAULT, PARTIAL_PROGRESS_ENABLED, PARTIAL_PROGRESS_ENABLED_DEFAULT, PARTIAL_PROGRESS_MAX_COMMITS, PARTIAL_PROGRESS_MAX_COMMITS_DEFAULT, TARGET_FILE_SIZE_BYTES, USE_STARTING_SEQUENCE_NUMBER, USE_STARTING_SEQUENCE_NUMBER_DEFAULT
Modifier | Constructor and Description
---|---
protected | BaseRewriteDataFilesSpark3Action(org.apache.spark.sql.SparkSession spark, Table table)
Modifier and Type | Method and Description
---|---
RewriteDataFiles | binPack(): Choose BINPACK as a strategy for this rewrite operation
protected BinPackStrategy | binPackStrategy(): The framework specific BinPackStrategy
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> | buildManifestFileDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> | buildManifestListDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> | buildOtherMetadataFileDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> | buildValidDataFileDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> | buildValidMetadataFileDF(Table table)
protected void | commit(SnapshotUpdate<?> update)
RewriteDataFiles.Result | execute(): Executes this action.
RewriteDataFiles | filter(Expression expression): A user provided filter for determining which files will be considered by the rewrite strategy.
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> | loadMetadataTable(Table table, MetadataTableType type)
protected JobGroupInfo | newJobGroupInfo(java.lang.String groupId, java.lang.String desc)
protected Table | newStaticTable(TableMetadata metadata, FileIO io)
ThisT | option(java.lang.String name, java.lang.String value): Configures this action with an extra option.
protected java.util.Map<java.lang.String,java.lang.String> | options()
ThisT | options(java.util.Map<java.lang.String,java.lang.String> newOptions): Configures this action with extra options.
protected RewriteDataFiles | self()
ThisT | snapshotProperty(java.lang.String property, java.lang.String value): Sets a summary property in the snapshot produced by this action.
RewriteDataFiles | sort(): Choose SORT as a strategy for this rewrite operation using the table's sortOrder
RewriteDataFiles | sort(SortOrder sortOrder): Choose SORT as a strategy for this rewrite operation and manually specify the sortOrder to use
protected SortStrategy | sortStrategy(): The framework specific SortStrategy
protected org.apache.spark.sql.SparkSession | spark()
protected org.apache.spark.api.java.JavaSparkContext | sparkContext()
protected Table | table()
protected <T> T | withJobGroupInfo(JobGroupInfo info, java.util.function.Supplier<T> supplier)
Methods inherited from class java.lang.Object:
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
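The action is built up with the fluent methods summarized above and runs when execute() is called. A minimal sketch, assuming the action is obtained through org.apache.iceberg.spark.actions.SparkActions (the usual entry point, which is not part of this page) and that a Table instance has already been loaded:

```java
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.RewriteDataFiles;
import org.apache.iceberg.spark.actions.SparkActions;
import org.apache.spark.sql.SparkSession;

public class RewriteDataFilesExample {
  // Bin-pack small files in the table into larger ones and commit the result.
  public static void compact(SparkSession spark, Table table) {
    RewriteDataFiles.Result result =
        SparkActions.get(spark)       // assumed entry point for Spark actions
            .rewriteDataFiles(table)  // returns a RewriteDataFiles action such as this class
            .binPack()                // choose the BINPACK strategy
            .execute();               // plan file groups, rewrite them, commit

    // Result exposes details about the rewritten file groups (see RewriteDataFiles.Result).
    System.out.println("Rewrite finished: " + result);
  }
}
```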
protected BaseRewriteDataFilesSpark3Action(org.apache.spark.sql.SparkSession spark, Table table)
protected BinPackStrategy binPackStrategy()
Returns: the framework specific BinPackStrategy
protected SortStrategy sortStrategy()
Returns: the framework specific SortStrategy
protected RewriteDataFiles self()
protected Table table()
public RewriteDataFiles binPack()
Description copied from interface: RewriteDataFiles
Choose BINPACK as a strategy for this rewrite operation
Specified by: binPack in interface RewriteDataFiles
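A bin-pack rewrite is typically paired with the TARGET_FILE_SIZE_BYTES option listed in the inherited fields above; a sketch, assuming the SparkActions entry point, with the 512 MB target chosen purely for illustration:

```java
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.RewriteDataFiles;
import org.apache.iceberg.spark.actions.SparkActions;
import org.apache.spark.sql.SparkSession;

class BinPackExample {
  static RewriteDataFiles.Result binPack(SparkSession spark, Table table) {
    return SparkActions.get(spark)
        .rewriteDataFiles(table)
        .binPack()
        // Aim for ~512 MB output files; the value is an example, not a recommendation.
        .option(RewriteDataFiles.TARGET_FILE_SIZE_BYTES, String.valueOf(512L * 1024 * 1024))
        .execute();
  }
}
```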
public RewriteDataFiles sort(SortOrder sortOrder)
Description copied from interface: RewriteDataFiles
Choose SORT as a strategy for this rewrite operation and manually specify the sortOrder to use
Specified by: sort in interface RewriteDataFiles
Parameters: sortOrder - user defined sortOrder

public RewriteDataFiles sort()
Description copied from interface: RewriteDataFiles
Choose SORT as a strategy for this rewrite operation using the table's sortOrder
Specified by: sort in interface RewriteDataFiles
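A sketch of both variants: sort() reorders data using the sort order already defined on the table, while sort(SortOrder) takes an explicitly built order. The SparkActions entry point and the column names (event_time, device_id) are placeholders for illustration:

```java
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.RewriteDataFiles;
import org.apache.iceberg.spark.actions.SparkActions;
import org.apache.spark.sql.SparkSession;

class SortRewriteExample {
  static void sortByTableOrder(SparkSession spark, Table table) {
    // Uses the table's own sort order.
    SparkActions.get(spark).rewriteDataFiles(table).sort().execute();
  }

  static void sortByCustomOrder(SparkSession spark, Table table) {
    // Build a one-off sort order against the table schema.
    SortOrder order = SortOrder.builderFor(table.schema())
        .asc("event_time")   // placeholder column
        .desc("device_id")   // placeholder column
        .build();
    SparkActions.get(spark).rewriteDataFiles(table).sort(order).execute();
  }
}
```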
public RewriteDataFiles filter(Expression expression)
Description copied from interface: RewriteDataFiles
A user provided filter for determining which files will be considered by the rewrite strategy.
Specified by: filter in interface RewriteDataFiles
Parameters: expression - an Iceberg expression used to determine which files will be considered for rewriting

public RewriteDataFiles.Result execute()
Description copied from interface: Action
Executes this action.
Specified by: execute in interface Action<RewriteDataFiles,RewriteDataFiles.Result>
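The filter limits which data files the chosen strategy will even consider, and execute() runs the planned rewrite and returns the Result. A sketch using the Expressions factory; the SparkActions entry point and the partition column name `day` are illustrative assumptions:

```java
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.RewriteDataFiles;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.spark.actions.SparkActions;
import org.apache.spark.sql.SparkSession;

class FilteredRewriteExample {
  static RewriteDataFiles.Result rewriteOneDay(SparkSession spark, Table table) {
    return SparkActions.get(spark)
        .rewriteDataFiles(table)
        // Only files that may contain rows matching this predicate are considered.
        .filter(Expressions.equal("day", "2021-06-01"))
        .binPack()
        .execute();
  }
}
```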
public ThisT snapshotProperty(java.lang.String property, java.lang.String value)
Description copied from interface: SnapshotUpdate
Sets a summary property in the snapshot produced by this action.
Specified by: snapshotProperty in interface SnapshotUpdate<ThisT,R>
Parameters: property - a snapshot property name
value - a snapshot property value

protected void commit(SnapshotUpdate<?> update)
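For snapshotProperty above, a sketch that tags the snapshot committed by the rewrite with a custom summary entry; the SparkActions entry point and the key/value pair are illustrative assumptions:

```java
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.RewriteDataFiles;
import org.apache.iceberg.spark.actions.SparkActions;
import org.apache.spark.sql.SparkSession;

class SnapshotPropertyExample {
  static void compactWithAudit(SparkSession spark, Table table) {
    SparkActions.get(spark)
        .rewriteDataFiles(table)
        .binPack()
        // Written into the summary of the snapshot this action commits.
        .snapshotProperty("compaction-job", "nightly-maintenance")
        .execute();
  }
}
```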
protected org.apache.spark.sql.SparkSession spark()
protected org.apache.spark.api.java.JavaSparkContext sparkContext()
public ThisT option(java.lang.String name, java.lang.String value)
Description copied from interface: Action
Configures this action with an extra option.
Certain actions allow users to control internal details of their execution via options.

public ThisT options(java.util.Map<java.lang.String,java.lang.String> newOptions)
Description copied from interface: Action
Configures this action with extra options.
Certain actions allow users to control internal details of their execution via options.

protected java.util.Map<java.lang.String,java.lang.String> options()
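A sketch of passing several options at once through options(Map), using the option keys listed in the inherited fields above; the SparkActions entry point is assumed and the values shown are illustrative only:

```java
import java.util.HashMap;
import java.util.Map;
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.RewriteDataFiles;
import org.apache.iceberg.spark.actions.SparkActions;
import org.apache.spark.sql.SparkSession;

class RewriteOptionsExample {
  static void partialProgressCompaction(SparkSession spark, Table table) {
    Map<String, String> opts = new HashMap<>();
    // Commit rewritten file groups incrementally instead of in one final commit.
    opts.put(RewriteDataFiles.PARTIAL_PROGRESS_ENABLED, "true");
    opts.put(RewriteDataFiles.PARTIAL_PROGRESS_MAX_COMMITS, "10");
    // Limit how many file groups are rewritten concurrently.
    opts.put(RewriteDataFiles.MAX_CONCURRENT_FILE_GROUP_REWRITES, "5");

    SparkActions.get(spark)
        .rewriteDataFiles(table)
        .binPack()
        .options(opts)
        .execute();
  }
}
```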
protected <T> T withJobGroupInfo(JobGroupInfo info, java.util.function.Supplier<T> supplier)
protected JobGroupInfo newJobGroupInfo(java.lang.String groupId, java.lang.String desc)
protected Table newStaticTable(TableMetadata metadata, FileIO io)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildValidDataFileDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildManifestFileDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildManifestListDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildOtherMetadataFileDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildValidMetadataFileDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadMetadataTable(Table table, MetadataTableType type)