public class BaseRewriteDataFilesSpark3Action
extends java.lang.Object

Nested classes/interfaces inherited from interface RewriteDataFiles:
RewriteDataFiles.FileGroupInfo, RewriteDataFiles.FileGroupRewriteResult, RewriteDataFiles.Result

Fields inherited from interface RewriteDataFiles:
MAX_CONCURRENT_FILE_GROUP_REWRITES, MAX_CONCURRENT_FILE_GROUP_REWRITES_DEFAULT, MAX_FILE_GROUP_SIZE_BYTES, MAX_FILE_GROUP_SIZE_BYTES_DEFAULT, PARTIAL_PROGRESS_ENABLED, PARTIAL_PROGRESS_ENABLED_DEFAULT, PARTIAL_PROGRESS_MAX_COMMITS, PARTIAL_PROGRESS_MAX_COMMITS_DEFAULT, TARGET_FILE_SIZE_BYTES, USE_STARTING_SEQUENCE_NUMBER, USE_STARTING_SEQUENCE_NUMBER_DEFAULT

Constructor Summary

| Modifier | Constructor and Description |
|---|---|
| protected | BaseRewriteDataFilesSpark3Action(org.apache.spark.sql.SparkSession spark, Table table) |

Method Summary
| Modifier and Type | Method and Description |
|---|---|
| RewriteDataFiles | binPack(): Choose BINPACK as a strategy for this rewrite operation. |
| protected BinPackStrategy | binPackStrategy(): The framework specific BinPackStrategy. |
| protected org.apache.spark.sql.Dataset&lt;org.apache.spark.sql.Row&gt; | buildManifestFileDF(Table table) |
| protected org.apache.spark.sql.Dataset&lt;org.apache.spark.sql.Row&gt; | buildManifestListDF(Table table) |
| protected org.apache.spark.sql.Dataset&lt;org.apache.spark.sql.Row&gt; | buildOtherMetadataFileDF(Table table) |
| protected org.apache.spark.sql.Dataset&lt;org.apache.spark.sql.Row&gt; | buildValidDataFileDF(Table table) |
| protected org.apache.spark.sql.Dataset&lt;org.apache.spark.sql.Row&gt; | buildValidMetadataFileDF(Table table) |
| protected void | commit(SnapshotUpdate&lt;?&gt; update) |
| RewriteDataFiles.Result | execute(): Executes this action. |
| RewriteDataFiles | filter(Expression expression): A user provided filter for determining which files will be considered by the rewrite strategy. |
| protected org.apache.spark.sql.Dataset&lt;org.apache.spark.sql.Row&gt; | loadMetadataTable(Table table, MetadataTableType type) |
| protected JobGroupInfo | newJobGroupInfo(java.lang.String groupId, java.lang.String desc) |
| protected Table | newStaticTable(TableMetadata metadata, FileIO io) |
| ThisT | option(java.lang.String name, java.lang.String value): Configures this action with an extra option. |
| protected java.util.Map&lt;java.lang.String,java.lang.String&gt; | options() |
| ThisT | options(java.util.Map&lt;java.lang.String,java.lang.String&gt; newOptions): Configures this action with extra options. |
| protected RewriteDataFiles | self() |
| ThisT | snapshotProperty(java.lang.String property, java.lang.String value): Sets a summary property in the snapshot produced by this action. |
| RewriteDataFiles | sort(): Choose SORT as a strategy for this rewrite operation using the table's sortOrder. |
| RewriteDataFiles | sort(SortOrder sortOrder): Choose SORT as a strategy for this rewrite operation and manually specify the sortOrder to use. |
| protected SortStrategy | sortStrategy(): The framework specific SortStrategy. |
| protected org.apache.spark.sql.SparkSession | spark() |
| protected org.apache.spark.api.java.JavaSparkContext | sparkContext() |
| protected Table | table() |
| protected &lt;T&gt; T | withJobGroupInfo(JobGroupInfo info, java.util.function.Supplier&lt;T&gt; supplier) |
Methods inherited from class java.lang.Object:
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait

Constructor Detail

protected BaseRewriteDataFilesSpark3Action(org.apache.spark.sql.SparkSession spark,
                                           Table table)
Method Detail

protected BinPackStrategy binPackStrategy()
The framework specific BinPackStrategy.

protected SortStrategy sortStrategy()
The framework specific SortStrategy.

protected RewriteDataFiles self()

protected Table table()

public RewriteDataFiles binPack()
Choose BINPACK as a strategy for this rewrite operation.
Specified by: binPack in interface RewriteDataFiles
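A minimal sketch of a bin-pack rewrite. It assumes the action is obtained through an entry point such as SparkActions.get(spark).rewriteDataFiles(table), which is not part of this class's API, and that a Table has already been loaded; all rewrite options are left at their defaults.

```java
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.RewriteDataFiles;
import org.apache.iceberg.spark.actions.SparkActions;
import org.apache.spark.sql.SparkSession;

public class BinPackExample {
  public static void compact(SparkSession spark, Table table) {
    // BINPACK combines small files (and splits oversized ones) toward the target file size.
    RewriteDataFiles.Result result =
        SparkActions.get(spark)
            .rewriteDataFiles(table)
            .binPack()   // choose the BINPACK strategy
            .execute();  // plan file groups, rewrite them, and commit the replacement

    System.out.println("Rewrote " + result.rewrittenDataFilesCount() + " data files");
  }
}
```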
public RewriteDataFiles sort(SortOrder sortOrder)
Choose SORT as a strategy for this rewrite operation and manually specify the sortOrder to use.
Specified by: sort in interface RewriteDataFiles
Parameters: sortOrder - user defined sortOrder

public RewriteDataFiles sort()
Choose SORT as a strategy for this rewrite operation using the table's sortOrder.
Specified by: sort in interface RewriteDataFiles
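A sketch of the two sort variants. Here action stands for a RewriteDataFiles instance bound to table (obtained elsewhere, e.g. via SparkActions); the column names in the explicit SortOrder are placeholders.

```java
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.RewriteDataFiles;

public class SortRewriteExample {
  public static void rewriteSorted(RewriteDataFiles action, Table table) {
    // Variant 1: use the table's own sort order (the table must define one).
    // action.sort().execute();

    // Variant 2: supply an explicit sort order for this rewrite only.
    SortOrder byDateAndDevice = SortOrder.builderFor(table.schema())
        .asc("event_date")   // placeholder column names, for illustration only
        .asc("device_id")
        .build();
    action.sort(byDateAndDevice).execute();
  }
}
```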
public RewriteDataFiles filter(Expression expression)
A user provided filter for determining which files will be considered by the rewrite strategy.
Specified by: filter in interface RewriteDataFiles
Parameters: expression - an Iceberg expression used to determine which files will be considered for rewriting
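A sketch of restricting the rewrite with a filter, assuming a partition column named event_date (illustrative only); files that cannot match the expression are never handed to the strategy.

```java
import org.apache.iceberg.actions.RewriteDataFiles;
import org.apache.iceberg.expressions.Expressions;

public class FilteredRewriteExample {
  public static void compactOneDay(RewriteDataFiles action) {
    // Only files whose partition values or metrics could contain matching rows
    // are considered by the rewrite strategy; everything else is skipped.
    action
        .filter(Expressions.equal("event_date", "2021-06-01"))  // assumed partition column
        .binPack()
        .execute();
  }
}
```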
public RewriteDataFiles.Result execute()
Executes this action.
Specified by: execute in interface Action&lt;RewriteDataFiles,RewriteDataFiles.Result&gt;

public ThisT snapshotProperty(java.lang.String property,
                              java.lang.String value)
Sets a summary property in the snapshot produced by this action.
Specified by: snapshotProperty in interface SnapshotUpdate&lt;ThisT,R&gt;
Parameters: property - a snapshot property name
            value - a snapshot property value
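A sketch combining execute() with snapshotProperty(), assuming snapshotProperty is reachable through the RewriteDataFiles interface via SnapshotUpdate (as the Specified by entry above indicates) and that Result offers the aggregated rewrittenDataFilesCount()/addedDataFilesCount() helpers.

```java
import org.apache.iceberg.actions.RewriteDataFiles;

public class RewriteResultExample {
  public static void compactAndTag(RewriteDataFiles action) {
    RewriteDataFiles.Result result = action
        // Stamp the commit(s) produced by this action with a custom summary property.
        .snapshotProperty("compaction-trigger", "nightly-maintenance")
        .binPack()
        .execute();

    // Per-file-group details are available via result.rewriteResults().
    System.out.printf("rewrote %d files, added %d files%n",
        result.rewrittenDataFilesCount(), result.addedDataFilesCount());
  }
}
```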
protected void commit(SnapshotUpdate&lt;?&gt; update)
protected org.apache.spark.sql.SparkSession spark()
protected org.apache.spark.api.java.JavaSparkContext sparkContext()
public ThisT option(java.lang.String name,
java.lang.String value)
Configures this action with an extra option. Certain actions allow users to control internal details of their execution via options.
public ThisT options(java.util.Map<java.lang.String,java.lang.String> newOptions)
Configures this action with extra options. Certain actions allow users to control internal details of their execution via options.
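A sketch of tuning a rewrite through option()/options(). The keys are the RewriteDataFiles constants listed at the top of this page; the values are illustrative, and all options must be set before execute().

```java
import java.util.HashMap;
import java.util.Map;

import org.apache.iceberg.actions.RewriteDataFiles;

public class RewriteOptionsExample {
  public static void compactWithOptions(RewriteDataFiles action) {
    Map<String, String> opts = new HashMap<>();
    // Commit finished file groups as they complete instead of in one final commit.
    opts.put(RewriteDataFiles.PARTIAL_PROGRESS_ENABLED, "true");
    opts.put(RewriteDataFiles.PARTIAL_PROGRESS_MAX_COMMITS, "10");
    // Rewrite up to 4 file groups concurrently.
    opts.put(RewriteDataFiles.MAX_CONCURRENT_FILE_GROUP_REWRITES, "4");

    action
        .options(opts)
        // Individual options can also be set one at a time; the target size is in bytes.
        .option(RewriteDataFiles.TARGET_FILE_SIZE_BYTES, String.valueOf(512L * 1024 * 1024))
        .binPack()
        .execute();
  }
}
```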
protected java.util.Map<java.lang.String,java.lang.String> options()
protected <T> T withJobGroupInfo(JobGroupInfo info, java.util.function.Supplier<T> supplier)
protected JobGroupInfo newJobGroupInfo(java.lang.String groupId, java.lang.String desc)
protected Table newStaticTable(TableMetadata metadata, FileIO io)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildValidDataFileDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildManifestFileDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildManifestListDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildOtherMetadataFileDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildValidMetadataFileDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadMetadataTable(Table table, MetadataTableType type)