java.lang.Object

org.apache.iceberg.spark.actions.SnapshotTableSparkAction

All Implemented Interfaces: Action<SnapshotTable,SnapshotTable.Result>, SnapshotTable

public class SnapshotTableSparkAction extends Object implements SnapshotTable

Creates a new Iceberg table based on a source Spark table. The new Iceberg table will have a different data and metadata directory, allowing it to exist independently of the source table.

Nested Class Summary

Nested classes/interfaces inherited from interface org.apache.iceberg.actions.SnapshotTable
SnapshotTable.Result
Field Summary

Fields

Modifier and Type

Field

Description

protected static final org.apache.iceberg.relocated.com.google.common.base.Joiner

COMMA_JOINER

protected static final org.apache.iceberg.relocated.com.google.common.base.Splitter

COMMA_SPLITTER

protected static final List<String>

EXCLUDED_PROPERTIES

protected static final String

FILE_PATH

protected static final String

ICEBERG_METADATA_FOLDER

protected static final String

LAST_MODIFIED

protected static final String

LOCATION

protected static final String

MANIFEST

protected static final String

MANIFEST_LIST

protected static final String

OTHERS

protected static final String

STATISTICS_FILES
Method Summary

Modifier and Type

Method

Description

protected Map<String,String>

additionalProperties()

protected org.apache.spark.sql.Dataset<FileInfo>

allReachableOtherMetadataFileDS(Table table)

SnapshotTableSparkAction

as(String ident)

Sets the table identifier for the newly created Iceberg table.

protected org.apache.spark.sql.connector.catalog.StagingTableCatalog

checkDestinationCatalog(org.apache.spark.sql.connector.catalog.CatalogPlugin catalog)

protected org.apache.spark.sql.connector.catalog.TableCatalog

checkSourceCatalog(org.apache.spark.sql.connector.catalog.CatalogPlugin catalog)

protected org.apache.spark.sql.Dataset<FileInfo>

contentFileDS(Table table)

protected org.apache.spark.sql.Dataset<FileInfo>

contentFileDS(Table table, Set<Long> snapshotIds)

protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary

deleteFiles(ExecutorService executorService, Consumer<String> deleteFunc, Iterator<FileInfo> files)

Deletes files and keeps track of how many files were removed for each file type.

protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary

deleteFiles(SupportsBulkOperations io, Iterator<FileInfo> files)

protected org.apache.spark.sql.connector.catalog.StagingTableCatalog

destCatalog()

protected org.apache.spark.sql.connector.catalog.Identifier

destTableIdent()

protected Map<String,String>

destTableProps()

protected void

ensureNameMappingPresent(Table table)

SnapshotTable.Result

execute()

Executes this action.

SnapshotTableSparkAction

executeWith(ExecutorService service)

Sets the executor service to use for parallel file reading.

protected String

getMetadataLocation(Table table)

protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>

loadMetadataTable(Table table, MetadataTableType type)

protected org.apache.spark.sql.Dataset<FileInfo>

manifestDS(Table table)

protected org.apache.spark.sql.Dataset<FileInfo>

manifestDS(Table table, Set<Long> snapshotIds)

protected org.apache.spark.sql.Dataset<FileInfo>

manifestListDS(Table table)

protected org.apache.spark.sql.Dataset<FileInfo>

manifestListDS(Table table, Set<Long> snapshotIds)

protected JobGroupInfo

newJobGroupInfo(String groupId, String desc)

protected Table

newStaticTable(TableMetadata metadata, FileIO io)

SnapshotTableSparkAction

option(String name, String value)

protected Map<String,String>

options()

SnapshotTableSparkAction

options(Map<String,String> newOptions)

protected org.apache.spark.sql.Dataset<FileInfo>

otherMetadataFileDS(Table table)

protected SnapshotTableSparkAction

self()

protected void

setProperties(Map<String,String> properties)

protected void

setProperty(String key, String value)

protected org.apache.spark.sql.connector.catalog.TableCatalog

sourceCatalog()

protected org.apache.spark.sql.connector.catalog.Identifier

sourceTableIdent()

protected String

sourceTableLocation()

protected org.apache.spark.sql.SparkSession

spark()

protected org.apache.spark.api.java.JavaSparkContext

sparkContext()

protected StagedSparkTable

stageDestTable()

protected org.apache.spark.sql.Dataset<FileInfo>

statisticsFileDS(Table table, Set<Long> snapshotIds)

SnapshotTableSparkAction

tableLocation(String location)

Sets the table location for the newly created Iceberg table.

SnapshotTableSparkAction

tableProperties(Map<String,String> properties)

Sets table properties in the newly created Iceberg table.

SnapshotTableSparkAction

tableProperty(String property, String value)

Sets a table property in the newly created Iceberg table.

protected org.apache.spark.sql.catalyst.catalog.CatalogTable

v1SourceTable()

protected <T> T

withJobGroupInfo(JobGroupInfo info, Supplier<T> supplier)

Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait

Methods inherited from interface org.apache.iceberg.actions.Action
option, options

Field Details
- LOCATION
  
  protected static final String LOCATION
  See Also:
  
  Constant Field Values
- ICEBERG_METADATA_FOLDER
  
  protected static final String ICEBERG_METADATA_FOLDER
  See Also:
  
  Constant Field Values
- EXCLUDED_PROPERTIES
  
  protected static final List<String> EXCLUDED_PROPERTIES
- MANIFEST
  
  protected static final String MANIFEST
  See Also:
  
  Constant Field Values
- MANIFEST_LIST
  
  protected static final String MANIFEST_LIST
  See Also:
  
  Constant Field Values
- STATISTICS_FILES
  
  protected static final String STATISTICS_FILES
  See Also:
  
  Constant Field Values
- OTHERS
  
  protected static final String OTHERS
  See Also:
  
  Constant Field Values
- FILE_PATH
  
  protected static final String FILE_PATH
  See Also:
  
  Constant Field Values
- LAST_MODIFIED
  
  protected static final String LAST_MODIFIED
  See Also:
  
  Constant Field Values
- COMMA_SPLITTER
  
  protected static final org.apache.iceberg.relocated.com.google.common.base.Splitter COMMA_SPLITTER
- COMMA_JOINER
  
  protected static final org.apache.iceberg.relocated.com.google.common.base.Joiner COMMA_JOINER
Method Details
- self
  
  protected SnapshotTableSparkAction self()
- destCatalog
  
  protected org.apache.spark.sql.connector.catalog.StagingTableCatalog destCatalog()
- destTableIdent
  
  protected org.apache.spark.sql.connector.catalog.Identifier destTableIdent()
- as
  
  public SnapshotTableSparkAction as(String ident)
  
  Description copied from interface: SnapshotTable
  
  Sets the table identifier for the newly created Iceberg table.
  
  Specified by:
  
  as in interface SnapshotTable
  
  Parameters:
  
  ident - the destination table identifier
  
  Returns:
  
  this for method chaining
- tableProperties
  
  public SnapshotTableSparkAction tableProperties(Map<String,String> properties)
  
  Description copied from interface: SnapshotTable
  
  Sets table properties in the newly created Iceberg table. Any properties with the same key name will be overwritten.
  
  Specified by:
  
  tableProperties in interface SnapshotTable
  
  Parameters:
  
  properties - a map of properties to be included
  
  Returns:
  
  this for method chaining
- tableProperty
  
  public SnapshotTableSparkAction tableProperty(String property, String value)
  
  Description copied from interface: SnapshotTable
  
  Sets a table property in the newly created Iceberg table. Any properties with the same key name will be overwritten.
  
  Specified by:
  
  tableProperty in interface SnapshotTable
  
  Parameters:
  
  property - the key of the property to add
  
  value - the value of the property to add
  
  Returns:
  
  this for method chaining
- executeWith
  
  public SnapshotTableSparkAction executeWith(ExecutorService service)
  
  Description copied from interface: SnapshotTable
  
  Sets the executor service to use for parallel file reading. By default, no executor service is used.
  
  Specified by:
  
  executeWith in interface SnapshotTable
  
  Parameters:
  
  service - executor service
  
  Returns:
  
  this for method chaining
- execute
  
  public SnapshotTable.Result execute()
  
  Description copied from interface: Action
  
  Executes this action.
  
  Specified by:
  
  execute in interface Action<SnapshotTable,SnapshotTable.Result>
  
  Returns:
  
  the result of this action
- destTableProps
  
  protected Map<String,String> destTableProps()
- checkSourceCatalog
  
  protected org.apache.spark.sql.connector.catalog.TableCatalog checkSourceCatalog(org.apache.spark.sql.connector.catalog.CatalogPlugin catalog)
- tableLocation
  
  public SnapshotTableSparkAction tableLocation(String location)
  
  Description copied from interface: SnapshotTable
  
  Sets the table location for the newly created Iceberg table.
  
  Specified by:
  
  tableLocation in interface SnapshotTable
  
  Parameters:
  
  location - a table location
  
  Returns:
  
  this for method chaining
- sourceTableLocation
  
  protected String sourceTableLocation()
- v1SourceTable
  
  protected org.apache.spark.sql.catalyst.catalog.CatalogTable v1SourceTable()
- sourceCatalog
  
  protected org.apache.spark.sql.connector.catalog.TableCatalog sourceCatalog()
- sourceTableIdent
  
  protected org.apache.spark.sql.connector.catalog.Identifier sourceTableIdent()
- setProperties
  
  protected void setProperties(Map<String,String> properties)
- setProperty
  
  protected void setProperty(String key, String value)
- additionalProperties
  
  protected Map<String,String> additionalProperties()
- checkDestinationCatalog
  
  protected org.apache.spark.sql.connector.catalog.StagingTableCatalog checkDestinationCatalog(org.apache.spark.sql.connector.catalog.CatalogPlugin catalog)
- stageDestTable
  
  protected StagedSparkTable stageDestTable()
- ensureNameMappingPresent
  
  protected void ensureNameMappingPresent(Table table)
- getMetadataLocation
  
  protected String getMetadataLocation(Table table)
- spark
  
  protected org.apache.spark.sql.SparkSession spark()
- sparkContext
  
  protected org.apache.spark.api.java.JavaSparkContext sparkContext()
- option
  
  public SnapshotTableSparkAction option(String name, String value)
- options
  
  public SnapshotTableSparkAction options(Map<String,String> newOptions)
- options
  
  protected Map<String,String> options()
- withJobGroupInfo
  
  protected <T> T withJobGroupInfo(JobGroupInfo info, Supplier<T> supplier)
- newJobGroupInfo
  
  protected JobGroupInfo newJobGroupInfo(String groupId, String desc)
- newStaticTable
  
  protected Table newStaticTable(TableMetadata metadata, FileIO io)
- contentFileDS
  
  protected org.apache.spark.sql.Dataset<FileInfo> contentFileDS(Table table)
- contentFileDS
  
  protected org.apache.spark.sql.Dataset<FileInfo> contentFileDS(Table table, Set<Long> snapshotIds)
- manifestDS
  
  protected org.apache.spark.sql.Dataset<FileInfo> manifestDS(Table table)
- manifestDS
  
  protected org.apache.spark.sql.Dataset<FileInfo> manifestDS(Table table, Set<Long> snapshotIds)
- manifestListDS
  
  protected org.apache.spark.sql.Dataset<FileInfo> manifestListDS(Table table)
- manifestListDS
  
  protected org.apache.spark.sql.Dataset<FileInfo> manifestListDS(Table table, Set<Long> snapshotIds)
- statisticsFileDS
  
  protected org.apache.spark.sql.Dataset<FileInfo> statisticsFileDS(Table table, Set<Long> snapshotIds)
- otherMetadataFileDS
  
  protected org.apache.spark.sql.Dataset<FileInfo> otherMetadataFileDS(Table table)
- allReachableOtherMetadataFileDS
  
  protected org.apache.spark.sql.Dataset<FileInfo> allReachableOtherMetadataFileDS(Table table)
- loadMetadataTable
  
  protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadMetadataTable(Table table, MetadataTableType type)
- deleteFiles
  
  protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles(ExecutorService executorService, Consumer<String> deleteFunc, Iterator<FileInfo> files)
  
  Deletes files and keeps track of how many files were removed for each file type.
  
  Parameters:
  
  executorService - an executor service to use for parallel deletes
  
  deleteFunc - a function that deletes a single file given its path
  
  files - an iterator of FileInfo entries of the structure (path: String, type: String)
  
  Returns:
  
  stats on which files were deleted
- deleteFiles
  
  protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles(SupportsBulkOperations io, Iterator<FileInfo> files)

Class SnapshotTableSparkAction

Nested Class Summary

Nested classes/interfaces inherited from interface org.apache.iceberg.actions.SnapshotTable

Field Summary

Method Summary

Methods inherited from class java.lang.Object

Methods inherited from interface org.apache.iceberg.actions.Action

Field Details

LOCATION

ICEBERG_METADATA_FOLDER

EXCLUDED_PROPERTIES

MANIFEST

MANIFEST_LIST

STATISTICS_FILES

OTHERS

FILE_PATH

LAST_MODIFIED

COMMA_SPLITTER

COMMA_JOINER

Method Details

self

destCatalog

destTableIdent

as

tableProperties

tableProperty

executeWith

execute

destTableProps

checkSourceCatalog

tableLocation

sourceTableLocation

v1SourceTable

sourceCatalog

sourceTableIdent

setProperties

setProperty

additionalProperties

checkDestinationCatalog

stageDestTable

ensureNameMappingPresent

getMetadataLocation

spark

sparkContext

option

options

options

withJobGroupInfo

newJobGroupInfo

newStaticTable

contentFileDS

contentFileDS

manifestDS

manifestDS

manifestListDS

manifestListDS

statisticsFileDS

otherMetadataFileDS

allReachableOtherMetadataFileDS

loadMetadataTable

deleteFiles

deleteFiles