Package org.apache.iceberg.spark.actions
Class MigrateTableSparkAction
- java.lang.Object
-
- org.apache.iceberg.spark.actions.MigrateTableSparkAction
-
- All Implemented Interfaces:
Action<MigrateTable,MigrateTable.Result>,MigrateTable
public class MigrateTableSparkAction extends java.lang.Object implements MigrateTable
Takes a Spark table in the source catalog and attempts to transform it into an Iceberg table in the same location with the same identifier. Once complete, the identifier that previously referred to a non-Iceberg table will refer to the newly migrated Iceberg table.
-
-
Nested Class Summary
-
Nested classes/interfaces inherited from interface org.apache.iceberg.actions.MigrateTable
MigrateTable.Result
-
-
Field Summary
Fields (Modifier and Type, Field, Description):
protected static java.lang.String CONTENT_FILE
protected static java.util.List<java.lang.String> EXCLUDED_PROPERTIES
protected static java.lang.String FILE_PATH
protected static java.lang.String FILE_TYPE
protected static java.lang.String ICEBERG_METADATA_FOLDER
protected static java.lang.String LAST_MODIFIED
protected static java.lang.String LOCATION
protected static java.lang.String MANIFEST
protected static java.lang.String MANIFEST_LIST
protected static java.lang.String OTHERS
-
Method Summary
All Methods / Instance Methods / Concrete Methods (Modifier and Type, Method, Description):
protected java.util.Map<java.lang.String,java.lang.String> additionalProperties()
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildAllReachableOtherMetadataFileDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildManifestFileDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildManifestListDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildOtherMetadataFileDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildValidContentFileDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildValidContentFileWithTypeDF(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildValidMetadataFileDF(Table table)
protected org.apache.spark.sql.connector.catalog.StagingTableCatalog checkDestinationCatalog(org.apache.spark.sql.connector.catalog.CatalogPlugin catalog)
protected org.apache.spark.sql.connector.catalog.TableCatalog checkSourceCatalog(org.apache.spark.sql.connector.catalog.CatalogPlugin catalog)
protected org.apache.spark.sql.connector.catalog.StagingTableCatalog destCatalog()
protected org.apache.spark.sql.connector.catalog.Identifier destTableIdent()
protected java.util.Map<java.lang.String,java.lang.String> destTableProps()
protected void ensureNameMappingPresent(Table table)
MigrateTable.Result execute() - Executes this action.
protected java.lang.String getMetadataLocation(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadMetadataTable(Table table, MetadataTableType type)
protected JobGroupInfo newJobGroupInfo(java.lang.String groupId, java.lang.String desc)
protected Table newStaticTable(TableMetadata metadata, FileIO io)
ThisT option(java.lang.String name, java.lang.String value)
protected java.util.Map<java.lang.String,java.lang.String> options()
ThisT options(java.util.Map<java.lang.String,java.lang.String> newOptions)
protected MigrateTableSparkAction self()
protected void setProperties(java.util.Map<java.lang.String,java.lang.String> properties)
protected void setProperty(java.lang.String key, java.lang.String value)
protected org.apache.spark.sql.connector.catalog.TableCatalog sourceCatalog()
protected org.apache.spark.sql.connector.catalog.Identifier sourceTableIdent()
protected java.lang.String sourceTableLocation()
protected org.apache.spark.sql.SparkSession spark()
protected org.apache.spark.api.java.JavaSparkContext sparkContext()
protected StagedSparkTable stageDestTable()
MigrateTableSparkAction tableProperties(java.util.Map<java.lang.String,java.lang.String> properties) - Sets table properties in the newly created Iceberg table.
MigrateTableSparkAction tableProperty(java.lang.String property, java.lang.String value) - Sets a table property in the newly created Iceberg table.
protected org.apache.spark.sql.catalyst.catalog.CatalogTable v1SourceTable()
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> withFileType(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> ds, java.lang.String type)
protected <T> T withJobGroupInfo(JobGroupInfo info, java.util.function.Supplier<T> supplier)
-
-
-
Field Detail
-
LOCATION
protected static final java.lang.String LOCATION
- See Also:
- Constant Field Values
-
ICEBERG_METADATA_FOLDER
protected static final java.lang.String ICEBERG_METADATA_FOLDER
- See Also:
- Constant Field Values
-
EXCLUDED_PROPERTIES
protected static final java.util.List<java.lang.String> EXCLUDED_PROPERTIES
-
CONTENT_FILE
protected static final java.lang.String CONTENT_FILE
- See Also:
- Constant Field Values
-
MANIFEST
protected static final java.lang.String MANIFEST
- See Also:
- Constant Field Values
-
MANIFEST_LIST
protected static final java.lang.String MANIFEST_LIST
- See Also:
- Constant Field Values
-
OTHERS
protected static final java.lang.String OTHERS
- See Also:
- Constant Field Values
-
FILE_PATH
protected static final java.lang.String FILE_PATH
- See Also:
- Constant Field Values
-
FILE_TYPE
protected static final java.lang.String FILE_TYPE
- See Also:
- Constant Field Values
-
LAST_MODIFIED
protected static final java.lang.String LAST_MODIFIED
- See Also:
- Constant Field Values
-
-
Method Detail
-
self
protected MigrateTableSparkAction self()
-
destCatalog
protected org.apache.spark.sql.connector.catalog.StagingTableCatalog destCatalog()
-
destTableIdent
protected org.apache.spark.sql.connector.catalog.Identifier destTableIdent()
-
tableProperties
public MigrateTableSparkAction tableProperties(java.util.Map<java.lang.String,java.lang.String> properties)
Description copied from interface: MigrateTable
Sets table properties in the newly created Iceberg table. Any properties with the same key name will be overwritten.
- Specified by:
- tableProperties in interface MigrateTable
- Parameters:
- properties - a map of properties to set
- Returns:
- this for method chaining
-
tableProperty
public MigrateTableSparkAction tableProperty(java.lang.String property, java.lang.String value)
Description copied from interface: MigrateTable
Sets a table property in the newly created Iceberg table. Any properties with the same key will be overwritten.
- Specified by:
- tableProperty in interface MigrateTable
- Parameters:
- property - a table property name
- value - a table property value
- Returns:
- this for method chaining
-
execute
public MigrateTable.Result execute()
Description copied from interface: Action
Executes this action.
- Specified by:
- execute in interface Action<MigrateTable,MigrateTable.Result>
- Returns:
- the result of this action
-
destTableProps
protected java.util.Map<java.lang.String,java.lang.String> destTableProps()
-
checkSourceCatalog
protected org.apache.spark.sql.connector.catalog.TableCatalog checkSourceCatalog(org.apache.spark.sql.connector.catalog.CatalogPlugin catalog)
-
sourceTableLocation
protected java.lang.String sourceTableLocation()
-
v1SourceTable
protected org.apache.spark.sql.catalyst.catalog.CatalogTable v1SourceTable()
-
sourceCatalog
protected org.apache.spark.sql.connector.catalog.TableCatalog sourceCatalog()
-
sourceTableIdent
protected org.apache.spark.sql.connector.catalog.Identifier sourceTableIdent()
-
setProperties
protected void setProperties(java.util.Map<java.lang.String,java.lang.String> properties)
-
setProperty
protected void setProperty(java.lang.String key, java.lang.String value)
-
additionalProperties
protected java.util.Map<java.lang.String,java.lang.String> additionalProperties()
-
checkDestinationCatalog
protected org.apache.spark.sql.connector.catalog.StagingTableCatalog checkDestinationCatalog(org.apache.spark.sql.connector.catalog.CatalogPlugin catalog)
-
stageDestTable
protected StagedSparkTable stageDestTable()
-
ensureNameMappingPresent
protected void ensureNameMappingPresent(Table table)
-
getMetadataLocation
protected java.lang.String getMetadataLocation(Table table)
-
spark
protected org.apache.spark.sql.SparkSession spark()
-
sparkContext
protected org.apache.spark.api.java.JavaSparkContext sparkContext()
-
option
public ThisT option(java.lang.String name, java.lang.String value)
-
options
public ThisT options(java.util.Map<java.lang.String,java.lang.String> newOptions)
-
options
protected java.util.Map<java.lang.String,java.lang.String> options()
-
withJobGroupInfo
protected <T> T withJobGroupInfo(JobGroupInfo info, java.util.function.Supplier<T> supplier)
-
newJobGroupInfo
protected JobGroupInfo newJobGroupInfo(java.lang.String groupId, java.lang.String desc)
-
newStaticTable
protected Table newStaticTable(TableMetadata metadata, FileIO io)
-
buildValidContentFileWithTypeDF
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildValidContentFileWithTypeDF(Table table)
-
buildValidContentFileDF
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildValidContentFileDF(Table table)
-
buildManifestFileDF
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildManifestFileDF(Table table)
-
buildManifestListDF
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildManifestListDF(Table table)
-
buildOtherMetadataFileDF
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildOtherMetadataFileDF(Table table)
-
buildAllReachableOtherMetadataFileDF
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildAllReachableOtherMetadataFileDF(Table table)
-
buildValidMetadataFileDF
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> buildValidMetadataFileDF(Table table)
-
withFileType
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> withFileType(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> ds, java.lang.String type)
-
loadMetadataTable
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadMetadataTable(Table table, MetadataTableType type)
-
-