public class SparkTableUtil
extends java.lang.Object
Modifier and Type | Class and Description |
---|---|
static class |
SparkTableUtil.SparkPartition
Class representing a table partition.
|
Modifier and Type | Method and Description |
---|---|
static java.util.List<SparkTableUtil.SparkPartition> |
filterPartitions(java.util.List<SparkTableUtil.SparkPartition> partitions,
java.util.Map<java.lang.String,java.lang.String> partitionFilter) |
static java.util.List<SparkTableUtil.SparkPartition> |
getPartitions(org.apache.spark.sql.SparkSession spark,
java.lang.String table)
Returns all partitions in the table.
|
static java.util.List<SparkTableUtil.SparkPartition> |
getPartitions(org.apache.spark.sql.SparkSession spark,
org.apache.spark.sql.catalyst.TableIdentifier tableIdent,
java.util.Map<java.lang.String,java.lang.String> partitionFilter)
Returns all partitions in the table.
|
static java.util.List<SparkTableUtil.SparkPartition> |
getPartitionsByFilter(org.apache.spark.sql.SparkSession spark,
java.lang.String table,
java.lang.String predicate)
Returns partitions that match the specified 'predicate'.
|
static java.util.List<SparkTableUtil.SparkPartition> |
getPartitionsByFilter(org.apache.spark.sql.SparkSession spark,
org.apache.spark.sql.catalyst.TableIdentifier tableIdent,
org.apache.spark.sql.catalyst.expressions.Expression predicateExpr)
Returns partitions that match the specified 'predicate'.
|
static void |
importSparkPartitions(org.apache.spark.sql.SparkSession spark,
java.util.List<SparkTableUtil.SparkPartition> partitions,
Table targetTable,
PartitionSpec spec,
java.lang.String stagingDir)
Import files from given partitions to an Iceberg table.
|
static void |
importSparkPartitions(org.apache.spark.sql.SparkSession spark,
java.util.List<SparkTableUtil.SparkPartition> partitions,
Table targetTable,
PartitionSpec spec,
java.lang.String stagingDir,
boolean checkDuplicateFiles)
Import files from given partitions to an Iceberg table.
|
static void |
importSparkTable(org.apache.spark.sql.SparkSession spark,
org.apache.spark.sql.catalyst.TableIdentifier sourceTableIdent,
Table targetTable,
java.lang.String stagingDir)
Import files from an existing Spark table to an Iceberg table.
|
static void |
importSparkTable(org.apache.spark.sql.SparkSession spark,
org.apache.spark.sql.catalyst.TableIdentifier sourceTableIdent,
Table targetTable,
java.lang.String stagingDir,
boolean checkDuplicateFiles)
Import files from an existing Spark table to an Iceberg table.
|
static void |
importSparkTable(org.apache.spark.sql.SparkSession spark,
org.apache.spark.sql.catalyst.TableIdentifier sourceTableIdent,
Table targetTable,
java.lang.String stagingDir,
java.util.Map<java.lang.String,java.lang.String> partitionFilter,
boolean checkDuplicateFiles)
Import files from an existing Spark table to an Iceberg table.
|
static java.util.List<DataFile> |
listPartition(SparkTableUtil.SparkPartition partition,
PartitionSpec spec,
SerializableConfiguration conf,
MetricsConfig metricsConfig)
|
static java.util.List<DataFile> |
listPartition(SparkTableUtil.SparkPartition partition,
PartitionSpec spec,
SerializableConfiguration conf,
MetricsConfig metricsConfig,
NameMapping mapping)
|
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
loadCatalogMetadataTable(org.apache.spark.sql.SparkSession spark,
Table table,
MetadataTableType type) |
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
loadMetadataTable(org.apache.spark.sql.SparkSession spark,
Table table,
MetadataTableType type) |
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
partitionDF(org.apache.spark.sql.SparkSession spark,
java.lang.String table)
Returns a DataFrame with a row for each partition in the table.
|
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
partitionDFByFilter(org.apache.spark.sql.SparkSession spark,
java.lang.String table,
java.lang.String expression)
Returns a DataFrame with a row for each partition that matches the specified 'expression'.
|
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> partitionDF(org.apache.spark.sql.SparkSession spark, java.lang.String table)
spark - a Spark session
table - a table name and (optional) database

public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> partitionDFByFilter(org.apache.spark.sql.SparkSession spark, java.lang.String table, java.lang.String expression)
spark - a Spark session.
table - name of the table.
expression - The expression whose matching partitions are returned.

public static java.util.List<SparkTableUtil.SparkPartition> getPartitions(org.apache.spark.sql.SparkSession spark, java.lang.String table)
spark - a Spark session
table - a table name and (optional) database

public static java.util.List<SparkTableUtil.SparkPartition> getPartitions(org.apache.spark.sql.SparkSession spark, org.apache.spark.sql.catalyst.TableIdentifier tableIdent, java.util.Map<java.lang.String,java.lang.String> partitionFilter)
spark - a Spark session
tableIdent - a table identifier
partitionFilter - partition filter, or null if no filter

public static java.util.List<SparkTableUtil.SparkPartition> getPartitionsByFilter(org.apache.spark.sql.SparkSession spark, java.lang.String table, java.lang.String predicate)
spark - a Spark session
table - a table name and (optional) database
predicate - a predicate on partition columns

public static java.util.List<SparkTableUtil.SparkPartition> getPartitionsByFilter(org.apache.spark.sql.SparkSession spark, org.apache.spark.sql.catalyst.TableIdentifier tableIdent, org.apache.spark.sql.catalyst.expressions.Expression predicateExpr)
spark - a Spark session
tableIdent - a table identifier
predicateExpr - a predicate expression on partition columns

@Deprecated public static java.util.List<DataFile> listPartition(SparkTableUtil.SparkPartition partition, PartitionSpec spec, SerializableConfiguration conf, MetricsConfig metricsConfig)
TableMigrationUtil.listPartition(Map, String, String, PartitionSpec, Configuration,
MetricsConfig, NameMapping)
partition - a partition
conf - a serializable Hadoop conf
metricsConfig - a metrics conf

@Deprecated public static java.util.List<DataFile> listPartition(SparkTableUtil.SparkPartition partition, PartitionSpec spec, SerializableConfiguration conf, MetricsConfig metricsConfig, NameMapping mapping)
TableMigrationUtil.listPartition(Map, String, String, PartitionSpec, Configuration,
MetricsConfig, NameMapping)
partition - a partition
conf - a serializable Hadoop conf
metricsConfig - a metrics conf
mapping - a name mapping

public static void importSparkTable(org.apache.spark.sql.SparkSession spark, org.apache.spark.sql.catalyst.TableIdentifier sourceTableIdent, Table targetTable, java.lang.String stagingDir, java.util.Map<java.lang.String,java.lang.String> partitionFilter, boolean checkDuplicateFiles)
spark - a Spark session
sourceTableIdent - an identifier of the source Spark table
targetTable - an Iceberg table where to import the data
stagingDir - a staging directory to store temporary manifest files
partitionFilter - only import partitions whose values match those in the map, can be partially defined
checkDuplicateFiles - if true, throw exception if import results in a duplicate data file

public static void importSparkTable(org.apache.spark.sql.SparkSession spark, org.apache.spark.sql.catalyst.TableIdentifier sourceTableIdent, Table targetTable, java.lang.String stagingDir, boolean checkDuplicateFiles)
spark - a Spark session
sourceTableIdent - an identifier of the source Spark table
targetTable - an Iceberg table where to import the data
stagingDir - a staging directory to store temporary manifest files
checkDuplicateFiles - if true, throw exception if import results in a duplicate data file

public static void importSparkTable(org.apache.spark.sql.SparkSession spark, org.apache.spark.sql.catalyst.TableIdentifier sourceTableIdent, Table targetTable, java.lang.String stagingDir)
spark - a Spark session
sourceTableIdent - an identifier of the source Spark table
targetTable - an Iceberg table where to import the data
stagingDir - a staging directory to store temporary manifest files

public static void importSparkPartitions(org.apache.spark.sql.SparkSession spark, java.util.List<SparkTableUtil.SparkPartition> partitions, Table targetTable, PartitionSpec spec, java.lang.String stagingDir, boolean checkDuplicateFiles)
spark - a Spark session
partitions - partitions to import
targetTable - an Iceberg table where to import the data
spec - a partition spec
stagingDir - a staging directory to store temporary manifest files
checkDuplicateFiles - if true, throw exception if import results in a duplicate data file

public static void importSparkPartitions(org.apache.spark.sql.SparkSession spark, java.util.List<SparkTableUtil.SparkPartition> partitions, Table targetTable, PartitionSpec spec, java.lang.String stagingDir)
spark - a Spark session
partitions - partitions to import
targetTable - an Iceberg table where to import the data
spec - a partition spec
stagingDir - a staging directory to store temporary manifest files

public static java.util.List<SparkTableUtil.SparkPartition> filterPartitions(java.util.List<SparkTableUtil.SparkPartition> partitions, java.util.Map<java.lang.String,java.lang.String> partitionFilter)
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadCatalogMetadataTable(org.apache.spark.sql.SparkSession spark, Table table, MetadataTableType type)
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadMetadataTable(org.apache.spark.sql.SparkSession spark, Table table, MetadataTableType type)