public class SparkTableUtil
extends java.lang.Object
| Modifier and Type | Class and Description |
|---|---|
static class |
SparkTableUtil.SparkPartition
Class representing a table partition.
|
| Modifier and Type | Method and Description |
|---|---|
static java.util.List<SparkTableUtil.SparkPartition> |
filterPartitions(java.util.List<SparkTableUtil.SparkPartition> partitions,
java.util.Map<java.lang.String,java.lang.String> partitionFilter) |
static java.util.List<SparkTableUtil.SparkPartition> |
getPartitions(org.apache.spark.sql.SparkSession spark,
java.lang.String table)
Returns all partitions in the table.
|
static java.util.List<SparkTableUtil.SparkPartition> |
getPartitions(org.apache.spark.sql.SparkSession spark,
org.apache.spark.sql.catalyst.TableIdentifier tableIdent,
java.util.Map<java.lang.String,java.lang.String> partitionFilter)
Returns all partitions in the table.
|
static java.util.List<SparkTableUtil.SparkPartition> |
getPartitionsByFilter(org.apache.spark.sql.SparkSession spark,
java.lang.String table,
java.lang.String predicate)
Returns partitions that match the specified 'predicate'.
|
static java.util.List<SparkTableUtil.SparkPartition> |
getPartitionsByFilter(org.apache.spark.sql.SparkSession spark,
org.apache.spark.sql.catalyst.TableIdentifier tableIdent,
org.apache.spark.sql.catalyst.expressions.Expression predicateExpr)
Returns partitions that match the specified 'predicate'.
|
static void |
importSparkPartitions(org.apache.spark.sql.SparkSession spark,
java.util.List<SparkTableUtil.SparkPartition> partitions,
Table targetTable,
PartitionSpec spec,
java.lang.String stagingDir)
Import files from given partitions to an Iceberg table.
|
static void |
importSparkPartitions(org.apache.spark.sql.SparkSession spark,
java.util.List<SparkTableUtil.SparkPartition> partitions,
Table targetTable,
PartitionSpec spec,
java.lang.String stagingDir,
boolean checkDuplicateFiles)
Import files from given partitions to an Iceberg table.
|
static void |
importSparkTable(org.apache.spark.sql.SparkSession spark,
org.apache.spark.sql.catalyst.TableIdentifier sourceTableIdent,
Table targetTable,
java.lang.String stagingDir)
Import files from an existing Spark table to an Iceberg table.
|
static void |
importSparkTable(org.apache.spark.sql.SparkSession spark,
org.apache.spark.sql.catalyst.TableIdentifier sourceTableIdent,
Table targetTable,
java.lang.String stagingDir,
boolean checkDuplicateFiles)
Import files from an existing Spark table to an Iceberg table.
|
static void |
importSparkTable(org.apache.spark.sql.SparkSession spark,
org.apache.spark.sql.catalyst.TableIdentifier sourceTableIdent,
Table targetTable,
java.lang.String stagingDir,
java.util.Map<java.lang.String,java.lang.String> partitionFilter,
boolean checkDuplicateFiles)
Import files from an existing Spark table to an Iceberg table.
|
static java.util.List<DataFile> |
listPartition(SparkTableUtil.SparkPartition partition,
PartitionSpec spec,
SerializableConfiguration conf,
MetricsConfig metricsConfig)
|
static java.util.List<DataFile> |
listPartition(SparkTableUtil.SparkPartition partition,
PartitionSpec spec,
SerializableConfiguration conf,
MetricsConfig metricsConfig,
NameMapping mapping)
|
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
loadCatalogMetadataTable(org.apache.spark.sql.SparkSession spark,
Table table,
MetadataTableType type) |
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
loadMetadataTable(org.apache.spark.sql.SparkSession spark,
Table table,
MetadataTableType type) |
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
partitionDF(org.apache.spark.sql.SparkSession spark,
java.lang.String table)
Returns a DataFrame with a row for each partition in the table.
|
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
partitionDFByFilter(org.apache.spark.sql.SparkSession spark,
java.lang.String table,
java.lang.String expression)
Returns a DataFrame with a row for each partition that matches the specified 'expression'.
|
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> partitionDF(org.apache.spark.sql.SparkSession spark,
java.lang.String table)
spark - a Spark session
table - a table name and (optional) database
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> partitionDFByFilter(org.apache.spark.sql.SparkSession spark,
java.lang.String table,
java.lang.String expression)
spark - a Spark session.
table - name of the table.
expression - The expression whose matching partitions are returned.
public static java.util.List<SparkTableUtil.SparkPartition> getPartitions(org.apache.spark.sql.SparkSession spark, java.lang.String table)
spark - a Spark session
table - a table name and (optional) database
public static java.util.List<SparkTableUtil.SparkPartition> getPartitions(org.apache.spark.sql.SparkSession spark, org.apache.spark.sql.catalyst.TableIdentifier tableIdent, java.util.Map<java.lang.String,java.lang.String> partitionFilter)
spark - a Spark session
tableIdent - a table identifier
partitionFilter - partition filter, or null if no filter
public static java.util.List<SparkTableUtil.SparkPartition> getPartitionsByFilter(org.apache.spark.sql.SparkSession spark, java.lang.String table, java.lang.String predicate)
spark - a Spark session
table - a table name and (optional) database
predicate - a predicate on partition columns
public static java.util.List<SparkTableUtil.SparkPartition> getPartitionsByFilter(org.apache.spark.sql.SparkSession spark, org.apache.spark.sql.catalyst.TableIdentifier tableIdent, org.apache.spark.sql.catalyst.expressions.Expression predicateExpr)
spark - a Spark session
tableIdent - a table identifier
predicateExpr - a predicate expression on partition columns
@Deprecated public static java.util.List<DataFile> listPartition(SparkTableUtil.SparkPartition partition, PartitionSpec spec, SerializableConfiguration conf, MetricsConfig metricsConfig)
Deprecated. Use TableMigrationUtil.listPartition(Map, String, String, PartitionSpec, Configuration, MetricsConfig, NameMapping) instead.
partition - a partition
conf - a serializable Hadoop conf
metricsConfig - a metrics conf
@Deprecated public static java.util.List<DataFile> listPartition(SparkTableUtil.SparkPartition partition, PartitionSpec spec, SerializableConfiguration conf, MetricsConfig metricsConfig, NameMapping mapping)
Deprecated. Use TableMigrationUtil.listPartition(Map, String, String, PartitionSpec, Configuration, MetricsConfig, NameMapping) instead.
partition - a partition
conf - a serializable Hadoop conf
metricsConfig - a metrics conf
mapping - a name mapping
public static void importSparkTable(org.apache.spark.sql.SparkSession spark,
org.apache.spark.sql.catalyst.TableIdentifier sourceTableIdent,
Table targetTable,
java.lang.String stagingDir,
java.util.Map<java.lang.String,java.lang.String> partitionFilter,
boolean checkDuplicateFiles)
spark - a Spark session
sourceTableIdent - an identifier of the source Spark table
targetTable - an Iceberg table where to import the data
stagingDir - a staging directory to store temporary manifest files
partitionFilter - only import partitions whose values match those in the map, can be partially defined
checkDuplicateFiles - if true, throw exception if import results in a duplicate data file
public static void importSparkTable(org.apache.spark.sql.SparkSession spark,
org.apache.spark.sql.catalyst.TableIdentifier sourceTableIdent,
Table targetTable,
java.lang.String stagingDir,
boolean checkDuplicateFiles)
spark - a Spark session
sourceTableIdent - an identifier of the source Spark table
targetTable - an Iceberg table where to import the data
stagingDir - a staging directory to store temporary manifest files
checkDuplicateFiles - if true, throw exception if import results in a duplicate data file
public static void importSparkTable(org.apache.spark.sql.SparkSession spark,
org.apache.spark.sql.catalyst.TableIdentifier sourceTableIdent,
Table targetTable,
java.lang.String stagingDir)
spark - a Spark session
sourceTableIdent - an identifier of the source Spark table
targetTable - an Iceberg table where to import the data
stagingDir - a staging directory to store temporary manifest files
public static void importSparkPartitions(org.apache.spark.sql.SparkSession spark,
java.util.List<SparkTableUtil.SparkPartition> partitions,
Table targetTable,
PartitionSpec spec,
java.lang.String stagingDir,
boolean checkDuplicateFiles)
spark - a Spark session
partitions - partitions to import
targetTable - an Iceberg table where to import the data
spec - a partition spec
stagingDir - a staging directory to store temporary manifest files
checkDuplicateFiles - if true, throw exception if import results in a duplicate data file
public static void importSparkPartitions(org.apache.spark.sql.SparkSession spark,
java.util.List<SparkTableUtil.SparkPartition> partitions,
Table targetTable,
PartitionSpec spec,
java.lang.String stagingDir)
spark - a Spark session
partitions - partitions to import
targetTable - an Iceberg table where to import the data
spec - a partition spec
stagingDir - a staging directory to store temporary manifest files
public static java.util.List<SparkTableUtil.SparkPartition> filterPartitions(java.util.List<SparkTableUtil.SparkPartition> partitions, java.util.Map<java.lang.String,java.lang.String> partitionFilter)
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadCatalogMetadataTable(org.apache.spark.sql.SparkSession spark,
Table table,
MetadataTableType type)
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadMetadataTable(org.apache.spark.sql.SparkSession spark,
Table table,
MetadataTableType type)