public class Spark3Util
extends java.lang.Object
Modifier and Type | Class and Description |
---|---|
static class |
Spark3Util.CatalogAndIdentifier
This mimics a class inside of Spark which is private inside of LookupCatalog.
|
static class |
Spark3Util.DescribeSchemaVisitor |
Modifier and Type | Method and Description |
---|---|
static UpdateProperties |
applyPropertyChanges(UpdateProperties pendingUpdate,
java.util.List<org.apache.spark.sql.connector.catalog.TableChange> changes)
Applies a list of Spark table changes to an
UpdateProperties operation. |
static UpdateSchema |
applySchemaChanges(UpdateSchema pendingUpdate,
java.util.List<org.apache.spark.sql.connector.catalog.TableChange> changes)
Applies a list of Spark table changes to an
UpdateSchema operation. |
static Spark3Util.CatalogAndIdentifier |
catalogAndIdentifier(org.apache.spark.sql.SparkSession spark,
java.util.List<java.lang.String> nameParts) |
static Spark3Util.CatalogAndIdentifier |
catalogAndIdentifier(org.apache.spark.sql.SparkSession spark,
java.util.List<java.lang.String> nameParts,
org.apache.spark.sql.connector.catalog.CatalogPlugin defaultCatalog)
A modified version of Spark's LookupCatalog.CatalogAndIdentifier.unapply. Attempts to find the
catalog and identifier that a multipart identifier represents.
|
static Spark3Util.CatalogAndIdentifier |
catalogAndIdentifier(org.apache.spark.sql.SparkSession spark,
java.lang.String name) |
static Spark3Util.CatalogAndIdentifier |
catalogAndIdentifier(org.apache.spark.sql.SparkSession spark,
java.lang.String name,
org.apache.spark.sql.connector.catalog.CatalogPlugin defaultCatalog) |
static Spark3Util.CatalogAndIdentifier |
catalogAndIdentifier(java.lang.String description,
org.apache.spark.sql.SparkSession spark,
java.lang.String name) |
static Spark3Util.CatalogAndIdentifier |
catalogAndIdentifier(java.lang.String description,
org.apache.spark.sql.SparkSession spark,
java.lang.String name,
org.apache.spark.sql.connector.catalog.CatalogPlugin defaultCatalog) |
static java.lang.String |
describe(Expression expr) |
static java.lang.String |
describe(Schema schema) |
static java.lang.String |
describe(SortOrder order) |
static java.lang.String |
describe(Type type) |
static boolean |
extensionsEnabled(org.apache.spark.sql.SparkSession spark) |
static java.util.List<SparkTableUtil.SparkPartition> |
getPartitions(org.apache.spark.sql.SparkSession spark,
org.apache.hadoop.fs.Path rootPath,
java.lang.String format,
java.util.Map<java.lang.String,java.lang.String> partitionFilter)
Use Spark to list all partitions in the table.
|
static TableIdentifier |
identifierToTableIdentifier(org.apache.spark.sql.connector.catalog.Identifier identifier) |
static Catalog |
loadIcebergCatalog(org.apache.spark.sql.SparkSession spark,
java.lang.String catalogName)
Returns the underlying Iceberg Catalog object represented by a Spark Catalog
|
static Table |
loadIcebergTable(org.apache.spark.sql.SparkSession spark,
java.lang.String name)
Returns an Iceberg Table by its name from a Spark V2 Catalog.
|
static java.lang.String |
quotedFullIdentifier(java.lang.String catalogName,
org.apache.spark.sql.connector.catalog.Identifier identifier) |
static java.util.Map<java.lang.String,java.lang.String> |
rebuildCreateProperties(java.util.Map<java.lang.String,java.lang.String> createProperties) |
static org.apache.spark.sql.util.CaseInsensitiveStringMap |
setOption(java.lang.String key,
java.lang.String value,
org.apache.spark.sql.util.CaseInsensitiveStringMap options) |
static Table |
toIcebergTable(org.apache.spark.sql.connector.catalog.Table table) |
static Term |
toIcebergTerm(org.apache.spark.sql.connector.expressions.Expression expr) |
static org.apache.spark.sql.connector.expressions.NamedReference |
toNamedReference(java.lang.String name) |
static PartitionSpec |
toPartitionSpec(Schema schema,
org.apache.spark.sql.connector.expressions.Transform[] partitioning)
Converts Spark transforms into a
PartitionSpec . |
static org.apache.spark.sql.connector.expressions.Transform[] |
toTransforms(PartitionSpec spec)
Converts a PartitionSpec to Spark transforms.
|
static org.apache.spark.sql.catalyst.TableIdentifier |
toV1TableIdentifier(org.apache.spark.sql.connector.catalog.Identifier identifier) |
public static org.apache.spark.sql.util.CaseInsensitiveStringMap setOption(java.lang.String key, java.lang.String value, org.apache.spark.sql.util.CaseInsensitiveStringMap options)
public static java.util.Map<java.lang.String,java.lang.String> rebuildCreateProperties(java.util.Map<java.lang.String,java.lang.String> createProperties)
public static UpdateProperties applyPropertyChanges(UpdateProperties pendingUpdate, java.util.List<org.apache.spark.sql.connector.catalog.TableChange> changes)
Applies a list of Spark table changes to an UpdateProperties operation.
Parameters:
pendingUpdate - an uncommitted UpdateProperties operation to configure
changes - a list of Spark table changes
public static UpdateSchema applySchemaChanges(UpdateSchema pendingUpdate, java.util.List<org.apache.spark.sql.connector.catalog.TableChange> changes)
Applies a list of Spark table changes to an UpdateSchema operation.
Parameters:
pendingUpdate - an uncommitted UpdateSchema operation to configure
changes - a list of Spark table changes
public static Table toIcebergTable(org.apache.spark.sql.connector.catalog.Table table)
public static org.apache.spark.sql.connector.expressions.Transform[] toTransforms(PartitionSpec spec)
Parameters:
spec - a PartitionSpec
public static org.apache.spark.sql.connector.expressions.NamedReference toNamedReference(java.lang.String name)
public static Term toIcebergTerm(org.apache.spark.sql.connector.expressions.Expression expr)
public static PartitionSpec toPartitionSpec(Schema schema, org.apache.spark.sql.connector.expressions.Transform[] partitioning)
Converts Spark transforms into a PartitionSpec.
Parameters:
schema - the table schema
partitioning - Spark Transforms
public static java.lang.String describe(Expression expr)
public static java.lang.String describe(Schema schema)
public static java.lang.String describe(Type type)
public static java.lang.String describe(SortOrder order)
public static boolean extensionsEnabled(org.apache.spark.sql.SparkSession spark)
public static Table loadIcebergTable(org.apache.spark.sql.SparkSession spark, java.lang.String name) throws org.apache.spark.sql.catalyst.parser.ParseException, org.apache.spark.sql.catalyst.analysis.NoSuchTableException
When loaded through a caching SparkCatalog, the TableOperations of the table may be stale;
please refresh the table to get the latest one.
Parameters:
spark - SparkSession used for looking up catalog references and tables
name - The multipart identifier of the Iceberg table
Throws:
org.apache.spark.sql.catalyst.parser.ParseException
org.apache.spark.sql.catalyst.analysis.NoSuchTableException
public static Catalog loadIcebergCatalog(org.apache.spark.sql.SparkSession spark, java.lang.String catalogName)
Parameters:
spark - SparkSession used for looking up catalog reference
catalogName - The name of the Spark Catalog being referenced
public static Spark3Util.CatalogAndIdentifier catalogAndIdentifier(org.apache.spark.sql.SparkSession spark, java.lang.String name) throws org.apache.spark.sql.catalyst.parser.ParseException
org.apache.spark.sql.catalyst.parser.ParseException
public static Spark3Util.CatalogAndIdentifier catalogAndIdentifier(org.apache.spark.sql.SparkSession spark, java.lang.String name, org.apache.spark.sql.connector.catalog.CatalogPlugin defaultCatalog) throws org.apache.spark.sql.catalyst.parser.ParseException
org.apache.spark.sql.catalyst.parser.ParseException
public static Spark3Util.CatalogAndIdentifier catalogAndIdentifier(java.lang.String description, org.apache.spark.sql.SparkSession spark, java.lang.String name)
public static Spark3Util.CatalogAndIdentifier catalogAndIdentifier(java.lang.String description, org.apache.spark.sql.SparkSession spark, java.lang.String name, org.apache.spark.sql.connector.catalog.CatalogPlugin defaultCatalog)
public static Spark3Util.CatalogAndIdentifier catalogAndIdentifier(org.apache.spark.sql.SparkSession spark, java.util.List<java.lang.String> nameParts)
public static Spark3Util.CatalogAndIdentifier catalogAndIdentifier(org.apache.spark.sql.SparkSession spark, java.util.List<java.lang.String> nameParts, org.apache.spark.sql.connector.catalog.CatalogPlugin defaultCatalog)
Parameters:
spark - Spark session to use for resolution
nameParts - Multipart identifier representing a table
defaultCatalog - Catalog to use if none is specified
public static TableIdentifier identifierToTableIdentifier(org.apache.spark.sql.connector.catalog.Identifier identifier)
public static java.lang.String quotedFullIdentifier(java.lang.String catalogName, org.apache.spark.sql.connector.catalog.Identifier identifier)
public static java.util.List<SparkTableUtil.SparkPartition> getPartitions(org.apache.spark.sql.SparkSession spark, org.apache.hadoop.fs.Path rootPath, java.lang.String format, java.util.Map<java.lang.String,java.lang.String> partitionFilter)
Parameters:
spark - a Spark session
rootPath - a table identifier
format - format of the file
partitionFilter - partitionFilter of the file
public static org.apache.spark.sql.catalyst.TableIdentifier toV1TableIdentifier(org.apache.spark.sql.connector.catalog.Identifier identifier)