public class IcebergSource
extends java.lang.Object
implements org.apache.spark.sql.sources.DataSourceRegister, org.apache.spark.sql.connector.catalog.SupportsCatalogOptions
How paths/tables are loaded when using spark.read().format("iceberg").load(table):
- table = "file:///path/to/table" -> loads a HadoopTable at the given path
- table = "tablename" -> loads currentCatalog.currentNamespace.tablename
- table = "catalog.tablename" -> loads "tablename" from the specified catalog
- table = "namespace.tablename" -> loads "namespace.tablename" from the current catalog
- table = "catalog.namespace.tablename" -> loads "namespace.tablename" from the specified catalog
- table = "namespace1.namespace2.tablename" -> loads "namespace1.namespace2.tablename" from the current catalog
The above list is in order of priority. For example, a matching catalog takes priority over any namespace resolution.
| Constructor and Description |
|---|
IcebergSource() |
| Modifier and Type | Method and Description |
|---|---|
java.lang.String |
extractCatalog(org.apache.spark.sql.util.CaseInsensitiveStringMap options) |
org.apache.spark.sql.connector.catalog.Identifier |
extractIdentifier(org.apache.spark.sql.util.CaseInsensitiveStringMap options) |
java.util.Optional<java.lang.String> |
extractTimeTravelTimestamp(org.apache.spark.sql.util.CaseInsensitiveStringMap options) |
java.util.Optional<java.lang.String> |
extractTimeTravelVersion(org.apache.spark.sql.util.CaseInsensitiveStringMap options) |
org.apache.spark.sql.connector.catalog.Table |
getTable(org.apache.spark.sql.types.StructType schema,
org.apache.spark.sql.connector.expressions.Transform[] partitioning,
java.util.Map<java.lang.String,java.lang.String> options) |
org.apache.spark.sql.connector.expressions.Transform[] |
inferPartitioning(org.apache.spark.sql.util.CaseInsensitiveStringMap options) |
org.apache.spark.sql.types.StructType |
inferSchema(org.apache.spark.sql.util.CaseInsensitiveStringMap options) |
java.lang.String |
shortName() |
boolean |
supportsExternalMetadata() |
public java.lang.String shortName()
Specified by: shortName in interface org.apache.spark.sql.sources.DataSourceRegister

public org.apache.spark.sql.types.StructType inferSchema(org.apache.spark.sql.util.CaseInsensitiveStringMap options)
Specified by: inferSchema in interface org.apache.spark.sql.connector.catalog.TableProvider

public org.apache.spark.sql.connector.expressions.Transform[] inferPartitioning(org.apache.spark.sql.util.CaseInsensitiveStringMap options)
Specified by: inferPartitioning in interface org.apache.spark.sql.connector.catalog.TableProvider

public boolean supportsExternalMetadata()
Specified by: supportsExternalMetadata in interface org.apache.spark.sql.connector.catalog.TableProvider

public org.apache.spark.sql.connector.catalog.Table getTable(org.apache.spark.sql.types.StructType schema,
org.apache.spark.sql.connector.expressions.Transform[] partitioning,
java.util.Map<java.lang.String,java.lang.String> options)
Specified by: getTable in interface org.apache.spark.sql.connector.catalog.TableProvider

public org.apache.spark.sql.connector.catalog.Identifier extractIdentifier(org.apache.spark.sql.util.CaseInsensitiveStringMap options)
Specified by: extractIdentifier in interface org.apache.spark.sql.connector.catalog.SupportsCatalogOptions

public java.lang.String extractCatalog(org.apache.spark.sql.util.CaseInsensitiveStringMap options)
Specified by: extractCatalog in interface org.apache.spark.sql.connector.catalog.SupportsCatalogOptions

public java.util.Optional&lt;java.lang.String&gt; extractTimeTravelVersion(org.apache.spark.sql.util.CaseInsensitiveStringMap options)
Specified by: extractTimeTravelVersion in interface org.apache.spark.sql.connector.catalog.SupportsCatalogOptions

public java.util.Optional&lt;java.lang.String&gt; extractTimeTravelTimestamp(org.apache.spark.sql.util.CaseInsensitiveStringMap options)
Specified by: extractTimeTravelTimestamp in interface org.apache.spark.sql.connector.catalog.SupportsCatalogOptions