public class SparkDataFile extends java.lang.Object implements DataFile
COLUMN_SIZES, CONTENT, EQUALITY_IDS, FILE_FORMAT, FILE_PATH, FILE_SIZE, KEY_METADATA, LOWER_BOUNDS, NAN_VALUE_COUNTS, NULL_VALUE_COUNTS, PARTITION_DOC, PARTITION_ID, PARTITION_NAME, RECORD_COUNT, SORT_ORDER_ID, SPEC_ID, SPLIT_OFFSETS, UPPER_BOUNDS, VALUE_COUNTS
Constructor and Description |
---|
SparkDataFile(Types.StructType type,
org.apache.spark.sql.types.StructType sparkType) |
SparkDataFile(Types.StructType type,
Types.StructType projectedType,
org.apache.spark.sql.types.StructType sparkType) |
Modifier and Type | Method and Description |
---|---|
java.util.Map<java.lang.Integer,java.lang.Long> |
columnSizes()
Returns if collected, map from column ID to the size of the column in bytes, null otherwise.
|
DataFile |
copy()
Copies this file.
|
DataFile |
copyWithoutStats()
Copies this file without file stats.
|
long |
fileSizeInBytes()
Returns the file size in bytes.
|
FileFormat |
format()
Returns format of the file.
|
java.nio.ByteBuffer |
keyMetadata()
Returns metadata about how this file is encrypted, or null if the file is stored in plain text.
|
java.util.Map<java.lang.Integer,java.nio.ByteBuffer> |
lowerBounds()
Returns if collected, map from column ID to value lower bounds, null otherwise.
|
java.util.Map<java.lang.Integer,java.lang.Long> |
nanValueCounts()
Returns if collected, map from column ID to its NaN value count, null otherwise.
|
java.util.Map<java.lang.Integer,java.lang.Long> |
nullValueCounts()
Returns if collected, map from column ID to its null value count, null otherwise.
|
StructLike |
partition()
Returns partition for this file as a StructLike.
|
java.lang.CharSequence |
path()
Returns fully qualified path to the file, suitable for constructing a Hadoop Path.
|
java.lang.Long |
pos()
Returns the ordinal position of the file in a manifest, or null if it was not read from a
manifest.
|
long |
recordCount()
Returns the number of top-level records in the file.
|
java.lang.Integer |
sortOrderId()
Returns the sort order id of this file, which describes how the file is ordered.
|
int |
specId()
Returns id of the partition spec used for partition metadata.
|
java.util.List<java.lang.Long> |
splitOffsets()
Returns list of recommended split locations, if applicable, null otherwise.
|
java.util.Map<java.lang.Integer,java.nio.ByteBuffer> |
upperBounds()
Returns if collected, map from column ID to value upper bounds, null otherwise.
|
java.util.Map<java.lang.Integer,java.lang.Long> |
valueCounts()
Returns if collected, map from column ID to the count of its non-null values, null otherwise.
|
SparkDataFile |
wrap(org.apache.spark.sql.Row row) |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
content, equalityFieldIds, getType
copy
public SparkDataFile(Types.StructType type, org.apache.spark.sql.types.StructType sparkType)
public SparkDataFile(Types.StructType type, Types.StructType projectedType, org.apache.spark.sql.types.StructType sparkType)
public SparkDataFile wrap(org.apache.spark.sql.Row row)
public java.lang.Long pos()
ContentFile
pos
in interface ContentFile<DataFile>
public int specId()
ContentFile
specId
in interface ContentFile<DataFile>
public java.lang.CharSequence path()
ContentFile
path
in interface ContentFile<DataFile>
public FileFormat format()
ContentFile
format
in interface ContentFile<DataFile>
public StructLike partition()
ContentFile
Returns partition for this file as a StructLike.
partition
in interface ContentFile<DataFile>
public long recordCount()
ContentFile
recordCount
in interface ContentFile<DataFile>
public long fileSizeInBytes()
ContentFile
fileSizeInBytes
in interface ContentFile<DataFile>
public java.util.Map<java.lang.Integer,java.lang.Long> columnSizes()
ContentFile
columnSizes
in interface ContentFile<DataFile>
public java.util.Map<java.lang.Integer,java.lang.Long> valueCounts()
ContentFile
valueCounts
in interface ContentFile<DataFile>
public java.util.Map<java.lang.Integer,java.lang.Long> nullValueCounts()
ContentFile
nullValueCounts
in interface ContentFile<DataFile>
public java.util.Map<java.lang.Integer,java.lang.Long> nanValueCounts()
ContentFile
nanValueCounts
in interface ContentFile<DataFile>
public java.util.Map<java.lang.Integer,java.nio.ByteBuffer> lowerBounds()
ContentFile
lowerBounds
in interface ContentFile<DataFile>
public java.util.Map<java.lang.Integer,java.nio.ByteBuffer> upperBounds()
ContentFile
upperBounds
in interface ContentFile<DataFile>
public java.nio.ByteBuffer keyMetadata()
ContentFile
keyMetadata
in interface ContentFile<DataFile>
public DataFile copy()
ContentFile
copy
in interface ContentFile<DataFile>
public DataFile copyWithoutStats()
ContentFile
copyWithoutStats
in interface ContentFile<DataFile>
public java.util.List<java.lang.Long> splitOffsets()
ContentFile
When available, this information is used for planning scan tasks whose boundaries are determined by these offsets. The returned list must be sorted in ascending order.
splitOffsets
in interface ContentFile<DataFile>
public java.lang.Integer sortOrderId()
ContentFile
sortOrderId
in interface ContentFile<DataFile>