public class BaseOverwriteFiles extends java.lang.Object implements OverwriteFiles
Modifier | Constructor and Description |
---|---|
protected |
BaseOverwriteFiles(java.lang.String tableName,
TableOperations ops) |
Modifier and Type | Method and Description |
---|---|
protected void |
add(DataFile file)
Add a data file to the new snapshot.
|
protected void |
add(DeleteFile file)
Add a delete file to the new snapshot.
|
protected void |
add(ManifestFile manifest)
Add all files in a manifest to the new snapshot.
|
protected java.util.List<DataFile> |
addedFiles() |
OverwriteFiles |
addFile(DataFile file)
Add a
DataFile to the table. |
Snapshot |
apply()
Apply the pending changes and return the uncommitted changes for validation.
|
java.util.List<ManifestFile> |
apply(TableMetadata base)
Apply the update's changes to the base table metadata and return the new manifest list.
|
ThisT |
caseSensitive(boolean isCaseSensitive) |
protected void |
cleanAll() |
protected void |
cleanUncommitted(java.util.Set<ManifestFile> committed)
Clean up any uncommitted manifests that were created.
|
void |
commit()
Apply the pending changes and commit.
|
OverwriteFiles |
conflictDetectionFilter(Expression newConflictDetectionFilter)
Sets a conflict detection filter used to validate concurrently added data and delete files.
|
protected TableMetadata |
current() |
protected PartitionSpec |
dataSpec() |
protected void |
delete(java.lang.CharSequence path)
Add a specific data path to be deleted in the new snapshot.
|
protected void |
delete(DataFile file)
Add a specific data file to be deleted in the new snapshot.
|
protected void |
delete(DeleteFile file)
Add a specific delete file to be deleted in the new snapshot.
|
protected void |
deleteByRowFilter(Expression expr)
Add a filter to match files to delete.
|
OverwriteFiles |
deleteFile(DataFile file)
Delete a
DataFile from the table. |
protected void |
deleteFile(java.lang.String path) |
ThisT |
deleteWith(java.util.function.Consumer<java.lang.String> deleteCallback)
Set a callback to delete files instead of the table's default.
|
protected void |
dropPartition(int specId,
StructLike partition)
Add a partition tuple to drop from the table during the delete phase.
|
protected void |
failAnyDelete() |
protected void |
failMissingDeletePaths() |
protected boolean |
isCaseSensitive() |
protected OutputFile |
manifestListPath() |
protected ManifestReader<DeleteFile> |
newDeleteManifestReader(ManifestFile manifest) |
protected ManifestWriter<DeleteFile> |
newDeleteManifestWriter(PartitionSpec spec) |
protected OutputFile |
newManifestOutput() |
protected ManifestReader<DataFile> |
newManifestReader(ManifestFile manifest) |
protected ManifestWriter<DataFile> |
newManifestWriter(PartitionSpec spec) |
protected java.lang.String |
operation()
A string that describes the action that produced the new snapshot.
|
OverwriteFiles |
overwriteByRowFilter(Expression expr)
Delete files that match an
Expression on data rows from the table. |
protected TableMetadata |
refresh() |
protected Expression |
rowFilter() |
protected OverwriteFiles |
self() |
ThisT |
set(java.lang.String property,
java.lang.String value)
Set a summary property in the snapshot produced by this update.
|
protected void |
setNewFilesSequenceNumber(long sequenceNumber) |
protected long |
snapshotId() |
ThisT |
stageOnly()
Called to stage a snapshot in table metadata, but not update the current snapshot id.
|
protected java.util.Map<java.lang.String,java.lang.String> |
summary() |
java.lang.Object |
updateEvent()
Generates an update event to notify about metadata changes.
|
protected void |
validate(TableMetadata base)
Validate the current metadata.
|
protected void |
validateAddedDataFiles(TableMetadata base,
java.lang.Long startingSnapshotId,
Expression conflictDetectionFilter)
Validates that no files matching a filter have been added to the table since a starting snapshot.
|
OverwriteFiles |
validateAddedFilesMatchOverwriteFilter()
Signal that each file added to the table must match the overwrite expression.
|
protected void |
validateDataFilesExist(TableMetadata base,
java.lang.Long startingSnapshotId,
CharSequenceSet requiredDataFiles,
boolean skipDeletes,
Expression conflictDetectionFilter) |
OverwriteFiles |
validateFromSnapshot(long snapshotId)
Set the snapshot ID used in any reads for this operation.
|
OverwriteFiles |
validateNoConflictingData()
Enables validation that data added concurrently does not conflict with this commit's operation.
|
OverwriteFiles |
validateNoConflictingDeletes()
Enables validation that deletes that happened concurrently do not conflict with this commit's operation.
|
protected void |
validateNoNewDeleteFiles(TableMetadata base,
java.lang.Long startingSnapshotId,
Expression dataFilter)
Validates that no delete files matching a filter have been added to the table since a starting snapshot.
|
protected void |
validateNoNewDeletesForDataFiles(TableMetadata base,
java.lang.Long startingSnapshotId,
Expression dataFilter,
java.lang.Iterable<DataFile> dataFiles)
Validates that no new delete files that must be applied to the given data files have been added to the table since
a starting snapshot.
|
protected void |
validateNoNewDeletesForDataFiles(TableMetadata base,
java.lang.Long startingSnapshotId,
java.lang.Iterable<DataFile> dataFiles)
Validates that no new delete files that must be applied to the given data files have been added to the table since
a starting snapshot.
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
caseSensitive, validateNoConflictingAppends
deleteWith, set, stageOnly
apply, commit, updateEvent
protected BaseOverwriteFiles(java.lang.String tableName, TableOperations ops)
protected OverwriteFiles self()
protected java.lang.String operation()
public OverwriteFiles overwriteByRowFilter(Expression expr)
OverwriteFiles
Delete files that match an Expression on data rows from the table.
A file is selected to be deleted by the expression if it could contain any rows that match the
expression (candidate files are selected using an inclusive projection). These candidate files are
deleted if all of the rows in the file must match the expression (the partition data matches
the expression's strict projection, see Projections.strict(PartitionSpec)). This guarantees
that files are deleted if and only if all rows in the file must match the expression.
Files that may contain some rows that match the expression and some rows that do not will
result in a ValidationException.
overwriteByRowFilter
in interface OverwriteFiles
expr
- an expression on rows in the table
public OverwriteFiles addFile(DataFile file)
OverwriteFiles
Add a DataFile to the table.
addFile
in interface OverwriteFiles
file
- a data file
public OverwriteFiles deleteFile(DataFile file)
OverwriteFiles
Delete a DataFile from the table.
deleteFile
in interface OverwriteFiles
file
- a data file
public OverwriteFiles validateAddedFilesMatchOverwriteFilter()
OverwriteFiles
If this method is called, each added file is validated on commit to ensure that it matches the overwrite row filter. This is used to ensure that writes are idempotent: that files cannot be added during a commit that would not be removed if the operation were run a second time.
validateAddedFilesMatchOverwriteFilter
in interface OverwriteFiles
public OverwriteFiles validateFromSnapshot(long snapshotId)
OverwriteFiles
Validations will check changes after this snapshot ID. If the from snapshot is not set, all ancestor snapshots through the table's initial snapshot are validated.
validateFromSnapshot
in interface OverwriteFiles
snapshotId
- a snapshot ID
public OverwriteFiles conflictDetectionFilter(Expression newConflictDetectionFilter)
OverwriteFiles
conflictDetectionFilter
in interface OverwriteFiles
newConflictDetectionFilter
- an expression on rows in the table
public OverwriteFiles validateNoConflictingData()
OverwriteFiles
This method should be called while committing non-idempotent overwrite operations. If a concurrent operation commits a new file after the data was read and that file might contain rows matching the specified conflict detection filter, the overwrite operation will detect this and fail.
Calling this method with a correct conflict detection filter is required to maintain isolation for non-idempotent overwrite operations.
Validation uses the conflict detection filter passed to OverwriteFiles.conflictDetectionFilter(Expression) and
applies to operations that happened after the snapshot passed to OverwriteFiles.validateFromSnapshot(long).
If the conflict detection filter is not set, any new data added concurrently will fail this
overwrite operation.
validateNoConflictingData
in interface OverwriteFiles
public OverwriteFiles validateNoConflictingDeletes()
OverwriteFiles
Validating concurrent deletes is required during non-idempotent overwrite operations. If a concurrent operation deletes data in one of the files being overwritten, the overwrite operation must be aborted as it may undelete rows that were removed concurrently.
Calling this method with a correct conflict detection filter is required to maintain isolation for non-idempotent overwrite operations.
Validation uses the conflict detection filter passed to OverwriteFiles.conflictDetectionFilter(Expression) and
applies to operations that happened after the snapshot passed to OverwriteFiles.validateFromSnapshot(long).
If the conflict detection filter is not set, this operation will use the row filter provided
in OverwriteFiles.overwriteByRowFilter(Expression) to check for new delete files and will ensure
there are no conflicting deletes for data files removed via OverwriteFiles.deleteFile(DataFile).
validateNoConflictingDeletes
in interface OverwriteFiles
protected void validate(TableMetadata base)
Child operations can override this to add custom validation.
base
- current table metadata to validate
public ThisT set(java.lang.String property, java.lang.String value)
SnapshotUpdate
property
- a String property name
value
- a String property value
public ThisT caseSensitive(boolean isCaseSensitive)
protected boolean isCaseSensitive()
protected PartitionSpec dataSpec()
protected Expression rowFilter()
protected java.util.List<DataFile> addedFiles()
protected void failAnyDelete()
protected void failMissingDeletePaths()
protected void deleteByRowFilter(Expression expr)
expr
- an expression to match rows.
protected void dropPartition(int specId, StructLike partition)
protected void delete(DataFile file)
protected void delete(DeleteFile file)
protected void delete(java.lang.CharSequence path)
protected void add(DataFile file)
protected void add(DeleteFile file)
protected void add(ManifestFile manifest)
protected void validateAddedDataFiles(TableMetadata base, java.lang.Long startingSnapshotId, Expression conflictDetectionFilter)
base
- table metadata to validate
startingSnapshotId
- id of the snapshot current at the start of the operation
conflictDetectionFilter
- an expression used to find new conflicting data files
protected void validateNoNewDeletesForDataFiles(TableMetadata base, java.lang.Long startingSnapshotId, java.lang.Iterable<DataFile> dataFiles)
base
- table metadata to validate
startingSnapshotId
- id of the snapshot current at the start of the operation
dataFiles
- data files to validate have no new row deletes
protected void validateNoNewDeletesForDataFiles(TableMetadata base, java.lang.Long startingSnapshotId, Expression dataFilter, java.lang.Iterable<DataFile> dataFiles)
base
- table metadata to validate
startingSnapshotId
- id of the snapshot current at the start of the operation
dataFilter
- a data filter
dataFiles
- data files to validate have no new row deletes
protected void validateNoNewDeleteFiles(TableMetadata base, java.lang.Long startingSnapshotId, Expression dataFilter)
base
- table metadata to validate
startingSnapshotId
- id of the snapshot current at the start of the operation
dataFilter
- an expression used to find new conflicting delete files
protected void setNewFilesSequenceNumber(long sequenceNumber)
protected void validateDataFilesExist(TableMetadata base, java.lang.Long startingSnapshotId, CharSequenceSet requiredDataFiles, boolean skipDeletes, Expression conflictDetectionFilter)
protected java.util.Map<java.lang.String,java.lang.String> summary()
public java.util.List<ManifestFile> apply(TableMetadata base)
base
- the base table metadata to apply changes to
public java.lang.Object updateEvent()
PendingUpdate
protected void cleanUncommitted(java.util.Set<ManifestFile> committed)
Manifests may not be committed if apply is called more than once because a commit conflict has occurred. Implementations may keep around manifests because the same changes will be made by both apply calls. This method instructs the implementation to clean up those manifests and passes the paths of the manifests that were actually committed.
committed
- a set of manifest paths that were actually committed
public ThisT stageOnly()
SnapshotUpdate
stageOnly
in interface SnapshotUpdate<ThisT>
public ThisT deleteWith(java.util.function.Consumer<java.lang.String> deleteCallback)
SnapshotUpdate
deleteWith
in interface SnapshotUpdate<ThisT>
deleteCallback
- a String consumer used to delete locations.
public Snapshot apply()
PendingUpdate
This does not result in a permanent update.
apply
in interface PendingUpdate<Snapshot>
PendingUpdate.commit()
protected TableMetadata current()
protected TableMetadata refresh()
public void commit()
PendingUpdate
Changes are committed by calling the underlying table's commit method.
Once the commit is successful, the updated table will be refreshed.
commit
in interface PendingUpdate<Snapshot>
protected void cleanAll()
protected void deleteFile(java.lang.String path)
protected OutputFile manifestListPath()
protected OutputFile newManifestOutput()
protected ManifestWriter<DataFile> newManifestWriter(PartitionSpec spec)
protected ManifestWriter<DeleteFile> newDeleteManifestWriter(PartitionSpec spec)
protected ManifestReader<DataFile> newManifestReader(ManifestFile manifest)
protected ManifestReader<DeleteFile> newDeleteManifestReader(ManifestFile manifest)
protected long snapshotId()