Class RewriteManifestsSparkAction

    • Field Detail

      • STATISTICS_FILES

        protected static final java.lang.String STATISTICS_FILES
        See Also:
        Constant Field Values
      • COMMA_SPLITTER

        protected static final org.apache.iceberg.relocated.com.google.common.base.Splitter COMMA_SPLITTER
      • COMMA_JOINER

        protected static final org.apache.iceberg.relocated.com.google.common.base.Joiner COMMA_JOINER
    • Method Detail

      • specId

        public RewriteManifestsSparkAction specId​(int specId)
        Description copied from interface: RewriteManifests
        Rewrites manifests for a given spec ID.

        If not set, defaults to the table's default spec ID.

        Specified by:
        specId in interface RewriteManifests
        Parameters:
        specId - the spec ID whose manifests should be rewritten
        Returns:
        this for method chaining
      • rewriteIf

        public RewriteManifestsSparkAction rewriteIf​(java.util.function.Predicate<ManifestFile> newPredicate)
        Description copied from interface: RewriteManifests
        Rewrites only manifests that match the given predicate.

        If not set, all manifests will be rewritten.

        Specified by:
        rewriteIf in interface RewriteManifests
        Parameters:
        newPredicate - a predicate that selects the manifests to rewrite
        Returns:
        this for method chaining
      • stagingLocation

        public RewriteManifestsSparkAction stagingLocation​(java.lang.String newStagingLocation)
        Description copied from interface: RewriteManifests
        Passes a location where the staged manifests should be written.

        If not set, defaults to the table's metadata location.

        Specified by:
        stagingLocation in interface RewriteManifests
        Parameters:
        newStagingLocation - a staging location
        Returns:
        this for method chaining
      • snapshotProperty

        public ThisT snapshotProperty​(java.lang.String property,
                                      java.lang.String value)
      • commitSummary

        protected java.util.Map<java.lang.String,​java.lang.String> commitSummary()
      • spark

        protected org.apache.spark.sql.SparkSession spark()
      • sparkContext

        protected org.apache.spark.api.java.JavaSparkContext sparkContext()
      • option

        public ThisT option​(java.lang.String name,
                            java.lang.String value)
      • options

        public ThisT options​(java.util.Map<java.lang.String,​java.lang.String> newOptions)
      • options

        protected java.util.Map<java.lang.String,​java.lang.String> options()
      • withJobGroupInfo

        protected <T> T withJobGroupInfo​(JobGroupInfo info,
                                         java.util.function.Supplier<T> supplier)
      • newJobGroupInfo

        protected JobGroupInfo newJobGroupInfo​(java.lang.String groupId,
                                               java.lang.String desc)
      • contentFileDS

        protected org.apache.spark.sql.Dataset<FileInfo> contentFileDS​(Table table)
      • contentFileDS

        protected org.apache.spark.sql.Dataset<FileInfo> contentFileDS​(Table table,
                                                                       java.util.Set<java.lang.Long> snapshotIds)
      • manifestDS

        protected org.apache.spark.sql.Dataset<FileInfo> manifestDS​(Table table)
      • manifestDS

        protected org.apache.spark.sql.Dataset<FileInfo> manifestDS​(Table table,
                                                                    java.util.Set<java.lang.Long> snapshotIds)
      • manifestListDS

        protected org.apache.spark.sql.Dataset<FileInfo> manifestListDS​(Table table)
      • manifestListDS

        protected org.apache.spark.sql.Dataset<FileInfo> manifestListDS​(Table table,
                                                                        java.util.Set<java.lang.Long> snapshotIds)
      • statisticsFileDS

        protected org.apache.spark.sql.Dataset<FileInfo> statisticsFileDS​(Table table,
                                                                          java.util.Set<java.lang.Long> snapshotIds)
      • otherMetadataFileDS

        protected org.apache.spark.sql.Dataset<FileInfo> otherMetadataFileDS​(Table table)
      • allReachableOtherMetadataFileDS

        protected org.apache.spark.sql.Dataset<FileInfo> allReachableOtherMetadataFileDS​(Table table)
      • loadMetadataTable

        protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadMetadataTable​(Table table,
                                                                                           MetadataTableType type)
      • deleteFiles

        protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles​(java.util.concurrent.ExecutorService executorService,
                                                                                             java.util.function.Consumer<java.lang.String> deleteFunc,
                                                                                             java.util.Iterator<FileInfo> files)
        Deletes files and keeps track of how many files were removed for each file type.
        Parameters:
        executorService - an executor service to use for parallel deletes
        deleteFunc - a function that is called with a file path (String) to delete that file
        files - an iterator of FileInfo entries of the structure (path: String, type: String)
        Returns:
        a summary of how many files were deleted for each file type
      • deleteFiles

        protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles​(SupportsBulkOperations io,
                                                                                             java.util.Iterator<FileInfo> files)