diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/source/SparkScan.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/source/SparkScan.java index ae29e05d4..7d93ad66e 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/source/SparkScan.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/source/SparkScan.java @@ -148,7 +148,9 @@ protected Statistics estimateStatistics(Snapshot snapshot) { for (CombinedScanTask task : tasks()) { for (FileScanTask file : task.files()) { - numRows += file.file().recordCount(); + // TODO: if possible, take deletes also into consideration. + double fractionOfFileScanned = ((double) file.length()) / file.file().fileSizeInBytes(); + numRows += (fractionOfFileScanned * file.file().recordCount()); } }