diff --git a/src/datatrove/pipeline/readers/base.py b/src/datatrove/pipeline/readers/base.py
index dec3e78a..e7f4569b 100644
--- a/src/datatrove/pipeline/readers/base.py
+++ b/src/datatrove/pipeline/readers/base.py
@@ -190,6 +190,7 @@ def read_files_shard(self, shard: list[str]) -> DocumentsPipeline:
                 self.stat_update("input_files")
                 logger.info(f"Reading input file {filepath}, {i+1}/{len(shard)}")
                 di = 0
+                ndocs = 0
                 for di, document in enumerate(self.read_file(filepath)):
                     if skipped < self.skip:
                         skipped += 1
@@ -199,11 +200,9 @@ def read_files_shard(self, shard: list[str]) -> DocumentsPipeline:
                     yield document
                     doc_pbar.update()
                     li += 1
+                    ndocs += 1
                 file_pbar.update()
-                # document count is non-zero, increment di and store the number
-                # of documents instead of the index of the last document
-                di += min(di, 1)
-                self.stat_update("documents", value=di, unit="input_file")
+                self.stat_update("documents", value=ndocs, unit="input_file")
                 if self.limit != -1 and li >= self.limit:
                     break