Skip to content

Commit

Permalink
PrefetchingResultSetIterator: fix bug w/r/t intermediate zero-row pages
Browse files Browse the repository at this point in the history
Zero row pages can sometimes happen, for example when using "filtering".
The iterator has a logic bug which make it throw an exception when encountering
this.

Fix this issue with minimal code changes and add associated unit test.
  • Loading branch information
eappere committed Nov 25, 2024
1 parent 7ae4271 commit a63795a
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ class PrefetchingResultSetIterator(resultSet: AsyncResultSet, timer: Option[Time
}

private def maybePrefetch(): Unit = {
if (!currentIterator.hasNext && currentResultSet.hasMorePages) {
// It is sometimes possible to see pages in the middle have zero
// elements, hence iterating until we get at least one element is
// needed (and it needs to be done before calling .next(), too)
while (!currentIterator.hasNext && currentResultSet.hasMorePages) {
currentResultSet = Await.result(nextResultSet.get, Duration.Inf)
currentIterator = currentResultSet.currentPage().iterator()
nextResultSet = fetchNextPage()
Expand All @@ -48,8 +51,8 @@ class PrefetchingResultSetIterator(resultSet: AsyncResultSet, timer: Option[Time
currentIterator.hasNext || currentResultSet.hasMorePages

override def next(): Row = {
val row = currentIterator.next() // let's try to exhaust the current iterator first
maybePrefetch()
val row = currentIterator.next()
row
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package com.datastax.spark.connector.rdd.reader

import com.datastax.oss.driver.api.core.cql.{AsyncResultSet, Row}
import org.mockito.Mockito._
import org.scalatest.{FlatSpec, Matchers}
import org.scalatestplus.mockito.MockitoSugar

import java.util.concurrent.CompletableFuture
import scala.jdk.CollectionConverters._

class PrefetchingResultSetIteratorSpec extends FlatSpec with Matchers with MockitoSugar {

"PrefetchingResultSetIterator" should "handle empty pages that are not the last with AsyncResultSet" in {
val row1 = mock[Row]
val row2 = mock[Row]
val row3 = mock[Row]

val asyncResultSet1 = mock[AsyncResultSet]
val asyncResultSet2 = mock[AsyncResultSet]
val asyncResultSet3 = mock[AsyncResultSet]

// First page has data
when(asyncResultSet1.currentPage()).thenReturn(Seq(row1).asJava)
when(asyncResultSet1.hasMorePages()).thenReturn(true)
when(asyncResultSet1.fetchNextPage()).thenReturn(CompletableFuture.completedFuture(asyncResultSet2))

// Second page is empty
when(asyncResultSet2.currentPage()).thenReturn(Seq.empty[Row].asJava)
when(asyncResultSet2.hasMorePages()).thenReturn(true)
when(asyncResultSet2.fetchNextPage()).thenReturn(CompletableFuture.completedFuture(asyncResultSet3))

// Third page has data
when(asyncResultSet3.currentPage()).thenReturn(Seq(row2, row3).asJava)
when(asyncResultSet3.hasMorePages()).thenReturn(false)

val iterator = new PrefetchingResultSetIterator(asyncResultSet1)

val rows = iterator.toList

rows should contain theSameElementsInOrderAs Seq(row1, row2, row3)

verify(asyncResultSet1).fetchNextPage()
verify(asyncResultSet2).fetchNextPage()
verify(asyncResultSet3, never()).fetchNextPage()
}
}

0 comments on commit a63795a

Please sign in to comment.