Skip to content

Commit

Permalink
fix: delete by expr fails during the retry process (#35241) (#35421)
Browse files Browse the repository at this point in the history
issue: #35240
pr: #35241
delete by expr shares the same err object between channels, so if one
channel's query fails, it fails all channels, which breaks the
channel-level retry policy and makes the delete operation fail.

Signed-off-by: Wei Liu <[email protected]>
  • Loading branch information
weiliu1031 authored Aug 12, 2024
1 parent 8a7c00d commit bfd20d5
Showing 1 changed file with 13 additions and 18 deletions.
31 changes: 13 additions & 18 deletions internal/proxy/task_delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,6 @@ type deleteRunner struct {
ts uint64
lb LBPolicy
count atomic.Int64
err error

// task queue
queue *dmTaskQueue
Expand Down Expand Up @@ -430,7 +429,11 @@ func (dr *deleteRunner) getStreamingQueryAndDelteFunc(plan *planpb.PlanNode) exe
}

taskCh := make(chan *deleteTask, 256)
go dr.receiveQueryResult(ctx, client, taskCh)
var receiveErr error
go func() {
receiveErr = dr.receiveQueryResult(ctx, client, taskCh)
close(taskCh)
}()
// wait all task finish
for task := range taskCh {
err := task.WaitToFinish()
Expand All @@ -441,50 +444,42 @@ func (dr *deleteRunner) getStreamingQueryAndDelteFunc(plan *planpb.PlanNode) exe
}

// query or produce task failed
if dr.err != nil {
return dr.err
if receiveErr != nil {
return receiveErr
}
return nil
}
}

func (dr *deleteRunner) receiveQueryResult(ctx context.Context, client querypb.QueryNode_QueryStreamClient, taskCh chan *deleteTask) {
defer func() {
close(taskCh)
}()

func (dr *deleteRunner) receiveQueryResult(ctx context.Context, client querypb.QueryNode_QueryStreamClient, taskCh chan *deleteTask) error {
for {
result, err := client.Recv()
if err != nil {
if err == io.EOF {
log.Debug("query stream for delete finished", zap.Int64("msgID", dr.msgID))
return
return nil
}
dr.err = err
return
return err
}

err = merr.Error(result.GetStatus())
if err != nil {
dr.err = err
log.Warn("query stream for delete get error status", zap.Int64("msgID", dr.msgID), zap.Error(err))
return
return err
}

if dr.limiter != nil {
err := dr.limiter.Alloc(ctx, []int64{dr.collectionID}, internalpb.RateType_DMLDelete, proto.Size(result.GetIds()))
if err != nil {
dr.err = err
log.Warn("query stream for delete failed because rate limiter", zap.Int64("msgID", dr.msgID), zap.Error(err))
return
return err
}
}

task, err := dr.produce(ctx, result.GetIds())
if err != nil {
dr.err = err
log.Warn("produce delete task failed", zap.Error(err))
return
return err
}

taskCh <- task
Expand Down

0 comments on commit bfd20d5

Please sign in to comment.