Skip to content

Commit

Permalink
fix: sparse index must be less than uint32 max
Browse files Browse the repository at this point in the history
Signed-off-by: aoiasd <[email protected]>
  • Loading branch information
aoiasd committed Dec 6, 2024
1 parent 18bef5e commit 2401d30
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
2 changes: 1 addition & 1 deletion internal/util/function/bm25_function.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func (v *BM25FunctionRunner) run(data []string, dst []map[uint32]float32) error
for tokenStream.Advance() {
token := tokenStream.Token()
// TODO More Hash Option
hash := typeutil.HashString2Uint32(token)
hash := typeutil.HashString2LessUint32(token)
embeddingMap[hash] += 1
}
dst[i] = embeddingMap
Expand Down
11 changes: 11 additions & 0 deletions pkg/util/typeutil/hash.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package typeutil
import (
"fmt"
"hash/crc32"
"math"
"strconv"
"strings"
"unsafe"
Expand Down Expand Up @@ -75,6 +76,16 @@ func HashString2Uint32(v string) uint32 {
return crc32.ChecksumIEEE([]byte(subString))
}

// HashString2LessUint32 computes the CRC-32 (IEEE) checksum of v, considering
// at most the first substringLengthForCRC bytes, and reduces it modulo
// math.MaxUint32. The returned value is therefore always strictly less than
// math.MaxUint32.
func HashString2LessUint32(v string) uint32 {
	// Bound the hashed prefix so long inputs only contribute their leading bytes.
	end := len(v)
	if end > substringLengthForCRC {
		end = substringLengthForCRC
	}

	checksum := crc32.ChecksumIEEE([]byte(v[:end]))
	// The modulo folds the single value math.MaxUint32 onto 0, keeping the
	// result inside [0, math.MaxUint32).
	return checksum % math.MaxUint32
}

// HashPK2Channels hash primary keys to channels
func HashPK2Channels(primaryKeys *schemapb.IDs, shardNames []string) []uint32 {
numShard := uint32(len(shardNames))
Expand Down

0 comments on commit 2401d30

Please sign in to comment.