Skip to content

Commit

Permalink
Add improved naturalsort
Browse files Browse the repository at this point in the history
  • Loading branch information
johnnovak committed Mar 30, 2024
1 parent 0bddd54 commit 611f0bf
Showing 1 changed file with 124 additions and 56 deletions.
180 changes: 124 additions & 56 deletions src/naturalsort.nim
Original file line number Diff line number Diff line change
Expand Up @@ -8,91 +8,159 @@ import utils
# Original code by Alogani
# From https://github.com/nim-lang/Nim/issues/23462

# {{{ cmpNaturalAscii*()
proc cmpNaturalAscii*(a, b: string): int =
var
ai = 0
bi = 0
func cmpIgnoreCase(a, b: char): int =
  ## Three-way comparison of two chars after ASCII lower-casing both.
  ## Returns the difference of the lower-cased ordinals: negative when `a`
  ## sorts first, zero when equal ignoring ASCII case, positive otherwise.
  let lhs = ord(a.toLowerAscii)
  let rhs = ord(b.toLowerAscii)
  result = lhs - rhs

while true:
if ai > a.high or bi > b.high:
return a.len - ai - b.len + bi
func cmp(a, b: Rune): int =
  ## Three-way comparison of two runes by code point value.
  ## Returns the difference of the code points: negative when `a` sorts
  ## first, zero when identical, positive otherwise.
  result = int(a)
  result -= int(b)

func cmpIgnoreCase(a, b: Rune): int =
  ## Three-way comparison of two runes after lower-casing both with
  ## `toLower`. Returns the difference of the lower-cased code points.
  let lhs = int(toLower(a))
  let rhs = int(toLower(b))
  result = lhs - rhs

if not (a[ai].isDigit and b[bi].isDigit):
let diff = cmp(a[ai], b[bi])
template cmpNaturalImpl(a, b: string, comparator: untyped): auto =
var ai = 0
var bi = 0
while true:
if ai > high(a) or bi > high(b):
return a.len() - ai - b.len() + bi
if not (a[ai].isDigit() and b[bi].isDigit()):
let diff = comparator(a[ai], b[bi])
if diff != 0:
return diff

inc(ai)
inc(bi)
ai += 1; bi += 1
else:
var
aNum: int
bNum: int
ai += parseInt(a[ai..^1], aNum)
bi += parseInt(b[bi..^1], bNum)

ai += parseInt(a[ai .. ^1], aNum)
bi += parseInt(b[bi .. ^1], bNum)
let diff = cmp(aNum, bNum)
if diff != 0:
return diff

# }}}
# {{{ cmpNatural*()
proc cmpNatural*(a, b: seq[Rune]): int =
var
ai = 0
bi = 0
func cmpNatural*(a, b: string): int =
  ## Natural-order, case-sensitive comparison of two strings.
  ## Delegates to the `cmpNaturalImpl` template with `cmp` as the
  ## per-character comparator; the template returns from this func directly.
  cmpNaturalImpl(a, b, cmp)

while true:
if ai > a.high or bi > b.high:
return a.len-ai - b.len+bi
func cmpNaturalIgnoreCase*(a, b: string): int =
  ## Natural-order comparison of two strings using `cmpIgnoreCase` as the
  ## per-character comparator. Delegates to the `cmpNaturalImpl` template,
  ## which returns from this func directly.
  cmpNaturalImpl(a, b, cmpIgnoreCase)

if not (a[ai].isDigit and b[bi].isDigit):
let diff = if a[ai] == b[bi]: 0
elif a[ai] <% b[bi]: -1
else: 1
if diff != 0:
return diff
func naturalSort*(l: openArray[string]): seq[string] =
  ## Returns a new seq holding the items of `l` sorted with `cmpNatural`.
  ## The input is not modified.
  result = sorted(l, cmpNatural)

inc(ai)
inc(bi)
else:
var aNum, bNum: int
ai += parseInt($(a[ai..^1]), aNum)
bi += parseInt($(b[bi..^1]), bNum)
func naturalSortIgnoreCase*(l: openArray[string]): seq[string] =
  ## Returns a new seq holding the items of `l` sorted with
  ## `cmpNaturalIgnoreCase`. The input is not modified.
  result = sorted(l, cmpNaturalIgnoreCase)

proc integerOutOfRangeError() {.noinline.} =
  ## Raises `ValueError` to signal that a parsed integer does not fit the
  ## target type. Kept out of line (`noinline`) so the raise is not expanded
  ## into every call site.
  raise (ref ValueError)(msg: "Parsed integer outside of valid range")

# Code points of the DIGIT ZERO of every decimal digit run in Unicode general
# category Nd, converted from
# https://www.fileformat.info/info/unicode/category/Nd/list.htm
#
# Each entry `z` stands for the ten consecutive code points `z .. z + 9`.
# The table MUST stay sorted in ascending order: `toDigitImpl` stops scanning
# at the first entry greater than the code point being looked up.
const allZeros = [
  # Basic Multilingual Plane
  48, 1632, 1776, 1984, 2406, 2534, 2662, 2790, 2918, 3046, 3174, 3302, 3430,
  3558, 3664, 3792, 3872, 4160, 4240, 6112, 6160, 6470, 6608, 6784, 6800,
  6992, 7088, 7232, 7248, 42528, 43216, 43264, 43472, 43504, 43600, 44016,
  65296,
  # Supplementary planes (U+104A0 .. U+1FBF0)
  66720, 68912, 69734, 69872, 69942, 70096, 70384, 70736, 70864, 71248,
  71360, 71472, 71904, 72016, 72784, 73040, 73120, 73552, 92768, 92864,
  93008, 120782, 120792, 120802, 120812, 120822, 123200, 123632, 124144,
  125264, 130032
]

func toDigitImpl(r: Rune): int =
  ## Returns the decimal value (0..9) of `r` when it lies in one of the digit
  ## runs listed in `allZeros`, or -1 when it is not a decimal digit.
  result = -1
  let codePoint = int(r)
  for z in allZeros:
    # Linear scan instead of a binary search: the first runs are by far the
    # most common, and the sorted table lets non-digits bail out early.
    if codePoint < z:
      break
    if codePoint <= z + 9:
      return codePoint - z

func isDigit*(r: Rune): bool =
  ## Returns true when `toDigitImpl` maps `r` to a value in 0..9.
  let value = toDigitImpl(r)
  result = value >= 0 and value <= 9

func toDigit*(r: Rune): int =
  ## Returns the value `toDigitImpl` assigns to `r`.
  ## Raises `RangeDefect` when that value is outside 0..9.
  let value = toDigitImpl(r)
  if value < 0 or value > 9:
    raise newException(RangeDefect, "rune is not a valid digit")
  result = value

proc rawParseInt(s: openArray[Rune], b: var BiggestInt): int =
  ## Parses an optionally signed decimal integer from the start of `s`,
  ## accepting any digit recognised by `toDigitImpl`.
  ##
  ## * `s` - runes to parse; parsing stops at the first non-digit rune.
  ## * `b` - receives the parsed value; left untouched when nothing is parsed.
  ##
  ## Returns the number of runes consumed (sign, digits and any underscores
  ## following a digit), or 0 when `s` does not start with an integer, as
  ## `std/parseutils` does. Raises `ValueError` through
  ## `integerOutOfRangeError` when the value does not fit in `BiggestInt`.
  var
    sign: BiggestInt = -1 # the value is accumulated negated, see below
    i = 0
  if i < s.len:
    if s[i] == '+'.Rune: inc(i)
    elif s[i] == '-'.Rune:
      inc(i)
      sign = 1

  # Require at least one digit, so a bare sign or non-numeric input reports
  # "nothing parsed" instead of a consumed sign and a zeroed `b`.
  if i < s.len and toDigitImpl(s[i]) in 0..9:
    b = 0
    while i < s.len:
      let c = toDigitImpl(s[i])
      if c notin 0..9:
        break
      # Accumulate as a negative number: its range is one larger than the
      # positive range, so low(BiggestInt) itself can be represented.
      if b >= (low(BiggestInt) + c) div 10:
        b = b * 10 - c
      else:
        integerOutOfRangeError()
      inc(i)
      while i < s.len and s[i] == '_'.Rune: inc(i) # underscores are allowed and ignored
    if sign == -1 and b == low(BiggestInt):
      # +9223372036854775808 cannot be negated back into range
      integerOutOfRangeError()
    else:
      b = b * sign
      result = i

template cmpNaturalImpl(a, b: seq[Rune], comparator: untyped): auto =
  ## Natural-order comparison of two rune sequences, expanded inline into the
  ## calling routine (every `return` below leaves that routine).
  ##
  ## * `a`, `b` - the sequences to compare.
  ## * `comparator` - routine called as `comparator(x, y)` on two runes and
  ##   yielding an int; used wherever the current runes are not both digits.
  ##
  ## Runs of digits present at the same position in both inputs are parsed
  ## with `rawParseInt` and compared by numeric value. When one input is
  ## exhausted, the difference of the remaining lengths is returned.
  var ai = 0
  var bi = 0
  while true:
    if ai > high(a) or bi > high(b):
      return a.len() - ai - b.len() + bi
    if not (a[ai].isDigit() and b[bi].isDigit()):
      let diff = comparator(a[ai], b[bi])
      if diff != 0:
        return diff
      ai += 1; bi += 1
    else:
      var
        aNum: BiggestInt
        bNum: BiggestInt
      # Pass views of the remaining runes rather than `x[i .. ^1]` slices,
      # which would copy the whole tail for every digit run.
      ai += rawParseInt(a.toOpenArray(ai, a.high), aNum)
      bi += rawParseInt(b.toOpenArray(bi, b.high), bNum)
      let diff = cmp(aNum, bNum)
      if diff != 0:
        return diff

func cmpNatural*(a, b: seq[Rune]): int =
  ## Natural-order, case-sensitive comparison of two rune sequences.
  ## Delegates to the `cmpNaturalImpl` template with `cmp` as the per-rune
  ## comparator; the template returns from this func directly.
  cmpNaturalImpl(a, b, cmp)

func cmpNaturalIgnoreCase*(a, b: seq[Rune]): int =
  ## Natural-order comparison of two rune sequences using `cmpIgnoreCase` as
  ## the per-rune comparator. Delegates to the `cmpNaturalImpl` template,
  ## which returns from this func directly.
  cmpNaturalImpl(a, b, cmpIgnoreCase)

func naturalSort*(l: openArray[seq[Rune]]): seq[seq[Rune]] =
  ## Returns a new seq holding the items of `l` sorted with `cmpNatural`.
  ## The input is not modified.
  result = sorted(l, cmpNatural)

let diff = cmp(aNum, bNum)
if diff != 0:
return diff
func naturalSortIgnoreCase*(l: openArray[seq[Rune]]): seq[seq[Rune]] =
  ## Returns a new seq holding the items of `l` sorted with
  ## `cmpNaturalIgnoreCase`. The input is not modified.
  result = sorted(l, cmpNaturalIgnoreCase)

# }}}
# {{{ cmpNaturalIgnoreCase*()
proc cmpNaturalIgnoreCase*(a, b: seq[Rune]): int =
cmpNatural(a, b)
# }}}
proc naturalSortUtf8*(l: openArray[string]): seq[string] =
var rl = newSeq[seq[Rune]](l.len)
for i in 0..<l.len:
rl[i] = l[i].toRunes

# {{{ naturalSortAscii*()
proc naturalSortAscii*(l: openArray[string]): seq[string] =
l.sorted(cmpNaturalAscii)
var sorted = naturalSort(rl)

result = newSeq[string](sorted.len)
for i in 0..<sorted.len:
result[i] = $sorted[i]

# }}}
# {{{ naturalSort*()
proc naturalSort*(l: openArray[seq[Rune]]): seq[seq[Rune]] =
l.sorted(cmpNatural)

proc naturalSort*(l: openArray[string]): seq[string] =
proc naturalSortIgnoreCase*(l: openArray[string]): seq[string] =
var rl = newSeq[seq[Rune]](l.len)
for i in 0..<l.len:
rl[i] = l[i].toRunes

var sorted = naturalSort(rl)
var sorted = naturalSortIgnoreCase(rl)

result = newSeq[string](sorted.len)
for i in 0..<sorted.len:
result[i] = $sorted[i]

# }}}


# {{{ Tests
when isMainModule:
Expand Down

0 comments on commit 611f0bf

Please sign in to comment.