junk filter: fix adjusting word counts after train/untrain
after seeing some junk messages pass the filter, i investigated the word counts
in junkfilter.db. i had seen suspicious counts that were just around powers of
two, which did not make sense at the time. more investigation made it clear:
instead of setting the new word counts when updating the junk filter, we were
adding the new value to the current value. so the counts got approximately
doubled on each update.

users should retrain the junk filter after this update using the "retrain"
subcommand.
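
for example (assuming the binary is installed as "mox"; substitute the actual
account name):

	mox retrain <accountname>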

this also adds logging for the hypothetical case where a count would be
decreased below zero (which would wrap around, since the counts are uints).
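
a minimal illustration (not from the repository) of why that case is now logged
instead of decremented: unsigned integers in Go wrap around rather than going
negative.

	package main

	import "fmt"

	func main() {
		var n uint32   // a counter that is already zero
		n--            // 0 - 1 wraps around for an unsigned integer
		fmt.Println(n) // prints 4294967295
	}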

and this fixes junk filter tests that were passing wrong parameters to
train/untrain...
mjl- committed Dec 7, 2024
1 parent 69a4995 commit 17baf9a
Showing 3 changed files with 23 additions and 9 deletions.
2 changes: 2 additions & 0 deletions ctl.go
@@ -1348,6 +1348,8 @@ func servectlcmd(ctx context.Context, ctl *ctl, shutdown func()) {
 	}
 }()
 
+// todo: can we retrain an account without holding a write lock? perhaps by writing a junkfilter to a new location, and staying informed of message changes while we go through all messages in the account?
+
 acc.WithWLock(func() {
 	conf, _ := acc.Conf()
 	if conf.JunkFilter == nil {
22 changes: 17 additions & 5 deletions junk/filter.go
@@ -298,7 +298,7 @@ func (f *Filter) Save() error {
 	} else if err != nil {
 		return err
 	}
-	return tx.Update(&wordscore{w, wc.Ham + ham, wc.Spam + spam})
+	return tx.Update(&wordscore{w, ham, spam})
 }
 if err := update("-", f.hams, f.spams); err != nil {
 	return fmt.Errorf("storing total ham/spam message count: %s", err)
@@ -621,10 +621,16 @@ func (f *Filter) Untrain(ctx context.Context, ham bool, words map[string]struct{

 // Modify the message count.
 f.modified = true
+var fv *uint32
 if ham {
-	f.hams--
+	fv = &f.hams
 } else {
-	f.spams--
+	fv = &f.spams
 }
+if *fv == 0 {
+	f.log.Error("attempt to decrease ham/spam message count while already zero", slog.Bool("ham", ham))
+} else {
+	*fv -= 1
+}
 
 // Decrease the word counts.
@@ -633,10 +639,16 @@ func (f *Filter) Untrain(ctx context.Context, ham bool, words map[string]struct{
 if !ok {
 	continue
 }
+var v *uint32
 if ham {
-	c.Ham--
+	v = &c.Ham
 } else {
-	c.Spam--
+	v = &c.Spam
+}
+if *v == 0 {
+	f.log.Error("attempt to decrease ham/spam word count while already zero", slog.String("word", w), slog.Bool("ham", ham))
+} else {
+	*v -= 1
 }
 f.cache[w] = c
 f.changed[w] = c
8 changes: 4 additions & 4 deletions junk/filter_test.go
@@ -126,7 +126,7 @@ func TestFilter(t *testing.T) {
tcheck(t, err, "train spam message")
_, err = spamf.Seek(0, 0)
tcheck(t, err, "seek spam message")
err = f.TrainMessage(ctxbg, spamf, spamsize, true)
err = f.TrainMessage(ctxbg, spamf, spamsize, false)
tcheck(t, err, "train spam message")

if !f.modified {
@@ -166,16 +166,16 @@ func TestFilter(t *testing.T) {
tcheck(t, err, "untrain ham message")
_, err = hamf.Seek(0, 0)
tcheck(t, err, "seek ham message")
err = f.UntrainMessage(ctxbg, hamf, spamsize, true)
err = f.UntrainMessage(ctxbg, hamf, hamsize, true)
tcheck(t, err, "untrain ham message")

_, err = spamf.Seek(0, 0)
tcheck(t, err, "seek spam message")
err = f.UntrainMessage(ctxbg, spamf, spamsize, true)
err = f.UntrainMessage(ctxbg, spamf, spamsize, false)
tcheck(t, err, "untrain spam message")
_, err = spamf.Seek(0, 0)
tcheck(t, err, "seek spam message")
err = f.UntrainMessage(ctxbg, spamf, spamsize, true)
err = f.UntrainMessage(ctxbg, spamf, spamsize, false)
tcheck(t, err, "untrain spam message")

if !f.modified {
