Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FuzzyMatching implemented using a BkTree #284

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
6 changes: 3 additions & 3 deletions src/DevChatter.Bot.Core/Commands/BaseCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public abstract class BaseCommand : IBotCommand
{
protected readonly IRepository Repository;
protected DateTimeOffset _timeCommandLastInvoked;
private bool _isEnabled;
public bool IsEnabled { get; private set; }
public UserRole RoleRequired { get; private set; }
public TimeSpan Cooldown { get; private set; } = TimeSpan.Zero;
public string PrimaryCommandText => CommandWords.FirstOrDefault().Word;
Expand All @@ -42,7 +42,7 @@ private void RefreshCommandData()
cmdInfo.Insert(0, (command.CommandWord, new List<string>()));

RoleRequired = command.RequiredRole;
_isEnabled = command.IsEnabled;
IsEnabled = command.IsEnabled;
HelpText = command.HelpText;
Cooldown = command.Cooldown;
CommandWords = cmdInfo;
Expand All @@ -53,7 +53,7 @@ private void RefreshCommandData()
public bool ShouldExecute(string commandText, out IList<string> args)
{
args = new List<string>();
if (_isEnabled)
if (IsEnabled)
{
if (CommandWords.Any(x => x.Word.EqualsIns(commandText)))
{
Expand Down
3 changes: 3 additions & 0 deletions src/DevChatter.Bot.Core/Commands/HelpCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using DevChatter.Bot.Core.Commands.Trackers;

namespace DevChatter.Bot.Core.Commands
{
Expand All @@ -20,6 +21,8 @@ public HelpCommand(IRepository repository, IServiceProvider provider)
}

private IList<IBotCommand> _allCommands;

// todo refactor to DevChatter.Bot.Core.Commands.Trackers.CommandList to allow fuzzy search
public IList<IBotCommand> AllCommands
{
get { return _allCommands ?? (_allCommands = _provider.GetService<IList<IBotCommand>>()); }
Expand Down
2 changes: 2 additions & 0 deletions src/DevChatter.Bot.Core/Commands/IBotCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,7 @@ public interface IBotCommand
CommandUsage Process(IChatClient chatClient, CommandReceivedEventArgs eventArgs);
TimeSpan GetCooldownTimeRemaining();
bool IsActiveGame();
IList<(string Word, IList<string> Args)> CommandWords { get; }
bool IsEnabled { get; }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ namespace DevChatter.Bot.Core.Commands.Operations
public class DeleteCommandOperation : BaseCommandOperation
{
private readonly IRepository _repository;

// todo refactor to DevChatter.Bot.Core.Commands.Trackers.CommandList to allow fuzzy search
private readonly IList<IBotCommand> _allCommands;

public DeleteCommandOperation(IRepository repository, IList<IBotCommand> allCommands)
Expand Down
6 changes: 6 additions & 0 deletions src/DevChatter.Bot.Core/Commands/SimpleCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ public SimpleCommand(string commandText, string staticResponse,
StaticResponse = staticResponse;
RoleRequired = roleRequired;
CommandText = commandText;
CommandWords = new List<(string Word, IList<string> Args)>
{
(commandText, new List<string>())
};
}

public string StaticResponse { get; protected set; }
Expand All @@ -33,6 +37,8 @@ public SimpleCommand(string commandText, string staticResponse,
public string CommandText { get; protected set; }
public string HelpText { get; protected set; } = "No help text for this command yet.";
public string FullHelpText => HelpText;
public IList<(string Word, IList<string> Args)> CommandWords { get; }
public Boolean IsEnabled => true;

public bool ShouldExecute(string commandText, out IList<string> args)
{
Expand Down
32 changes: 31 additions & 1 deletion src/DevChatter.Bot.Core/Commands/Trackers/CommandList.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,27 @@
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using DevChatter.Bot.Core.Util.FuzzyMatching;

namespace DevChatter.Bot.Core.Commands.Trackers
{
public class CommandList : IEnumerable<IBotCommand>
{
private readonly IList<IBotCommand> _list;
private IBkTree<string, (IBotCommand command, IList<string> arguments)> BkTree { get; }

public CommandList(IList<IBotCommand> list)
private int FuzzySearchMaxDistance { get; }

public CommandList(IList<IBotCommand> list, int fuzzySearchMaxDistance = 2)
{
_list = list ?? throw new ArgumentNullException(nameof(list));

FuzzySearchMaxDistance = fuzzySearchMaxDistance;

var bkTree =
new MutableBkTree<string, (IBotCommand command, IList<string> arguments)>(new CaseInsensitiveMetric(new DamerauLevenshteinMetric()));
bkTree.AddAll(_list.SelectMany(command => command.CommandWords.Select(word => (word.Word, (command, word.Args)))));
BkTree = bkTree;
}

public T GetCommandByType<T>() where T : class, IBotCommand
Expand All @@ -33,5 +44,24 @@ public IBotCommand GetCommandByFullTypeName(string fullTypeName)
{
return _list.SingleOrDefault(x => x.GetType().FullName == fullTypeName);
}

/// <summary>
/// Finds the enabled command whose command word is the closest fuzzy match for
/// <paramref name="keyword"/>.
/// </summary>
/// <param name="keyword">The command word to look up.</param>
/// <param name="args">
/// Receives the argument list stored in the tree next to the matched command word,
/// or an empty list when nothing matched or no argument list was stored.
/// </param>
/// <returns>
/// The closest enabled command within <c>FuzzySearchMaxDistance</c> of the keyword,
/// or <c>null</c> when there is none.
/// </returns>
public IBotCommand FindCommandByKeyword(string keyword, out IList<string> args)
{
    var searcher = new BkTreeSearcher<string, (IBotCommand command, IList<string> arguments)>(BkTree);

    // Search() hands back an unordered set, so taking "the first" element would pick
    // an arbitrary candidate: a command one typo away could win over an exact match.
    // Rank by distance so the closest word wins, and break ties on the key itself so
    // the same input always resolves to the same command.
    var bestMatch = searcher
        .Search(keyword, FuzzySearchMaxDistance)
        .Where(match => match.MatchValue.command.IsEnabled)
        .OrderBy(match => match.Distance)
        .ThenBy(match => match.MatchKey, StringComparer.Ordinal)
        .FirstOrDefault();

    // bestMatch is null when no enabled command is close enough.
    args = bestMatch?.MatchValue.arguments ?? new List<string>();
    return bestMatch?.MatchValue.command;
}
}
}
2 changes: 1 addition & 1 deletion src/DevChatter.Bot.Core/Events/CommandHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public void CommandReceivedHandler(object sender, CommandReceivedEventArgs e)
}

IList<string> args = new List<string>();
IBotCommand botCommand = _commandList.FirstOrDefault(c => c.ShouldExecute(e.CommandWord, out args));
IBotCommand botCommand = _commandList.FindCommandByKeyword(e.CommandWord, out args);
if (botCommand == null)
{
return;
Expand Down
146 changes: 146 additions & 0 deletions src/DevChatter.Bot.Core/Util/FuzzyMatching/BkTreeSearcher.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* Copyright 2013 Georgia Tech Applied Research Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace DevChatter.Bot.Core.Util.FuzzyMatching
{
/// <summary>
/// Searches an <see cref="IBkTree{TKey, TValue}"/> for entries whose key lies within a
/// given distance of a query key, measured with the tree's own metric.
/// </summary>
/// <typeparam name="TKey">Type of the keys compared by the tree's metric.</typeparam>
/// <typeparam name="TValue">Type of the value carried by each tree node.</typeparam>
public sealed class BkTreeSearcher<TKey, TValue>
{
    /// <summary>The tree this searcher queries.</summary>
    public IBkTree<TKey, TValue> Tree { get; }

    /// <summary>
    /// Constructs a searcher over the given tree.
    /// </summary>
    /// <param name="tree">Tree to search.</param>
    /// <exception cref="ArgumentNullException"><paramref name="tree"/> is null.</exception>
    public BkTreeSearcher(IBkTree<TKey, TValue> tree)
    {
        Tree = tree ?? throw new ArgumentNullException(nameof(tree));
    }

    /// <summary>
    /// Searches the tree for entries whose distance from <paramref name="query"/> is
    /// less than or equal to <paramref name="maxDistance"/>.
    /// </summary>
    /// <param name="query">Key against which tree entries are matched.</param>
    /// <param name="maxDistance">Non-negative maximum distance of a match from the query.</param>
    /// <returns>
    /// The matching entries in no particular order; callers that need the closest match
    /// must sort by <see cref="SearchMatch{TKey, TValue}.Distance"/> themselves.
    /// The set is empty when the tree has no root.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="maxDistance"/> is negative.</exception>
    /// <exception cref="IllegalMetricException">The tree's metric returned a negative distance.</exception>
    public ISet<SearchMatch<TKey, TValue>> Search(TKey query, Int32 maxDistance)
    {
        if (query == null)
        {
            throw new ArgumentNullException(nameof(query));
        }

        if (maxDistance < 0)
        {
            throw new ArgumentException("maxDistance must be non-negative");
        }

        var metric = Tree.Metric;

        ISet<SearchMatch<TKey, TValue>> matches = new HashSet<SearchMatch<TKey, TValue>>();

        // A tree without a root has nothing to match. Enqueuing the null root would
        // crash on the first node.Key access below.
        var root = Tree.Root;
        if (root == null)
        {
            return matches;
        }

        // Breadth-first walk over the nodes that can still contain a match.
        var queue = new Queue<IBkTreeNode<TKey, TValue>>();
        queue.Enqueue(root);

        while (queue.Count > 0)
        {
            var node = queue.Dequeue();
            var element = node.Key;

            var distance = metric.Distance(element, query);
            if (distance < 0)
            {
                throw new IllegalMetricException($"negative distance ({distance}) defined between element `{element}` and query `{query}`");
            }

            if (distance <= maxDistance)
            {
                matches.Add(new SearchMatch<TKey, TValue>(element, node.Value, distance));
            }

            // Only children registered under a distance in
            // [distance - maxDistance, distance + maxDistance] are visited. This pruning
            // is only sound when the metric obeys the triangle inequality.
            var minSearchDistance = Math.Max(distance - maxDistance, 0);
            var maxSearchDistance = distance + maxDistance;

            for (var searchDistance = minSearchDistance; searchDistance <= maxSearchDistance; ++searchDistance)
            {
                // GetChildNode yields null when no child is stored at this distance.
                var childNode = node.GetChildNode(searchDistance);
                if (childNode != null)
                {
                    queue.Enqueue(childNode);
                }
            }
        }

        return matches;
    }
}

/// <summary>
/// A tree entry that matched a search query, together with its distance from the query.
/// Two matches are considered equal when their key and distance are equal; the
/// associated value takes no part in equality or hashing.
/// </summary>
/// <typeparam name="TKey">Type of the matching key.</typeparam>
/// <typeparam name="TValue">Type of the value carried with the key.</typeparam>
public sealed class SearchMatch<TKey, TValue>
{
    /// <summary>The key that matched the query.</summary>
    public TKey MatchKey { get; }

    /// <summary>The value that was supplied together with the key.</summary>
    public TValue MatchValue { get; }

    /// <summary>Distance of <see cref="MatchKey"/> from the search query.</summary>
    public Int32 Distance { get; }

    /// <summary>
    /// Creates a match record.
    /// </summary>
    /// <param name="matchKey">Matching key; must not be null.</param>
    /// <param name="matchValue">Value associated with the key.</param>
    /// <param name="distance">Non-negative distance of the key from the search query.</param>
    /// <exception cref="ArgumentNullException"><paramref name="matchKey"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="distance"/> is negative.</exception>
    public SearchMatch(TKey matchKey, TValue matchValue, Int32 distance)
    {
        if (matchKey == null)
        {
            throw new ArgumentNullException(nameof(matchKey));
        }

        if (distance < 0)
        {
            throw new ArgumentException("distance must be non-negative");
        }

        MatchKey = matchKey;
        MatchValue = matchValue;
        Distance = distance;
    }

    // Typed comparison; callers guarantee that other is not null.
    private Boolean Equals(SearchMatch<TKey, TValue> other) =>
        EqualityComparer<TKey>.Default.Equals(MatchKey, other.MatchKey) && Distance == other.Distance;

    /// <inheritdoc />
    public override Boolean Equals(Object obj)
    {
        if (ReferenceEquals(this, obj))
        {
            return true;
        }

        var candidate = obj as SearchMatch<TKey, TValue>;
        return candidate != null && Equals(candidate);
    }

    /// <inheritdoc />
    public override Int32 GetHashCode()
    {
        var keyHash = EqualityComparer<TKey>.Default.GetHashCode(MatchKey);

        // Wrap-around on overflow is intended when mixing the hash.
        return unchecked(keyHash * 397) ^ Distance;
    }

    /// <summary>Renders the key, value and distance for diagnostics.</summary>
    public override String ToString() =>
        $"Match{{match={MatchKey}, matchValue={MatchValue}, distance={Distance}}}";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
using System;

namespace DevChatter.Bot.Core.Util.FuzzyMatching
{
/// <summary>
/// Wraps another string metric so that distances ignore letter case: both operands are
/// lower-cased with the invariant culture before being handed to the wrapped metric.
/// </summary>
public class CaseInsensitiveMetric : IMetric<String>
{
    // The metric that measures the case-normalized strings.
    private IMetric<String> Metric { get; }

    /// <summary>
    /// Creates a case-insensitive view over <paramref name="metric"/>.
    /// </summary>
    /// <param name="metric">Metric applied to the lower-cased operands.</param>
    /// <exception cref="ArgumentNullException"><paramref name="metric"/> is null.</exception>
    public CaseInsensitiveMetric(IMetric<String> metric)
    {
        // Fail at construction rather than with a NullReferenceException on first use.
        Metric = metric ?? throw new ArgumentNullException(nameof(metric));
    }

    /// <summary>
    /// Returns the wrapped metric's distance between the lower-cased forms of
    /// <paramref name="x"/> and <paramref name="y"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">Either operand is null.</exception>
    public Int32 Distance(String x, String y)
    {
        if (x == null)
        {
            throw new ArgumentNullException(nameof(x));
        }

        if (y == null)
        {
            throw new ArgumentNullException(nameof(y));
        }

        return Metric.Distance(x.ToLowerInvariant(), y.ToLowerInvariant());
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
using System;
using System.Collections.Generic;
using System.Linq;

namespace DevChatter.Bot.Core.Util.FuzzyMatching
{
/// <summary>
/// Damerau-Levenshtein edit distance: the minimum number of single-character
/// insertions, deletions, substitutions and adjacent transpositions needed to turn one
/// string into the other.
/// </summary>
/// <remarks>
/// This is the unrestricted variant. The restricted "optimal string alignment" variant
/// violates the triangle inequality ("CA"-"AC" is 1 and "AC"-"ABC" is 1, yet it scores
/// "CA"-"ABC" as 3), so it is not a metric and cannot safely drive a BK-tree, whose
/// pruning relies on that inequality.
/// </remarks>
public class DamerauLevenshteinMetric : IMetric<String>
{
    /// <summary>
    /// Computes the Damerau-Levenshtein distance between two strings, comparing
    /// characters ordinally.
    /// </summary>
    /// <param name="x">First string.</param>
    /// <param name="y">Second string.</param>
    /// <returns>The non-negative edit distance; 0 when the strings are identical.</returns>
    /// <exception cref="ArgumentNullException">Either string is null.</exception>
    public Int32 Distance(String x, String y)
    {
        if (x == null)
        {
            throw new ArgumentNullException(nameof(x));
        }

        if (y == null)
        {
            throw new ArgumentNullException(nameof(y));
        }

        int lenX = x.Length;
        int lenY = y.Length;

        // Larger than any real distance; stored in the border so that a transposition
        // referring to a character that has not been seen yet can never win.
        int unreachable = lenX + lenY;

        // distances[i + 1, j + 1] is the distance between the first i characters of x
        // and the first j characters of y; row/column 0 form the sentinel border.
        var distances = new int[lenX + 2, lenY + 2];
        distances[0, 0] = unreachable;
        for (int i = 0; i <= lenX; i++)
        {
            distances[i + 1, 0] = unreachable;
            distances[i + 1, 1] = i;
        }

        for (int j = 0; j <= lenY; j++)
        {
            distances[0, j + 1] = unreachable;
            distances[1, j + 1] = j;
        }

        // For each character: the last (1-based) position in x where it occurred among
        // the rows processed so far. Missing entries read as 0, i.e. "never seen".
        var lastRowOf = new Dictionary<char, int>();

        for (int i = 1; i <= lenX; i++)
        {
            // Last (1-based) position in y that matched x[i - 1] in this row.
            int lastMatchColumn = 0;

            for (int j = 1; j <= lenY; j++)
            {
                lastRowOf.TryGetValue(y[j - 1], out int transposeRow);
                int transposeColumn = lastMatchColumn;

                int cost;
                if (x[i - 1] == y[j - 1])
                {
                    cost = 0;
                    lastMatchColumn = j;
                }
                else
                {
                    cost = 1;
                }

                int substitution = distances[i, j] + cost;
                int insertion = distances[i + 1, j] + 1;
                int deletion = distances[i, j + 1] + 1;

                // Swap the two matching characters and pay for every character that
                // has to be deleted/inserted between them.
                int transposition = distances[transposeRow, transposeColumn]
                                    + (i - transposeRow - 1) + 1 + (j - transposeColumn - 1);

                distances[i + 1, j + 1] = Math.Min(
                    Math.Min(substitution, insertion),
                    Math.Min(deletion, transposition));
            }

            lastRowOf[x[i - 1]] = i;
        }

        return distances[lenX + 1, lenY + 1];
    }
}
}
34 changes: 34 additions & 0 deletions src/DevChatter.Bot.Core/Util/FuzzyMatching/IBkTree.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright 2013 Georgia Tech Applied Research Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/


namespace DevChatter.Bot.Core.Util.FuzzyMatching
{
    /// <summary>
    /// A <see href="http://en.wikipedia.org/wiki/BK-tree">BK-tree</see>: exposes the metric
    /// used to compare keys and the root node from which the tree is traversed.
    /// </summary>
    /// <typeparam name="TKey">Type of the keys measured by <see cref="Metric"/>.</typeparam>
    /// <typeparam name="TValue">
    /// Type of the value exposed by the tree's nodes; declared covariant (<c>out</c>).
    /// </typeparam>
    public interface IBkTree<TKey, out TValue>
    {
        /// <summary>Returns the metric for elements in this tree.</summary>
        IMetric<TKey> Metric { get; }

        /// <summary>Returns the root node of this tree.</summary>
        /// <remarks>
        /// NOTE(review): presumably null while the tree holds no elements; confirm
        /// against the implementations before relying on a non-null root.
        /// </remarks>
        IBkTreeNode<TKey, TValue> Root { get; }
    }
}
Loading