From 0c6d805fcad887357c6c8f499ebd27cd42922082 Mon Sep 17 00:00:00 2001 From: Wietlol Date: Thu, 11 Jul 2019 08:50:00 +0200 Subject: [PATCH 01/10] Added a BkTree class set (there wasn't anything on NuGet that looked good enough). --- .../Util/FuzzyMatching/BkTreeSearcher.cs | 146 +++++++++++++++++ .../FuzzyMatching/CaseInsensitiveMetric.cs | 18 +++ .../FuzzyMatching/DamerauLevenshteinMetric.cs | 36 +++++ .../Util/FuzzyMatching/DistanceMetric.cs | 115 ++++++++++++++ .../Util/FuzzyMatching/IBkTree.cs | 34 ++++ .../Util/FuzzyMatching/IBkTreeNode.cs | 21 +++ .../Util/FuzzyMatching/IMetric.cs | 34 ++++ .../FuzzyMatching/IllegalMetricException.cs | 32 ++++ .../Util/FuzzyMatching/MutableBkTree.cs | 147 ++++++++++++++++++ .../Util/FuzzyMatching/MutableBkTreeNode.cs | 52 +++++++ 10 files changed, 635 insertions(+) create mode 100644 src/DevChatter.Bot.Core/Util/FuzzyMatching/BkTreeSearcher.cs create mode 100644 src/DevChatter.Bot.Core/Util/FuzzyMatching/CaseInsensitiveMetric.cs create mode 100644 src/DevChatter.Bot.Core/Util/FuzzyMatching/DamerauLevenshteinMetric.cs create mode 100644 src/DevChatter.Bot.Core/Util/FuzzyMatching/DistanceMetric.cs create mode 100644 src/DevChatter.Bot.Core/Util/FuzzyMatching/IBkTree.cs create mode 100644 src/DevChatter.Bot.Core/Util/FuzzyMatching/IBkTreeNode.cs create mode 100644 src/DevChatter.Bot.Core/Util/FuzzyMatching/IMetric.cs create mode 100644 src/DevChatter.Bot.Core/Util/FuzzyMatching/IllegalMetricException.cs create mode 100644 src/DevChatter.Bot.Core/Util/FuzzyMatching/MutableBkTree.cs create mode 100644 src/DevChatter.Bot.Core/Util/FuzzyMatching/MutableBkTreeNode.cs diff --git a/src/DevChatter.Bot.Core/Util/FuzzyMatching/BkTreeSearcher.cs b/src/DevChatter.Bot.Core/Util/FuzzyMatching/BkTreeSearcher.cs new file mode 100644 index 00000000..a42fc567 --- /dev/null +++ b/src/DevChatter.Bot.Core/Util/FuzzyMatching/BkTreeSearcher.cs @@ -0,0 +1,146 @@ +/* + * Copyright 2013 Georgia Tech Applied Research Corporation + * + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace DevChatter.Bot.Core.Util.FuzzyMatching +{ + /** + * Searches a {@link BkTree}. + * + * @param type of elements in the searched tree + */ + public sealed class BkTreeSearcher + { + public IBkTree Tree { get; } + + /** + * Constructs a searcher that orders matches in increasing order of + * distance from the query. + * + * @param tree tree to search + */ + public BkTreeSearcher(IBkTree tree) + { + Tree = tree ?? throw new ArgumentNullException(nameof(tree)); + } + + /** + * Searches the tree for elements whose distance from the given query + * is less than or equal to the given maximum distance. 
+ * + * @param query query against which to match tree elements + * @param maxDistance non-negative maximum distance of matching elements from query + * @return matching elements in no particular order (empty when the tree has no root) + */ + public ISet> Search(TKey query, Int32 maxDistance) + { + if (query == null) throw new ArgumentNullException(nameof(query)); + if (maxDistance < 0) throw new ArgumentException("maxDistance must be non-negative"); + + var metric = Tree.Metric; + + ISet> matches = new HashSet>(); + + var queue = new Queue>(); + if (Tree.Root != null) queue.Enqueue(Tree.Root); // an empty tree has no root, so there is nothing to visit + + while (queue.Count != 0) + { + var node = queue.Dequeue(); + var element = node.Key; + + var distance = metric.Distance(element, query); + if (distance < 0) + throw new IllegalMetricException($"negative distance ({distance}) defined between element `{element}` and query `{query}`"); + + if (distance <= maxDistance) + matches.Add(new SearchMatch(element, node.Value, distance)); + + var minSearchDistance = Math.Max(distance - maxDistance, 0); + var maxSearchDistance = distance + maxDistance; + + for (var searchDistance = minSearchDistance; searchDistance <= maxSearchDistance; ++searchDistance) + { + var childNode = node.GetChildNode(searchDistance); + if (childNode != null) + { + queue.Enqueue(childNode); + } + } + } + + return matches; + } + } + + /** + * An element matching a query. 
+ * + * @param type of matching element + */ + public sealed class SearchMatch + { + public TKey MatchKey { get; } + public TValue MatchValue { get; } + public Int32 Distance { get; } + + /** + * @param match matching element + * @param distance distance of the matching element from the search query + */ + public SearchMatch(TKey matchKey, TValue matchValue, Int32 distance) + { + if (matchKey == null) throw new ArgumentNullException(nameof(matchKey)); + if (distance < 0) throw new ArgumentException("distance must be non-negative"); + + MatchKey = matchKey; + MatchValue = matchValue; + Distance = distance; + } + + private Boolean Equals(SearchMatch other) + { + return EqualityComparer.Default.Equals(MatchKey, other.MatchKey) && Distance == other.Distance; + } + + public override Boolean Equals(Object obj) + { + return ReferenceEquals(this, obj) || obj is SearchMatch other && Equals(other); + } + + public override Int32 GetHashCode() + { + unchecked + { + return (EqualityComparer.Default.GetHashCode(MatchKey) * 397) ^ Distance; + } + } + + public override String ToString() + { + var sb = new StringBuilder("Match{"); + sb.Append("match=").Append(MatchKey); + sb.Append(", matchValue=").Append(MatchValue); + sb.Append(", distance=").Append(Distance); + sb.Append('}'); + return sb.ToString(); + } + } +} \ No newline at end of file diff --git a/src/DevChatter.Bot.Core/Util/FuzzyMatching/CaseInsensitiveMetric.cs b/src/DevChatter.Bot.Core/Util/FuzzyMatching/CaseInsensitiveMetric.cs new file mode 100644 index 00000000..077a54b2 --- /dev/null +++ b/src/DevChatter.Bot.Core/Util/FuzzyMatching/CaseInsensitiveMetric.cs @@ -0,0 +1,18 @@ +using System; + +namespace DevChatter.Bot.Core.Util.FuzzyMatching +{ + public class CaseInsensitiveMetric : IMetric + { + private IMetric Metric { get; } + + public CaseInsensitiveMetric(IMetric metric) + { + Metric = metric; + } + + // https://gist.github.com/wickedshimmy/449595/cb33c2d0369551d1aa5b6ff5e6a802e21ba4ad5c + public Int32 
Distance(String x, String y) => + Metric.Distance(x.ToLowerInvariant(), y.ToLowerInvariant()); + } +} diff --git a/src/DevChatter.Bot.Core/Util/FuzzyMatching/DamerauLevenshteinMetric.cs b/src/DevChatter.Bot.Core/Util/FuzzyMatching/DamerauLevenshteinMetric.cs new file mode 100644 index 00000000..589d2aee --- /dev/null +++ b/src/DevChatter.Bot.Core/Util/FuzzyMatching/DamerauLevenshteinMetric.cs @@ -0,0 +1,36 @@ +using System; +using System.Linq; + +namespace DevChatter.Bot.Core.Util.FuzzyMatching +{ + public class DamerauLevenshteinMetric : IMetric + { + // https://gist.github.com/wickedshimmy/449595/cb33c2d0369551d1aa5b6ff5e6a802e21ba4ad5c + public Int32 Distance(String x, String y) + { + int len_orig = x.Length; + int len_diff = y.Length; + + var matrix = new int[len_orig + 1, len_diff + 1]; + for (int i = 0; i <= len_orig; i++) + matrix[i,0] = i; + for (int j = 0; j <= len_diff; j++) + matrix[0,j] = j; + + for (int i = 1; i <= len_orig; i++) { + for (int j = 1; j <= len_diff; j++) { + int cost = y[j - 1] == x[i - 1] ? 
0 : 1; + var vals = new int[] { + matrix[i - 1, j] + 1, + matrix[i, j - 1] + 1, + matrix[i - 1, j - 1] + cost + }; + matrix[i,j] = vals.Min (); + if (i > 1 && j > 1 && x[i - 1] == y[j - 2] && x[i - 2] == y[j - 1]) + matrix[i,j] = Math.Min (matrix[i,j], matrix[i - 2, j - 2] + cost); + } + } + return matrix[len_orig, len_diff]; + } + } +} diff --git a/src/DevChatter.Bot.Core/Util/FuzzyMatching/DistanceMetric.cs b/src/DevChatter.Bot.Core/Util/FuzzyMatching/DistanceMetric.cs new file mode 100644 index 00000000..d0b95f63 --- /dev/null +++ b/src/DevChatter.Bot.Core/Util/FuzzyMatching/DistanceMetric.cs @@ -0,0 +1,115 @@ +namespace DevChatter.Bot.Core.Util.FuzzyMatching +{ +// public static class DistanceMetric +// { +// /* +// * Lee Distance +// * http://en.wikipedia.org/wiki/Lee_distance +// */ +// public static int CalculateLeeDistance(int[] source, int[] target) +// { +// if (source.Length != target.Length) +// { +// throw new Exception("Lee distance string comparisons must be of equal length."); +// } +// +// // Iterate both arrays simultaneously, summing absolute value of difference at each position +// return source +// .Zip(target, (v1, v2) => new { v1, v2 }) +// .Sum(m => Math.Abs(m.v1 - m.v2)); +// } +// +// /* +// * Hamming distance +// * http://en.wikipedia.org/wiki/Hamming_distance +// */ +// public static int CalculateHammingDistance(byte[] source, byte[] target) +// { +// if (source.Length != target.Length) +// { +// throw new Exception("Hamming distance string comparisons must be of equal length."); +// } +// +// // Iterate both arrays simultaneously, summing count of bit differences of each byte +// return source +// .Zip(target, (v1, v2) => new { v1, v2 }) +// .Sum(m => +// // Wegner algorithm +// { +// int d = 0; +// int v = m.v1 ^ m.v2; // XOR values to find all dissimilar bits +// +// // Count number of set bits +// while (v > 0) +// { +// ++d; +// v &= (v - 1); +// } +// +// return d; +// }); +// } +// +// /* +// * Levenshtein distance +// * 
http://en.wikipedia.org/wiki/Levenshtein_distance +// * +// * The original author of this method in Java is Josh Clemm +// * http://code.google.com/p/java-bk-tree +// * +// */ +// public static int CalculateLevenshteinDistance(string source, string target) +// { +// int[,] distance; // distance matrix +// int n; // length of first string +// int m; // length of second string +// int i; // iterates through first string +// int j; // iterates through second string +// char sI; // ith character of first string +// char tJ; // jth character of second string +// int cost; // cost +// +// // Step 1 +// n = source.Length; +// m = target.Length; +// if (n == 0) +// return m; +// if (m == 0) +// return n; +// distance = new int[n+1,m+1]; +// +// // Step 2 +// for (i = 0; i <= n; i++) +// distance[i,0] = i; +// for (j = 0; j <= m; j++) +// distance[0,j] = j; +// +// // Step 3 +// for (i = 1; i <= n; i++) +// { +// sI = source[i-1]; +// +// // Step 4 +// for (j = 1; j <= m; j++) +// { +// tJ = target[j-1]; +// +// // Step 5 +// if (sI == tJ) +// cost = 0; +// else +// cost = 1; +// +// // Step 6 +// distance[i,j] = +// Math.Min( +// Math.Min(distance[i-1,j]+1, distance[i,j-1]+1), +// distance[i-1,j-1] + cost ); +// } +// } +// +// // Step 7 +// return distance[n,m]; +// } +// } +} \ No newline at end of file diff --git a/src/DevChatter.Bot.Core/Util/FuzzyMatching/IBkTree.cs b/src/DevChatter.Bot.Core/Util/FuzzyMatching/IBkTree.cs new file mode 100644 index 00000000..18ff3f31 --- /dev/null +++ b/src/DevChatter.Bot.Core/Util/FuzzyMatching/IBkTree.cs @@ -0,0 +1,34 @@ +/* + * Copyright 2013 Georgia Tech Applied Research Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/** + * A BK-tree. + * + * @param type of elements in this tree + */ + +namespace DevChatter.Bot.Core.Util.FuzzyMatching +{ + public interface IBkTree + { + /** Returns the metric for elements in this tree. */ + IMetric Metric { get; } + + /** Returns the root node of this tree. */ + IBkTreeNode Root { get; } + } +} \ No newline at end of file diff --git a/src/DevChatter.Bot.Core/Util/FuzzyMatching/IBkTreeNode.cs b/src/DevChatter.Bot.Core/Util/FuzzyMatching/IBkTreeNode.cs new file mode 100644 index 00000000..af3155ae --- /dev/null +++ b/src/DevChatter.Bot.Core/Util/FuzzyMatching/IBkTreeNode.cs @@ -0,0 +1,21 @@ + +using System; + +namespace DevChatter.Bot.Core.Util.FuzzyMatching +{ + /** + * A node in a {@link BkTree}. + * + * @param type of elements in the tree to which this node belongs + */ + public interface IBkTreeNode + { + /** Returns the element in this node. */ + TKey Key { get; } + + TValue Value { get; } + + /** Returns the child node at the given distance, if any. */ + IBkTreeNode GetChildNode(Int32 distance); + } +} \ No newline at end of file diff --git a/src/DevChatter.Bot.Core/Util/FuzzyMatching/IMetric.cs b/src/DevChatter.Bot.Core/Util/FuzzyMatching/IMetric.cs new file mode 100644 index 00000000..18eb4fe7 --- /dev/null +++ b/src/DevChatter.Bot.Core/Util/FuzzyMatching/IMetric.cs @@ -0,0 +1,34 @@ +/* + * Copyright 2013 Georgia Tech Applied Research Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace DevChatter.Bot.Core.Util.FuzzyMatching +{ + /** + * A metric, e.g., a string metric, + * that defines a metric space. + * + * @param type of elements in the metric space defined by this metric + */ + public interface IMetric + { + /** + * Returns the distance between the given elements. + */ + Int32 Distance(T x, T y); + } +} diff --git a/src/DevChatter.Bot.Core/Util/FuzzyMatching/IllegalMetricException.cs b/src/DevChatter.Bot.Core/Util/FuzzyMatching/IllegalMetricException.cs new file mode 100644 index 00000000..356effe9 --- /dev/null +++ b/src/DevChatter.Bot.Core/Util/FuzzyMatching/IllegalMetricException.cs @@ -0,0 +1,32 @@ +/* + * Copyright 2013 Georgia Tech Applied Research Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace DevChatter.Bot.Core.Util.FuzzyMatching +{ + /** + * Thrown if a {@link Metric} is not a true metric, e.g., if it defines a + * negative distance between any two elements. 
+ */ + public class IllegalMetricException : Exception + { + public IllegalMetricException(String message) + : base(message) + { + } + } +} \ No newline at end of file diff --git a/src/DevChatter.Bot.Core/Util/FuzzyMatching/MutableBkTree.cs b/src/DevChatter.Bot.Core/Util/FuzzyMatching/MutableBkTree.cs new file mode 100644 index 00000000..8064e0e8 --- /dev/null +++ b/src/DevChatter.Bot.Core/Util/FuzzyMatching/MutableBkTree.cs @@ -0,0 +1,147 @@ +/* + * Copyright 2013 Georgia Tech Applied Research Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Linq; +using System.Text; + +namespace DevChatter.Bot.Core.Util.FuzzyMatching +{ + /** + * A mutable {@linkplain BkTree BK-tree}. + * + *

Mutating operations are not thread-safe. + * + *

Whereas the {@linkplain #add(Object) mutating methods} are iterative and + * can thus handle very large trees, the {@link #equals(Object)}, + * {@link #hashCode()} and {@link #toString()} methods on this class and its + * {@link BkTree.Node} implementation are each recursive and as such may not + * complete normally when called on very deep trees. + * + * @param type of elements in this tree + */ + [SuppressMessage("ReSharper", "MemberCanBePrivate.Global")] + public sealed class MutableBkTree : IBkTree + { + public IMetric Metric { get; } + + public MutableBkTreeNode Root { get; private set; } + IBkTreeNode IBkTree.Root => Root; + + public MutableBkTree(IMetric metric) + { + Metric = metric ?? throw new ArgumentNullException(nameof(metric)); + } + + /** + * Adds the given element to this tree, if it's not already present. + * + * @param element element + */ + public void Add(TKey key, TValue value) + { + if (key == null) + throw new ArgumentNullException(nameof(key)); + + if (Root == null) + { + Root = new MutableBkTreeNode(key, value); + } + else + { + var node = Root; + while (!node.Key.Equals(key)) + { + var distance = Distance(node.Key, key); + + var parent = node; + if (!parent.ChildrenByDistance.ContainsKey(distance)) + { + node = new MutableBkTreeNode(key, value); + parent.ChildrenByDistance.Add(distance, node); + break; + } + + node = parent.ChildrenByDistance[distance]; + } + } + } + + private Int32 Distance(TKey x, TKey y) + { + var distance = Metric.Distance(x, y); + if (distance < 0) + throw new IllegalMetricException($"negative distance ({distance}) defined between elements `{x}` and `{y}`"); + + return distance; + } + + /** + * Adds all of the given elements to this tree. 
+ * + * @param elements elements + */ + public void AddAll(IEnumerable<(TKey, TValue)> elements) + { + if (elements == null) + throw new ArgumentNullException(nameof(elements)); + + foreach (var (key, value) in elements) + { + Add(key, value); + } + } + + /** + * Adds all of the given elements to this tree. + * + * @param elements elements + */ + public void AddAll(params (TKey, TValue)[] elements) + { + if (elements == null) + throw new ArgumentNullException(nameof(elements)); + + AddAll(elements.AsEnumerable()); + } + + private Boolean Equals(MutableBkTree other) => + Equals(Metric, other.Metric) + && Equals(Root, other.Root); + + public override Boolean Equals(Object obj) => + ReferenceEquals(this, obj) || obj is MutableBkTree other && Equals(other); + + public override Int32 GetHashCode() + { + unchecked + { + return ((Metric != null ? Metric.GetHashCode() : 0) * 397) ^ (Root != null ? Root.GetHashCode() : 0); + } + } + + public override String ToString() + { + var sb = new StringBuilder("MutableBkTree{"); + sb.Append("metric=").Append(Metric); + sb.Append(", root=").Append(Root); + sb.Append('}'); + return sb.ToString(); + } + } +} \ No newline at end of file diff --git a/src/DevChatter.Bot.Core/Util/FuzzyMatching/MutableBkTreeNode.cs b/src/DevChatter.Bot.Core/Util/FuzzyMatching/MutableBkTreeNode.cs new file mode 100644 index 00000000..8a50f87a --- /dev/null +++ b/src/DevChatter.Bot.Core/Util/FuzzyMatching/MutableBkTreeNode.cs @@ -0,0 +1,52 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace DevChatter.Bot.Core.Util.FuzzyMatching +{ + public sealed class MutableBkTreeNode : IBkTreeNode + { + public TKey Key { get; } + + public TValue Value { get; } + + internal IDictionary> ChildrenByDistance { get; } = new Dictionary>(); + + internal MutableBkTreeNode(TKey element, TValue value) + { + if (element == null) + throw new ArgumentNullException(nameof(element)); + + Key = element; + Value = value; + } + + public IBkTreeNode 
GetChildNode(Int32 distance) => + ChildrenByDistance.ContainsKey(distance) + ? ChildrenByDistance[distance] + : null; + + private Boolean Equals(MutableBkTreeNode other) => + Equals(Key, other.Key) + && Equals(ChildrenByDistance, other.ChildrenByDistance); + + public override Boolean Equals(Object obj) => + ReferenceEquals(this, obj) || obj is MutableBkTreeNode other && Equals(other); + + public override Int32 GetHashCode() + { + var result = Key.GetHashCode(); + result = 31 * result + ChildrenByDistance.GetHashCode(); + return result; + } + + public override String ToString() + { + var sb = new StringBuilder("MutableNode{"); + sb.Append("element=").Append(Key); + sb.Append(", childrenByDistance=").Append(ChildrenByDistance); + sb.Append('}'); + return sb.ToString(); + } + } +} \ No newline at end of file From fea61505193442bb352dbca7b87a2b41058effcb Mon Sep 17 00:00:00 2001 From: Wietlol Date: Thu, 11 Jul 2019 08:50:16 +0200 Subject: [PATCH 02/10] Added CommandWords to IBotCommand for usage in the fuzzy match searching. 
--- src/DevChatter.Bot.Core/Commands/IBotCommand.cs | 1 + src/DevChatter.Bot.Core/Commands/SimpleCommand.cs | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/src/DevChatter.Bot.Core/Commands/IBotCommand.cs b/src/DevChatter.Bot.Core/Commands/IBotCommand.cs index fcd7091f..bb5e1895 100644 --- a/src/DevChatter.Bot.Core/Commands/IBotCommand.cs +++ b/src/DevChatter.Bot.Core/Commands/IBotCommand.cs @@ -18,5 +18,6 @@ public interface IBotCommand CommandUsage Process(IChatClient chatClient, CommandReceivedEventArgs eventArgs); TimeSpan GetCooldownTimeRemaining(); bool IsActiveGame(); + IList<(string Word, IList Args)> CommandWords { get; } } } diff --git a/src/DevChatter.Bot.Core/Commands/SimpleCommand.cs b/src/DevChatter.Bot.Core/Commands/SimpleCommand.cs index 7796d9d0..82ce72fe 100644 --- a/src/DevChatter.Bot.Core/Commands/SimpleCommand.cs +++ b/src/DevChatter.Bot.Core/Commands/SimpleCommand.cs @@ -25,6 +25,10 @@ public SimpleCommand(string commandText, string staticResponse, StaticResponse = staticResponse; RoleRequired = roleRequired; CommandText = commandText; + CommandWords = new List<(string Word, IList Args)> + { + (commandText, new List()) + }; } public string StaticResponse { get; protected set; } @@ -33,6 +37,7 @@ public SimpleCommand(string commandText, string staticResponse, public string CommandText { get; protected set; } public string HelpText { get; protected set; } = "No help text for this command yet."; public string FullHelpText => HelpText; + public IList<(string Word, IList Args)> CommandWords { get; } public bool ShouldExecute(string commandText, out IList args) { From 525e7963b750eb02a180e81cf3d007a57c0e712f Mon Sep 17 00:00:00 2001 From: Wietlol Date: Thu, 11 Jul 2019 08:50:37 +0200 Subject: [PATCH 03/10] Added FindCommandByKeyword() to CommandList. 
--- .../Commands/Trackers/CommandList.cs | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/DevChatter.Bot.Core/Commands/Trackers/CommandList.cs b/src/DevChatter.Bot.Core/Commands/Trackers/CommandList.cs index 4a0b55d3..746b1a5a 100644 --- a/src/DevChatter.Bot.Core/Commands/Trackers/CommandList.cs +++ b/src/DevChatter.Bot.Core/Commands/Trackers/CommandList.cs @@ -2,16 +2,27 @@ using System.Collections; using System.Collections.Generic; using System.Linq; +using DevChatter.Bot.Core.Util.FuzzyMatching; namespace DevChatter.Bot.Core.Commands.Trackers { public class CommandList : IEnumerable { private readonly IList _list; + private IBkTree arguments)> BkTree { get; } - public CommandList(IList list) + private int FuzzySearchMaxDistance { get; } + + public CommandList(IList list, int fuzzySearchMaxDistance = 2) { _list = list ?? throw new ArgumentNullException(nameof(list)); + + FuzzySearchMaxDistance = fuzzySearchMaxDistance; + + var bkTree = + new MutableBkTree arguments)>(new CaseInsensitiveMetric(new DamerauLevenshteinMetric())); + bkTree.AddAll(_list.SelectMany(command => command.CommandWords.Select(word => (word.Word, (command, word.Args))))); + BkTree = bkTree; } public T GetCommandByType() where T : class, IBotCommand @@ -33,5 +44,21 @@ public IBotCommand GetCommandByFullTypeName(string fullTypeName) { return _list.SingleOrDefault(x => x.GetType().FullName == fullTypeName); } + + public IBotCommand FindCommandByKeyword(string keyword, out IList args) + { + var searcher = new BkTreeSearcher arguments)>(BkTree); + + var command = searcher.Search(keyword, FuzzySearchMaxDistance).OrderBy(match => match.Distance).FirstOrDefault(); // Search returns an unordered set; take the closest match so an exact hit beats a fuzzy one + if (command == null) + { + args = new List(); + return null; + } + + args = command.MatchValue.arguments ?? 
new List(); + + return command.MatchValue.command; + } } } From 41bd5947434b14271d93397e011b0f02413a14cd Mon Sep 17 00:00:00 2001 From: Wietlol Date: Thu, 11 Jul 2019 08:50:59 +0200 Subject: [PATCH 04/10] Added Characterization tests to CommandList::FindCommandByKeyword. --- .../Trackers/CommandListFuzzyMatchingTest.cs | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 src/UnitTests/Core/Commands/Trackers/CommandListFuzzyMatchingTest.cs diff --git a/src/UnitTests/Core/Commands/Trackers/CommandListFuzzyMatchingTest.cs b/src/UnitTests/Core/Commands/Trackers/CommandListFuzzyMatchingTest.cs new file mode 100644 index 00000000..f82d0da8 --- /dev/null +++ b/src/UnitTests/Core/Commands/Trackers/CommandListFuzzyMatchingTest.cs @@ -0,0 +1,92 @@ +using System; +using System.Collections.Generic; +using DevChatter.Bot.Core.Commands; +using DevChatter.Bot.Core.Commands.Trackers; +using DevChatter.Bot.Core.Events; +using FluentAssertions; +using Xunit; + +namespace UnitTests.Core.Commands.Trackers +{ + // todo fix naming and location of class + public class CommandListFuzzyMatchingTest + { + [Fact] + public void FindsMatch_GivenPrefixedQuery() // "Hell" is prefix of "Hello" + { + var command = new SimpleCommand("Hello", ""); + + var list = new CommandList(new List { command, }); + + var match = list.FindCommandByKeyword("Hell", out _); + + match.Should().Be(command); + } + + [Fact] + public void FindsMatch_GivenPostfixedQuery() // "ello" is postfix of "Hello" + { + var command = new SimpleCommand("Hello", ""); + + var list = new CommandList(new List { command, }); + + var match = list.FindCommandByKeyword("ello", out _); + + match.Should().Be(command); + } + + [Fact] + public void FindsMatch_GivenTwoMisspellings() // "Halko" differs from "Hello" by two substitutions + { + var command = new SimpleCommand("Hello", ""); + + var list = new CommandList( + new List { command, }, + 2); + + var match = list.FindCommandByKeyword("Halko", out _); + + match.Should().Be(command); + } + + [Fact] + public void 
FindsNull_GivenThreeMisspellings() // "Halku" differs from "Hello" by three substitutions, above the limit of 2 + { + var command = new SimpleCommand("Hello", ""); + + var list = new CommandList( + new List { command, }, + 2); + + var match = list.FindCommandByKeyword("Halku", out _); + + match.Should().Be(null); + } + + [Fact] + public void FindsMatch_GivenThreeMisspellings_AndMaxDistanceSetToThree() // same three substitutions, now within the limit of 3 + { + var command = new SimpleCommand("Hello", ""); + + var list = new CommandList( + new List { command, }, + 3); + + var match = list.FindCommandByKeyword("Halku", out _); + + match.Should().Be(command); + } + + [Fact] + public void FindsNull_GivenUnrelatableQuery() // "Foo" is not within the default distance of 2 from "Hello" + { + var command = new SimpleCommand("Hello", ""); + + var list = new CommandList(new List { command, }); + + var match = list.FindCommandByKeyword("Foo", out _); + + match.Should().Be(null); + } + } +} From b41636eb76be9b92d3f5ab8b50bcd1b9fb9b04b0 Mon Sep 17 00:00:00 2001 From: Wietlol Date: Thu, 11 Jul 2019 08:51:13 +0200 Subject: [PATCH 05/10] Added usage of the new FindCommandByKeyword() method. --- src/DevChatter.Bot.Core/Events/CommandHandler.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DevChatter.Bot.Core/Events/CommandHandler.cs b/src/DevChatter.Bot.Core/Events/CommandHandler.cs index 5405c721..59d5f18f 100644 --- a/src/DevChatter.Bot.Core/Events/CommandHandler.cs +++ b/src/DevChatter.Bot.Core/Events/CommandHandler.cs @@ -61,7 +61,7 @@ public void CommandReceivedHandler(object sender, CommandReceivedEventArgs e) } IList args = new List(); - IBotCommand botCommand = _commandList.FirstOrDefault(c => c.ShouldExecute(e.CommandWord, out args)); + IBotCommand botCommand = _commandList.FindCommandByKeyword(e.CommandWord, out args); if (botCommand == null) { return; From e2415144d28a5aa92c53043cedea4d43b5fd1842 Mon Sep 17 00:00:00 2001 From: Wietlol Date: Thu, 11 Jul 2019 08:51:23 +0200 Subject: [PATCH 06/10] Made a note that HelpCommand must use a CommandList instead of List to also make use of the fuzzy matching. 
--- src/DevChatter.Bot.Core/Commands/HelpCommand.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/DevChatter.Bot.Core/Commands/HelpCommand.cs b/src/DevChatter.Bot.Core/Commands/HelpCommand.cs index 8647bebb..90171392 100644 --- a/src/DevChatter.Bot.Core/Commands/HelpCommand.cs +++ b/src/DevChatter.Bot.Core/Commands/HelpCommand.cs @@ -6,6 +6,7 @@ using System; using System.Collections.Generic; using System.Linq; +using DevChatter.Bot.Core.Commands.Trackers; namespace DevChatter.Bot.Core.Commands { @@ -20,6 +21,8 @@ public HelpCommand(IRepository repository, IServiceProvider provider) } private IList _allCommands; + + // todo refactor to DevChatter.Bot.Core.Commands.Trackers.CommandList to allow fuzzy search public IList AllCommands { get { return _allCommands ?? (_allCommands = _provider.GetService>()); } From 443435d9beac78ca48056fd047df4a144625478d Mon Sep 17 00:00:00 2001 From: Wietlol Date: Thu, 11 Jul 2019 08:51:34 +0200 Subject: [PATCH 07/10] Made a note that DeleteCommandOperation must use a CommandList instead of List to also make use of the fuzzy matching. 
--- .../Commands/Operations/DeleteCommandOperation.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/DevChatter.Bot.Core/Commands/Operations/DeleteCommandOperation.cs b/src/DevChatter.Bot.Core/Commands/Operations/DeleteCommandOperation.cs index e0b3c7ef..70042468 100644 --- a/src/DevChatter.Bot.Core/Commands/Operations/DeleteCommandOperation.cs +++ b/src/DevChatter.Bot.Core/Commands/Operations/DeleteCommandOperation.cs @@ -11,6 +11,8 @@ namespace DevChatter.Bot.Core.Commands.Operations public class DeleteCommandOperation : BaseCommandOperation { private readonly IRepository _repository; + + // todo refactor to DevChatter.Bot.Core.Commands.Trackers.CommandList to allow fuzzy search private readonly IList _allCommands; public DeleteCommandOperation(IRepository repository, IList allCommands) From a5078f86676c50d92d0eff68abf75d165e070c2e Mon Sep 17 00:00:00 2001 From: Wietlol Date: Thu, 11 Jul 2019 09:16:05 +0200 Subject: [PATCH 08/10] Removed unused DistanceMetric file. --- .../Util/FuzzyMatching/DistanceMetric.cs | 115 ------------------ 1 file changed, 115 deletions(-) delete mode 100644 src/DevChatter.Bot.Core/Util/FuzzyMatching/DistanceMetric.cs diff --git a/src/DevChatter.Bot.Core/Util/FuzzyMatching/DistanceMetric.cs b/src/DevChatter.Bot.Core/Util/FuzzyMatching/DistanceMetric.cs deleted file mode 100644 index d0b95f63..00000000 --- a/src/DevChatter.Bot.Core/Util/FuzzyMatching/DistanceMetric.cs +++ /dev/null @@ -1,115 +0,0 @@ -namespace DevChatter.Bot.Core.Util.FuzzyMatching -{ -// public static class DistanceMetric -// { -// /* -// * Lee Distance -// * http://en.wikipedia.org/wiki/Lee_distance -// */ -// public static int CalculateLeeDistance(int[] source, int[] target) -// { -// if (source.Length != target.Length) -// { -// throw new Exception("Lee distance string comparisons must be of equal length."); -// } -// -// // Iterate both arrays simultaneously, summing absolute value of difference at each position -// return source -// .Zip(target, (v1, v2) 
=> new { v1, v2 }) -// .Sum(m => Math.Abs(m.v1 - m.v2)); -// } -// -// /* -// * Hamming distance -// * http://en.wikipedia.org/wiki/Hamming_distance -// */ -// public static int CalculateHammingDistance(byte[] source, byte[] target) -// { -// if (source.Length != target.Length) -// { -// throw new Exception("Hamming distance string comparisons must be of equal length."); -// } -// -// // Iterate both arrays simultaneously, summing count of bit differences of each byte -// return source -// .Zip(target, (v1, v2) => new { v1, v2 }) -// .Sum(m => -// // Wegner algorithm -// { -// int d = 0; -// int v = m.v1 ^ m.v2; // XOR values to find all dissimilar bits -// -// // Count number of set bits -// while (v > 0) -// { -// ++d; -// v &= (v - 1); -// } -// -// return d; -// }); -// } -// -// /* -// * Levenshtein distance -// * http://en.wikipedia.org/wiki/Levenshtein_distance -// * -// * The original author of this method in Java is Josh Clemm -// * http://code.google.com/p/java-bk-tree -// * -// */ -// public static int CalculateLevenshteinDistance(string source, string target) -// { -// int[,] distance; // distance matrix -// int n; // length of first string -// int m; // length of second string -// int i; // iterates through first string -// int j; // iterates through second string -// char sI; // ith character of first string -// char tJ; // jth character of second string -// int cost; // cost -// -// // Step 1 -// n = source.Length; -// m = target.Length; -// if (n == 0) -// return m; -// if (m == 0) -// return n; -// distance = new int[n+1,m+1]; -// -// // Step 2 -// for (i = 0; i <= n; i++) -// distance[i,0] = i; -// for (j = 0; j <= m; j++) -// distance[0,j] = j; -// -// // Step 3 -// for (i = 1; i <= n; i++) -// { -// sI = source[i-1]; -// -// // Step 4 -// for (j = 1; j <= m; j++) -// { -// tJ = target[j-1]; -// -// // Step 5 -// if (sI == tJ) -// cost = 0; -// else -// cost = 1; -// -// // Step 6 -// distance[i,j] = -// Math.Min( -// Math.Min(distance[i-1,j]+1, 
distance[i,j-1]+1), -// distance[i-1,j-1] + cost ); -// } -// } -// -// // Step 7 -// return distance[n,m]; -// } -// } -} \ No newline at end of file From 5a360095d8d2775fa6fbf789d0ff0bd792a61491 Mon Sep 17 00:00:00 2001 From: Wietlol Date: Thu, 11 Jul 2019 09:16:20 +0200 Subject: [PATCH 09/10] Added IsEnabled to the IBotCommand interface. --- src/DevChatter.Bot.Core/Commands/BaseCommand.cs | 6 +++--- src/DevChatter.Bot.Core/Commands/IBotCommand.cs | 1 + src/DevChatter.Bot.Core/Commands/SimpleCommand.cs | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/DevChatter.Bot.Core/Commands/BaseCommand.cs b/src/DevChatter.Bot.Core/Commands/BaseCommand.cs index d9c3ec0f..21ddfb98 100644 --- a/src/DevChatter.Bot.Core/Commands/BaseCommand.cs +++ b/src/DevChatter.Bot.Core/Commands/BaseCommand.cs @@ -17,7 +17,7 @@ public abstract class BaseCommand : IBotCommand { protected readonly IRepository Repository; protected DateTimeOffset _timeCommandLastInvoked; - private bool _isEnabled; + public bool IsEnabled { get; private set; } public UserRole RoleRequired { get; private set; } public TimeSpan Cooldown { get; private set; } = TimeSpan.Zero; public string PrimaryCommandText => CommandWords.FirstOrDefault().Word; @@ -42,7 +42,7 @@ private void RefreshCommandData() cmdInfo.Insert(0, (command.CommandWord, new List())); RoleRequired = command.RequiredRole; - _isEnabled = command.IsEnabled; + IsEnabled = command.IsEnabled; HelpText = command.HelpText; Cooldown = command.Cooldown; CommandWords = cmdInfo; @@ -53,7 +53,7 @@ private void RefreshCommandData() public bool ShouldExecute(string commandText, out IList args) { args = new List(); - if (_isEnabled) + if (IsEnabled) { if (CommandWords.Any(x => x.Word.EqualsIns(commandText))) { diff --git a/src/DevChatter.Bot.Core/Commands/IBotCommand.cs b/src/DevChatter.Bot.Core/Commands/IBotCommand.cs index bb5e1895..dec05827 100644 --- a/src/DevChatter.Bot.Core/Commands/IBotCommand.cs +++ 
b/src/DevChatter.Bot.Core/Commands/IBotCommand.cs @@ -19,5 +19,6 @@ public interface IBotCommand TimeSpan GetCooldownTimeRemaining(); bool IsActiveGame(); IList<(string Word, IList Args)> CommandWords { get; } + bool IsEnabled { get; } } } diff --git a/src/DevChatter.Bot.Core/Commands/SimpleCommand.cs b/src/DevChatter.Bot.Core/Commands/SimpleCommand.cs index 82ce72fe..03b268c6 100644 --- a/src/DevChatter.Bot.Core/Commands/SimpleCommand.cs +++ b/src/DevChatter.Bot.Core/Commands/SimpleCommand.cs @@ -38,6 +38,7 @@ public SimpleCommand(string commandText, string staticResponse, public string HelpText { get; protected set; } = "No help text for this command yet."; public string FullHelpText => HelpText; public IList<(string Word, IList Args)> CommandWords { get; } + public Boolean IsEnabled => true; public bool ShouldExecute(string commandText, out IList args) { From faa8cfc5208a516a3fb881f9357e54eb02d47e95 Mon Sep 17 00:00:00 2001 From: Wietlol Date: Thu, 11 Jul 2019 09:16:36 +0200 Subject: [PATCH 10/10] Updated search to exclude disabled commands. --- src/DevChatter.Bot.Core/Commands/Trackers/CommandList.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/DevChatter.Bot.Core/Commands/Trackers/CommandList.cs b/src/DevChatter.Bot.Core/Commands/Trackers/CommandList.cs index 746b1a5a..f55a48b3 100644 --- a/src/DevChatter.Bot.Core/Commands/Trackers/CommandList.cs +++ b/src/DevChatter.Bot.Core/Commands/Trackers/CommandList.cs @@ -49,7 +49,10 @@ public IBotCommand FindCommandByKeyword(string keyword, out IList args) { var searcher = new BkTreeSearcher arguments)>(BkTree); - var command = searcher.Search(keyword, FuzzySearchMaxDistance).FirstOrDefault(); + var command = searcher + .Search(keyword, FuzzySearchMaxDistance) + .FirstOrDefault(it => it.MatchValue.command.IsEnabled); + if (command == null) { args = new List();