From b970b45d66e5518a33e718e4f3700b46e14dd7fa Mon Sep 17 00:00:00 2001 From: Vinicius Stock Date: Fri, 1 Sep 2023 14:29:18 -0400 Subject: [PATCH] Add entries prefix tree (#955) * Allow overriding values in a prefix tree * Start maintaining a prefix tree for entries --- lib/ruby_indexer/lib/ruby_indexer/index.rb | 47 +++++++++++++++++-- .../lib/ruby_indexer/prefix_tree.rb | 6 ++- lib/ruby_indexer/test/index_test.rb | 20 ++++++++ lib/ruby_indexer/test/prefix_tree_test.rb | 10 ++++ 4 files changed, 78 insertions(+), 5 deletions(-) diff --git a/lib/ruby_indexer/lib/ruby_indexer/index.rb b/lib/ruby_indexer/lib/ruby_indexer/index.rb index 55ee85b7c..68b74771a 100644 --- a/lib/ruby_indexer/lib/ruby_indexer/index.rb +++ b/lib/ruby_indexer/lib/ruby_indexer/index.rb @@ -17,6 +17,9 @@ def initialize # } @entries = T.let({}, T::Hash[String, T::Array[Entry]]) + # Holds all entries in the index using a prefix tree for searching based on prefixes to provide autocompletion + @entries_tree = T.let(PrefixTree[T::Array[Entry]].new, PrefixTree[T::Array[Entry]]) + # Holds references to where entries where discovered so that we can easily delete them # { # "/my/project/foo.rb" => [#, #], @@ -33,13 +36,21 @@ def delete(indexable) # For each constant discovered in `path`, delete the associated entry from the index. If there are no entries # left, delete the constant from the index. @files_to_entries[indexable.full_path]&.each do |entry| - entries = @entries[entry.name] + name = entry.name + entries = @entries[name] next unless entries # Delete the specific entry from the list for this name entries.delete(entry) - # If all entries were deleted, then remove the name from the hash - @entries.delete(entry.name) if entries.empty? + + # If all entries were deleted, then remove the name from the hash and from the prefix tree. Otherwise, update + # the prefix tree with the current entries + if entries.empty? + @entries.delete(name) + @entries_tree.delete(name) + else + @entries_tree.insert(name, entries) + end end @files_to_entries.delete(indexable.full_path) @@ -50,8 +61,11 @@ def delete(indexable) sig { params(entry: Entry).void } def <<(entry) - (@entries[entry.name] ||= []) << entry + name = entry.name + + (@entries[name] ||= []) << entry (@files_to_entries[entry.file_path] ||= []) << entry + @entries_tree.insert(name, T.must(@entries[name])) end sig { params(fully_qualified_name: String).returns(T.nilable(T::Array[Entry])) } @@ -64,6 +78,31 @@ def search_require_paths(query) @require_paths_tree.search(query) end + # Searches entries in the index based on an exact prefix, intended for providing autocomplete. All possible matches + # to the prefix are returned. The return is an array of arrays, where each entry is the array of entries for a given + # name match. For example: + # ## Example + # ```ruby + # # If the index has two entries for `Foo::Bar` and one for `Foo::Baz`, then: + # index.prefix_search("Foo::B") + # # Will return: + # [ + # [#, #], + # [#], + # ] + # ``` + sig { params(query: String, nesting: T::Array[String]).returns(T::Array[T::Array[Entry]]) } + def prefix_search(query, nesting) + results = (nesting.length + 1).downto(0).flat_map do |i| + prefix = T.must(nesting[0...i]).join("::") + namespaced_query = prefix.empty? ? query : "#{prefix}::#{query}" + @entries_tree.search(namespaced_query) + end + + results.uniq! + results + end + # Fuzzy searches index entries based on Jaro-Winkler similarity. If no query is provided, all entries are returned sig { params(query: T.nilable(String)).returns(T::Array[Entry]) } def fuzzy_search(query) diff --git a/lib/ruby_indexer/lib/ruby_indexer/prefix_tree.rb b/lib/ruby_indexer/lib/ruby_indexer/prefix_tree.rb index 881a01848..800edd4c0 100644 --- a/lib/ruby_indexer/lib/ruby_indexer/prefix_tree.rb +++ b/lib/ruby_indexer/lib/ruby_indexer/prefix_tree.rb @@ -64,6 +64,10 @@ def insert(key, value) node = node.children[char] ||= Node.new(char, value, node) end + # This line is to allow a value to be overridden. When we are indexing files, we want to be able to update entries + # for a given fully qualified name if we find more occurrences of it. Without being able to override, that would + # not be possible + node.value = value node.leaf = true end @@ -116,7 +120,7 @@ class Node attr_reader :key sig { returns(Value) } - attr_reader :value + attr_accessor :value sig { returns(T::Boolean) } attr_accessor :leaf diff --git a/lib/ruby_indexer/test/index_test.rb b/lib/ruby_indexer/test/index_test.rb index 8f84ef2e0..b2323e2af 100644 --- a/lib/ruby_indexer/test/index_test.rb +++ b/lib/ruby_indexer/test/index_test.rb @@ -138,5 +138,25 @@ class Foo assert_equal(["path/foo", "path/other_foo"], @index.search_require_paths("path")) end + + def test_searching_for_entries_based_on_prefix + @index.index_single(IndexablePath.new("/fake", "/fake/path/foo.rb"), <<~RUBY) + class Foo::Bar + end + RUBY + @index.index_single(IndexablePath.new("/fake", "/fake/path/other_foo.rb"), <<~RUBY) + class Foo::Bar + end + + class Foo::Baz + end + RUBY + + results = @index.prefix_search("Foo", []).map { |entries| entries.map(&:name) } + assert_equal([["Foo::Bar", "Foo::Bar"], ["Foo::Baz"]], results) + + results = @index.prefix_search("Ba", ["Foo"]).map { |entries| entries.map(&:name) } + assert_equal([["Foo::Bar", "Foo::Bar"], ["Foo::Baz"]], results) + end end end diff --git a/lib/ruby_indexer/test/prefix_tree_test.rb b/lib/ruby_indexer/test/prefix_tree_test.rb index 90f340668..9f3286f6f 100644 --- a/lib/ruby_indexer/test/prefix_tree_test.rb +++ b/lib/ruby_indexer/test/prefix_tree_test.rb @@ -136,5 +136,15 @@ def test_deleting_non_terminal_nodes assert_empty(tree.search("abcdef")) assert_equal(["value1"], tree.search("abc")) end + + def test_overriding_values + tree = PrefixTree[Integer].new + + tree.insert("foo/bar", 123) + assert_equal([123], tree.search("foo/bar")) + + tree.insert("foo/bar", 456) + assert_equal([456], tree.search("foo/bar")) + end end end