Skip to content

Commit

Permalink
Add entries prefix tree (Shopify#955)
Browse files Browse the repository at this point in the history
* Allow overriding values in a prefix tree

* Start maintaining a prefix tree for entries
  • Loading branch information
vinistock authored Sep 1, 2023
1 parent b98702d commit b970b45
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 5 deletions.
47 changes: 43 additions & 4 deletions lib/ruby_indexer/lib/ruby_indexer/index.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ def initialize
# }
@entries = T.let({}, T::Hash[String, T::Array[Entry]])

# Holds all entries in the index using a prefix tree for searching based on prefixes to provide autocompletion
@entries_tree = T.let(PrefixTree[T::Array[Entry]].new, PrefixTree[T::Array[Entry]])

# Holds references to where entries were discovered so that we can easily delete them
# {
# "/my/project/foo.rb" => [#<Entry::Class>, #<Entry::Class>],
Expand All @@ -33,13 +36,21 @@ def delete(indexable)
# For each constant discovered in `path`, delete the associated entry from the index. If there are no entries
# left, delete the constant from the index.
@files_to_entries[indexable.full_path]&.each do |entry|
entries = @entries[entry.name]
name = entry.name
entries = @entries[name]
next unless entries

# Delete the specific entry from the list for this name
entries.delete(entry)
# If all entries were deleted, then remove the name from the hash
@entries.delete(entry.name) if entries.empty?

# If all entries were deleted, then remove the name from the hash and from the prefix tree. Otherwise, update
# the prefix tree with the current entries
if entries.empty?
@entries.delete(name)
@entries_tree.delete(name)
else
@entries_tree.insert(name, entries)
end
end

@files_to_entries.delete(indexable.full_path)
Expand All @@ -50,8 +61,11 @@ def delete(indexable)

# Registers `entry` in the index. The entry is appended exactly once to the list kept for its name, recorded under
# the file it was discovered in, and the prefix tree is updated so that the name maps to the current list of entries.
sig { params(entry: Entry).void }
def <<(entry)
  name = entry.name

  # Fetch (or create) the bucket for this name once and reuse it, so the entry is only appended a single time and
  # the prefix tree receives the very same array that is stored in `@entries`
  entries = (@entries[name] ||= [])
  entries << entry

  (@files_to_entries[entry.file_path] ||= []) << entry
  @entries_tree.insert(name, entries)
end

sig { params(fully_qualified_name: String).returns(T.nilable(T::Array[Entry])) }
Expand All @@ -64,6 +78,31 @@ def search_require_paths(query)
@require_paths_tree.search(query)
end

# Searches entries in the index based on an exact prefix, intended for providing autocomplete. All possible matches
# to the prefix are returned. The return is an array of arrays, where each element is the array of entries for a
# given name match.
#
# The query is tried inside every namespace described by `nesting`, starting with the full nesting and dropping the
# last namespace on each step until the query is searched with no namespace at all. Duplicate matches are removed.
#
# ## Example
# ```ruby
# # If the index has two entries for `Foo::Bar` and one for `Foo::Baz`, then:
# index.prefix_search("Foo::B", [])
# # or, equivalently, from inside the `Foo` namespace:
# index.prefix_search("B", ["Foo"])
# # Will return:
# [
#   [#<Entry::Class name="Foo::Bar">, #<Entry::Class name="Foo::Bar">],
#   [#<Entry::Class name="Foo::Baz">],
# ]
# ```
sig { params(query: String, nesting: T::Array[String]).returns(T::Array[T::Array[Entry]]) }
def prefix_search(query, nesting)
  # Start at `nesting.length` (the complete nesting). Starting one higher would only repeat the same search, since
  # taking more elements than the array holds yields the whole array again
  results = nesting.length.downto(0).flat_map do |i|
    prefix = nesting.first(i).join("::")
    namespaced_query = prefix.empty? ? query : "#{prefix}::#{query}"
    @entries_tree.search(namespaced_query)
  end

  # `uniq!` returns nil when nothing was removed, so the array itself is returned explicitly
  results.uniq!
  results
end

# Fuzzy searches index entries based on Jaro-Winkler similarity. If no query is provided, all entries are returned
sig { params(query: T.nilable(String)).returns(T::Array[Entry]) }
def fuzzy_search(query)
Expand Down
6 changes: 5 additions & 1 deletion lib/ruby_indexer/lib/ruby_indexer/prefix_tree.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ def insert(key, value)
node = node.children[char] ||= Node.new(char, value, node)
end

# This line is to allow a value to be overridden. When we are indexing files, we want to be able to update entries
# for a given fully qualified name if we find more occurrences of it. Without being able to override, that would
# not be possible
node.value = value
node.leaf = true
end

Expand Down Expand Up @@ -116,7 +120,7 @@ class Node
attr_reader :key

sig { returns(Value) }
attr_reader :value
attr_accessor :value

sig { returns(T::Boolean) }
attr_accessor :leaf
Expand Down
20 changes: 20 additions & 0 deletions lib/ruby_indexer/test/index_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -138,5 +138,25 @@ class Foo

assert_equal(["path/foo", "path/other_foo"], @index.search_require_paths("path"))
end

# Indexes two files that declare `Foo::Bar` (twice) and `Foo::Baz` (once) and verifies that a prefix search finds
# them both with a fully qualified query and with a partial query resolved through the nesting
def test_searching_for_entries_based_on_prefix
  first_file = IndexablePath.new("/fake", "/fake/path/foo.rb")
  @index.index_single(first_file, <<~RUBY)
    class Foo::Bar
    end
  RUBY

  second_file = IndexablePath.new("/fake", "/fake/path/other_foo.rb")
  @index.index_single(second_file, <<~RUBY)
    class Foo::Bar
    end
    class Foo::Baz
    end
  RUBY

  # Reduces the grouped entries to their names so they can be compared against plain strings
  names_of = ->(groups) { groups.map { |group| group.map(&:name) } }
  expected_names = [["Foo::Bar", "Foo::Bar"], ["Foo::Baz"]]

  # Fully qualified prefix with no surrounding namespace
  assert_equal(expected_names, names_of.call(@index.prefix_search("Foo", [])))

  # Partial constant name searched from within the `Foo` namespace
  assert_equal(expected_names, names_of.call(@index.prefix_search("Ba", ["Foo"])))
end
end
end
10 changes: 10 additions & 0 deletions lib/ruby_indexer/test/prefix_tree_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -136,5 +136,15 @@ def test_deleting_non_terminal_nodes
assert_empty(tree.search("abcdef"))
assert_equal(["value1"], tree.search("abc"))
end

# Inserting a second value under an existing key must replace the stored value rather than keep the first one
def test_overriding_values
  tree = PrefixTree[Integer].new

  # After each insertion, a search for the key must yield only the most recently inserted value
  [123, 456].each do |value|
    tree.insert("foo/bar", value)
    assert_equal([value], tree.search("foo/bar"))
  end
end
end
end

0 comments on commit b970b45

Please sign in to comment.