Skip to content

Commit

Permalink
Add support for all languages in py-treesitter (run-llama#13587)
Browse files Browse the repository at this point in the history
  • Loading branch information
345ishaan authored Jul 1, 2024
1 parent a7c7920 commit b17edc7
Show file tree
Hide file tree
Showing 19 changed files with 673 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ exclude = ["**/BUILD"]
license = "MIT"
name = "llama-index-indices-managed-llama-cloud"
readme = "README.md"
version = "0.2.1"
version = "0.2.2"

[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os

from collections import defaultdict
from enum import Enum
from tree_sitter import Node
Expand Down Expand Up @@ -230,13 +232,8 @@ def __init__(
):
callback_manager = callback_manager or CallbackManager([])

if signature_identifiers is None:
try:
signature_identifiers = _DEFAULT_SIGNATURE_IDENTIFIERS[language]
except KeyError:
raise ValueError(
f"Must provide signature_identifiers for language {language}."
)
if signature_identifiers is None and language in _DEFAULT_SIGNATURE_IDENTIFIERS:
signature_identifiers = _DEFAULT_SIGNATURE_IDENTIFIERS[language]

super().__init__(
include_prev_next_rel=False,
Expand Down Expand Up @@ -529,6 +526,14 @@ def _parse_nodes(

try:
parser = tree_sitter_languages.get_parser(self.language)
language = tree_sitter_languages.get_language(self.language)

# Construct the path to the SCM file
scm_fname = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"pytree-sitter-queries",
f"tree-sitter-{self.language}-tags.scm",
)
except Exception as e:
print(
f"Could not get parser for language {self.language}. Check "
Expand All @@ -537,13 +542,38 @@ def _parse_nodes(
)
raise e # noqa: TRY201

query = None
if self.signature_identifiers is None:
assert os.path.exists(scm_fname), f"Could not find {scm_fname}"
fp = open(scm_fname)
query_scm = fp.read()
query = language.query(query_scm)

nodes_with_progress = get_tqdm_iterable(
nodes, show_progress, "Parsing documents into nodes"
)

for node in nodes_with_progress:
text = node.text
tree = parser.parse(bytes(text, "utf-8"))

if self.signature_identifiers is None:
assert query is not None
self.signature_identifiers = {}
tag_to_type = {}
captures = query.captures(tree.root_node)
for _node, _tag in captures:
tag_to_type[_tag] = _node.type
if _tag.startswith("name.definition"):
# ignore name.
parent_tag = _tag[5:]
assert parent_tag in tag_to_type
parent_type = tag_to_type[parent_tag]
if parent_type not in self.signature_identifiers:
self.signature_identifiers[
parent_type
] = _SignatureCaptureOptions(name_identifier=_node.type)

if (
not tree.root_node.children
or tree.root_node.children[0].type != "ERROR"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
; Tree-sitter tag queries for type and function definitions.
; NOTE(review): node names look like the tree-sitter C grammar — confirm against the file name.
; Every pattern captures the whole construct as @definition.<kind> and the
; identifier naming it as @name.definition.<kind>.

; Struct that has both a name and a body (any node is accepted as the body).
(struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class

; Declaration whose type is a named union; @definition.class is on the enclosing declaration.
(declaration type: (union_specifier name: (type_identifier) @name.definition.class)) @definition.class

; Function declarator whose declarator is a plain identifier.
(function_declarator declarator: (identifier) @name.definition.function) @definition.function

; Type definition whose declarator is a type_identifier.
(type_definition declarator: (type_identifier) @name.definition.type) @definition.type

; Named enum; tagged with the same "type" kind as type definitions.
(enum_specifier name: (type_identifier) @name.definition.type) @definition.type
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
; Tree-sitter tag queries for definitions (@definition.*) and references (@reference.*).
; NOTE(review): node names look like the tree-sitter C# grammar — confirm against the file name.
; The identifier of each tagged construct is captured as @name.definition.* / @name.reference.*.

; Class definition, named by its identifier.
(class_declaration
name: (identifier) @name.definition.class
) @definition.class

; Each entry of a class's base list is a class reference.
(class_declaration
bases: (base_list (_) @name.reference.class)
) @reference.class

; Interface definition.
(interface_declaration
name: (identifier) @name.definition.interface
) @definition.interface

; Each entry of an interface's base list is an interface reference.
(interface_declaration
bases: (base_list (_) @name.reference.interface)
) @reference.interface

; Method definition.
(method_declaration
name: (identifier) @name.definition.method
) @definition.method

; Object creation whose type is a plain identifier references that class.
(object_creation_expression
type: (identifier) @name.reference.class
) @reference.class

; Target of a type-parameter constraints clause.
(type_parameter_constraints_clause
target: (identifier) @name.reference.class
) @reference.class

; Type named inside a type constraint.
(type_constraint
type: (identifier) @name.reference.class
) @reference.class

; Declared type of a variable declaration.
(variable_declaration
type: (identifier) @name.reference.class
) @reference.class

; Invocation through a member access; the member name is tagged as a "send" reference.
(invocation_expression
function:
(member_access_expression
name: (identifier) @name.reference.send
)
) @reference.send

; Namespace declaration, tagged as a module definition.
(namespace_declaration
name: (identifier) @name.definition.module
) @definition.module
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
; Tree-sitter tag queries for type, function and method definitions.
; NOTE(review): node names look like the tree-sitter C++ grammar — confirm against the file name.
; Every pattern captures the whole construct as @definition.<kind> and the
; identifier naming it as @name.definition.<kind>.

; Struct that has both a name and a body (any node is accepted as the body).
(struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class

; Declaration whose type is a named union; @definition.class is on the enclosing declaration.
(declaration type: (union_specifier name: (type_identifier) @name.definition.class)) @definition.class

; Function declarator named by a plain identifier.
(function_declarator declarator: (identifier) @name.definition.function) @definition.function

; Function declarator named by a field_identifier.
(function_declarator declarator: (field_identifier) @name.definition.function) @definition.function

; Function declarator with a qualified name: the namespace_identifier is captured as @scope
; and the trailing identifier as the method name.
(function_declarator declarator: (qualified_identifier scope: (namespace_identifier) @scope name: (identifier) @name.definition.method)) @definition.method

; Type definition whose declarator is a type_identifier.
(type_definition declarator: (type_identifier) @name.definition.type) @definition.type

; Named enum; tagged with the same "type" kind as type definitions.
(enum_specifier name: (type_identifier) @name.definition.type) @definition.type

; Named class specifier (no body is required by this pattern).
(class_specifier name: (type_identifier) @name.definition.class) @definition.class
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
;; Tree-sitter tag queries for function definitions and function-call references.
;; NOTE(review): node names look like the tree-sitter Emacs Lisp grammar — confirm against the file name.

;; defun/defsubst
;; The symbol in the name field is the function name.
(function_definition name: (symbol) @name.definition.function) @definition.function

;; Treat macros as function definitions for the sake of TAGS.
(macro_definition name: (symbol) @name.definition.function) @definition.function

;; Match function calls
;; NOTE(review): the pattern has no anchor, so it appears to capture every symbol
;; that is a direct child of a list, not only the first one — confirm this is intended.
(list (symbol) @name.reference.function) @reference.function
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
; Tree-sitter tag queries for module/function definitions and call/module references.
; NOTE(review): node names look like the tree-sitter Elixir grammar — confirm against the file name.
; The @ignore capture holds the call target so #match? can test it against a keyword list;
; it is not itself a definition or reference tag.

; Definitions

; * modules and protocols
; A call whose target is `defmodule` or `defprotocol`; the alias argument is the module name.
(call
target: (identifier) @ignore
(arguments (alias) @name.definition.module)
(#match? @ignore "^(defmodule|defprotocol)$")) @definition.module

; * functions/macros
; A call whose target is one of the def* keywords listed in the #match? below;
; the function name is taken from whichever alternative in the [...] matches.
(call
target: (identifier) @ignore
(arguments
[
; zero-arity functions with no parentheses
(identifier) @name.definition.function
; regular function clause
(call target: (identifier) @name.definition.function)
; function clause with a guard clause
(binary_operator
left: (call target: (identifier) @name.definition.function)
operator: "when")
])
(#match? @ignore "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @definition.function

; References

; ignore calls to kernel/special-forms keywords
; NOTE(review): this pattern yields only an @ignore capture; whether it suppresses the
; @reference.call pattern further down depends on the consumer of the query — confirm.
(call
target: (identifier) @ignore
(#match? @ignore "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp|defmodule|defprotocol|defimpl|defstruct|defexception|defoverridable|alias|case|cond|else|for|if|import|quote|raise|receive|require|reraise|super|throw|try|unless|unquote|unquote_splicing|use|with)$"))

; ignore module attributes
(unary_operator
operator: "@"
operand: (call
target: (identifier) @ignore))

; * function call
(call
target: [
; local
(identifier) @name.reference.call
; remote
(dot
right: (identifier) @name.reference.call)
]) @reference.call

; * pipe into function call
; Only matches when the right-hand side of `|>` is a bare identifier.
(binary_operator
operator: "|>"
right: (identifier) @name.reference.call) @reference.call

; * modules
; Every alias node is both the name and the reference node.
(alias) @name.reference.module @reference.module
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; Tree-sitter tag queries for function, type, union-variant and module
; definitions (@definition.*) and references (@reference.*).
; NOTE(review): node names look like the tree-sitter Elm grammar — confirm against the file name.
; Every pattern must be a balanced S-expression followed by its capture name;
; a stray ")" makes the whole query file fail to compile.

; Function definition: the lower-case identifier on the left of a value declaration.
(value_declaration (function_declaration_left (lower_case_identifier) @name.definition.function)) @definition.function

; Function references: call targets, exposed values, and names in type annotations.
(function_call_expr (value_expr (value_qid) @name.reference.function)) @reference.function
(exposed_value (lower_case_identifier) @name.reference.function) @reference.function
(type_annotation ((lower_case_identifier) @name.reference.function) (colon)) @reference.function

; Type definition: the upper-case identifier of a type declaration.
(type_declaration ((upper_case_identifier) @name.definition.type) ) @definition.type

; Type references: type_ref nodes and exposed types.
(type_ref (upper_case_qid (upper_case_identifier) @name.reference.type)) @reference.type
(exposed_type (upper_case_identifier) @name.reference.type) @reference.type

; Union variant definition; @definition.union is on the enclosing type declaration.
(type_declaration (union_variant (upper_case_identifier) @name.definition.union)) @definition.union

; Union variant reference used as a value expression.
(value_expr (upper_case_qid (upper_case_identifier) @name.reference.union)) @reference.union


; Module definition: the name capture is the whole qualified identifier (upper_case_qid).
(module_declaration
(upper_case_qid (upper_case_identifier)) @name.definition.module
) @definition.module
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
; Tree-sitter tag queries for function/method/type definitions and call/type references.
; NOTE(review): node names look like the tree-sitter Go grammar — confirm against the file name.
; NOTE(review): #strip! and #set-adjacent! are directives; what they do is decided by
; whatever consumes the query — confirm the consumer honours them.

; Function declaration, optionally preceded by comment nodes captured as @doc.
; The "." anchor requires the comments to be immediately before the declaration.
; The #strip! pattern matches a leading "//" and the whitespace after it.
(
(comment)* @doc
.
(function_declaration
name: (identifier) @name.definition.function) @definition.function
(#strip! @doc "^//\\s*")
(#set-adjacent! @doc @definition.function)
)

; Method declaration, same shape as above; the name is a field_identifier.
(
(comment)* @doc
.
(method_declaration
name: (field_identifier) @name.definition.method) @definition.method
(#strip! @doc "^//\\s*")
(#set-adjacent! @doc @definition.method)
)

; Call references: the callee may be a bare identifier or a selector's field,
; each optionally wrapped in parentheses.
(call_expression
function: [
(identifier) @name.reference.call
(parenthesized_expression (identifier) @name.reference.call)
(selector_expression field: (field_identifier) @name.reference.call)
(parenthesized_expression (selector_expression field: (field_identifier) @name.reference.call))
]) @reference.call

; Type definition: the name of a type_spec.
(type_spec
name: (type_identifier) @name.definition.type) @definition.type

; Every type_identifier is tagged as a type reference; this pattern has no
; surrounding context, so it also matches the identifier inside the type_spec above.
(type_identifier) @name.reference.type @reference.type
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
; Tree-sitter tag queries for class/method/interface definitions and
; call/class/implementation references.
; NOTE(review): node names look like the tree-sitter Java grammar — confirm against the file name.

; Class definition.
(class_declaration
name: (identifier) @name.definition.class) @definition.class

; Method definition.
(method_declaration
name: (identifier) @name.definition.method) @definition.method

; Method call: note that @reference.call is attached to the argument_list node,
; not to the whole method_invocation.
(method_invocation
name: (identifier) @name.reference.call
arguments: (argument_list) @reference.call)

; Interface definition.
(interface_declaration
name: (identifier) @name.definition.interface) @definition.interface

; Each type_identifier inside a type_list is tagged as an "implementation" reference.
(type_list
(type_identifier) @name.reference.implementation) @reference.implementation

; Object creation references the created class.
(object_creation_expression
type: (type_identifier) @name.reference.class) @reference.class

; Superclass clause references the parent class.
(superclass (type_identifier) @name.reference.class) @reference.class
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
; Tree-sitter tag queries for method/class/function definitions and call/class references.
; NOTE(review): node names look like the tree-sitter JavaScript grammar — confirm against the file name.
; Several patterns capture zero or more comment nodes immediately preceding the
; definition (the "." anchor) as @doc.
; NOTE(review): #strip! and #select-adjacent! are directives; what they do is decided by
; whatever consumes the query — confirm the consumer honours them.

; Method definition, excluding any method whose name is exactly "constructor".
(
(comment)* @doc
.
(method_definition
name: (property_identifier) @name.definition.method) @definition.method
(#not-eq? @name.definition.method "constructor")
(#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
(#select-adjacent! @doc @definition.method)
)

; Class definition: either a class expression or a class declaration, with any node as its name.
(
(comment)* @doc
.
[
(class
name: (_) @name.definition.class)
(class_declaration
name: (_) @name.definition.class)
] @definition.class
(#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
(#select-adjacent! @doc @definition.class)
)

; Named function definition: function / generator, as expression or declaration.
(
(comment)* @doc
.
[
(function
name: (identifier) @name.definition.function)
(function_declaration
name: (identifier) @name.definition.function)
(generator_function
name: (identifier) @name.definition.function)
(generator_function_declaration
name: (identifier) @name.definition.function)
] @definition.function
(#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
(#select-adjacent! @doc @definition.function)
)

; Lexical declaration whose value is an arrow function or function expression;
; @definition.function is on the variable_declarator.
(
(comment)* @doc
.
(lexical_declaration
(variable_declarator
name: (identifier) @name.definition.function
value: [(arrow_function) (function)]) @definition.function)
(#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
(#select-adjacent! @doc @definition.function)
)

; Same as above for a variable_declaration node.
(
(comment)* @doc
.
(variable_declaration
(variable_declarator
name: (identifier) @name.definition.function
value: [(arrow_function) (function)]) @definition.function)
(#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
(#select-adjacent! @doc @definition.function)
)

; Assignment of an arrow function or function expression to an identifier or to a
; member expression's property; no @doc capture on this pattern.
(assignment_expression
left: [
(identifier) @name.definition.function
(member_expression
property: (property_identifier) @name.definition.function)
]
right: [(arrow_function) (function)]
) @definition.function

; Key/value pair whose value is an arrow function or function expression.
(pair
key: (property_identifier) @name.definition.function
value: [(arrow_function) (function)]) @definition.function

; Call of a bare identifier, skipped when the identifier is exactly "require".
(
(call_expression
function: (identifier) @name.reference.call) @reference.call
(#not-match? @name.reference.call "^(require)$")
)

; Call through a member expression: note that @reference.call is attached to the
; arguments node, not to the whole call_expression.
(call_expression
function: (member_expression
property: (property_identifier) @name.reference.call)
arguments: (_) @reference.call)

; `new` expression references the constructor (any node) as a class.
(new_expression
constructor: (_) @name.reference.class) @reference.class
Loading

0 comments on commit b17edc7

Please sign in to comment.