From 5225f6ecd33ffa14f1ee49d3a7e1eea99b7cb501 Mon Sep 17 00:00:00 2001 From: Christian Buttner Date: Fri, 9 Aug 2024 19:43:37 +0200 Subject: [PATCH] Add `tree_sitter.c3l` --- libraries/tree_sitter.c3l/README.md | 43 + libraries/tree_sitter.c3l/api.c3i | 1269 +++++++++++++++++++++++ libraries/tree_sitter.c3l/manifest.json | 32 + libraries/tree_sitter.c3l/wrappers.c3 | 129 +++ 4 files changed, 1473 insertions(+) create mode 100644 libraries/tree_sitter.c3l/README.md create mode 100644 libraries/tree_sitter.c3l/api.c3i create mode 100644 libraries/tree_sitter.c3l/manifest.json create mode 100644 libraries/tree_sitter.c3l/wrappers.c3 diff --git a/libraries/tree_sitter.c3l/README.md b/libraries/tree_sitter.c3l/README.md new file mode 100644 index 0000000..f1a0d3a --- /dev/null +++ b/libraries/tree_sitter.c3l/README.md @@ -0,0 +1,43 @@ +# tree_sitter.c3l + +## Usage Example + +This is an example of how to use tree-sitter together with the [tree-sitter-c3](https://github.com/c3lang/tree-sitter-c3) grammar. + +Add to your project settings: +```json + "dependencies": [ "tree_sitter", "tree_sitter_c3" ], +``` + +```c +import ts; +import tree_sitter_c3; // tree-sitter-c3 bindings + +fn void! main(String[] args) { + Language* language = tree_sitter_c3::language(); + Parser* parser = parser::new_with_language(language)!; + defer parser::delete(parser); + + String source = "bitstruct Flags : char { bool a : 0; bool b : 1; }"; + Tree* tree = ts::parser_parse_string(parser, null, source.ptr, (uint)source.len); + defer ts::tree_delete(tree); + Node root_node = ts::tree_root_node(tree); + + String patterns = `(bitstruct_body (bitstruct_member_declaration ":" @delimiter)+ @member)`; + + Query* query = query::new(language, patterns)!; + defer query::delete(query); + + QueryCursor* cursor = ts::query_cursor_new(); + defer ts::query_cursor_delete(cursor); + + ts::query_cursor_exec(cursor, query, root_node); + + for (QueryMatch match; ts::query_cursor_next_match(cursor, &match);) { + QueryCapture! member_capture = match.get_capture("member", query); + QueryCapture! delimiter_capture = match.get_capture("delimiter", query); + // ... 
+ } +} +``` + diff --git a/libraries/tree_sitter.c3l/api.c3i b/libraries/tree_sitter.c3l/api.c3i new file mode 100644 index 0000000..1d078dd --- /dev/null +++ b/libraries/tree_sitter.c3l/api.c3i @@ -0,0 +1,1269 @@ +/** + * Tree-Sitter API + * https://github.com/tree-sitter/tree-sitter/blob/master/lib/include/tree_sitter/api.h + **/ +module ts; + +/*-************************-*/ +/* Section - ABI Versioning */ +/*-************************-*/ + +/** + * The latest ABI version that is supported by the current version of the + * library. When Languages are generated by the Tree-sitter CLI, they are + * assigned an ABI version number that corresponds to the current CLI version. + * The Tree-sitter library is generally backwards-compatible with languages + * generated using older CLI versions, but is not forwards-compatible. + */ +const TREE_SITTER_LANGUAGE_VERSION = 14; + +/** + * The earliest ABI version that is supported by the current version of the + * library. + */ +const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION = 13; + +/*-***************-*/ +/* Section - Types */ +/*-***************-*/ + +distinct StateId = ushort; +distinct Symbol = ushort; +distinct FieldId = ushort; + +distinct Language = void; +distinct Parser = void; +distinct Tree = void; +distinct Query = void; +distinct QueryCursor = void; +distinct LookaheadIterator = void; + +enum InputEncoding : CInt { + UTF8, + UTF16, +} + +enum SymbolType : CInt { + REGULAR, + ANONYMOUS, + AUXILIARY, +} + +struct Point { + uint row; + uint column; +} + +struct Range { + Point start_point; + Point end_point; + uint start_byte; + uint end_byte; +} + +def Read_Fn = fn ZString (void* payload, uint byte_index, Point position, uint* bytes_read); + +struct Input { + void* payload; + Read_Fn read; + InputEncoding encoding; +} + +enum LogType : CInt { + PARSE, + LEX, +} + +def Log_Fn = fn void (void* payload, LogType log_type, ZString buffer); + +struct Logger { + void* payload; + Log_Fn log; +} + +struct InputEdit { + uint 
start_byte; + uint old_end_byte; + uint new_end_byte; + Point start_point; + Point old_end_point; + Point new_end_point; +} + +struct Node { + uint[4] context; + void* id; + Tree* tree; +} + +struct TreeCursor { + void* tree; + void* id; + uint[3] context; // must match `uint32_t context[3]` of C's TSTreeCursor; the struct is returned by value +} + +struct QueryCapture { + Node node; + uint index; +} + +enum Quantifier : CInt { + ZERO, + ZERO_OR_ONE, + ZERO_OR_MORE, + ONE, + ONE_OR_MORE, +} + +struct QueryMatch { + uint id; + ushort pattern_index; + ushort capture_count; + QueryCapture* captures; +} + +enum QueryPredicateStepType : CInt { + DONE, + CAPTURE, + STRING, +} + +struct QueryPredicateStep { + QueryPredicateStepType type; + uint value_id; +} + +enum QueryError : CInt { + NONE, + SYNTAX, + NODE_TYPE, + FIELD, + CAPTURE, + STRUCTURE, + LANGUAGE, +} + +/*-****************-*/ +/* Section - Parser */ +/*-****************-*/ + +/** + * Create a new parser. + */ +fn Parser* parser_new() @extern("ts_parser_new"); + +/** + * Delete the parser, freeing all of the memory that it used. + */ +fn void parser_delete(Parser* self) @extern("ts_parser_delete"); + +/** + * Get the parser's current language. + */ +fn Language* parser_language(Parser* self) @extern("ts_parser_language"); + +/** + * Set the language that the parser should use for parsing. + * + * Returns a boolean indicating whether or not the language was successfully + * assigned. True means assignment succeeded. False means there was a version + * mismatch: the language was generated with an incompatible version of the + * Tree-sitter CLI. Check the language's version using [`ts_language_version`] + * and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and + * [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants. + */ +fn bool parser_set_language(Parser* self, Language* language) @extern("ts_parser_set_language"); + +/** + * Set the ranges of text that the parser should include when parsing. + * + * By default, the parser will always include entire documents. 
This function + * allows you to parse only a *portion* of a document but still return a syntax + * tree whose ranges match up with the document as a whole. You can also pass + * multiple disjoint ranges. + * + * The second and third parameters specify the location and length of an array + * of ranges. The parser does *not* take ownership of these ranges; it copies + * the data, so it doesn't matter how these ranges are allocated. + * + * If `count` is zero, then the entire document will be parsed. Otherwise, + * the given ranges must be ordered from earliest to latest in the document, + * and they must not overlap. That is, the following must hold for all: + * + * `i < count - 1`: `ranges[i].end_byte <= ranges[i + 1].start_byte` + * + * If this requirement is not satisfied, the operation will fail, the ranges + * will not be assigned, and this function will return `false`. On success, + * this function returns `true` + */ +fn bool parser_set_included_ranges( + Parser* self, + Range* ranges, + uint count +) @extern("ts_parser_set_included_ranges"); + +/** + * Get the ranges of text that the parser will include when parsing. + * + * The returned pointer is owned by the parser. The caller should not free it + * or write to it. The length of the array will be written to the given + * `count` pointer. + */ +fn Range* parser_included_ranges( + Parser* self, + uint* count +) @extern("ts_parser_included_ranges"); + +/** + * Use the parser to parse some source code and create a syntax tree. + * + * If you are parsing this document for the first time, pass `NULL` for the + * `old_tree` parameter. Otherwise, if you have already parsed an earlier + * version of this document and the document has since been edited, pass the + * previous syntax tree so that the unchanged parts of it can be reused. + * This will save time and memory. 
For this to work correctly, you must have + * already edited the old syntax tree using the [`ts_tree_edit`] function in a + * way that exactly matches the source code changes. + * + * The [`TSInput`] parameter lets you specify how to read the text. It has the + * following three fields: + * 1. [`read`]: A function to retrieve a chunk of text at a given byte offset + * and (row, column) position. The function should return a pointer to the + * text and write its length to the [`bytes_read`] pointer. The parser does + * not take ownership of this buffer; it just borrows it until it has + * finished reading it. The function should write a zero value to the + * [`bytes_read`] pointer to indicate the end of the document. + * 2. [`payload`]: An arbitrary pointer that will be passed to each invocation + * of the [`read`] function. + * 3. [`encoding`]: An indication of how the text is encoded. Either + * `TSInputEncodingUTF8` or `TSInputEncodingUTF16`. + * + * This function returns a syntax tree on success, and `NULL` on failure. There + * are three possible reasons for failure: + * 1. The parser does not have a language assigned. Check for this using the + * [`ts_parser_language`] function. + * 2. Parsing was cancelled due to a timeout that was set by an earlier call to + * the [`ts_parser_set_timeout_micros`] function. You can resume parsing from + * where the parser left off by calling [`ts_parser_parse`] again with the + * same arguments. Or you can start parsing from scratch by first calling + * [`ts_parser_reset`]. + * 3. Parsing was cancelled using a cancellation flag that was set by an + * earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing + * from where the parser left off by calling [`ts_parser_parse`] again with + * the same arguments. 
+ * + * [`read`]: TSInput::read + * [`payload`]: TSInput::payload + * [`encoding`]: TSInput::encoding + * [`bytes_read`]: TSInput::read + */ +fn Tree* parser_parse( + Parser* self, + Tree* old_tree, + Input input +) @extern("ts_parser_parse"); + +/** + * Use the parser to parse some source code stored in one contiguous buffer. + * The first two parameters are the same as in the [`ts_parser_parse`] function + * above. The second two parameters indicate the location of the buffer and its + * length in bytes. + */ +fn Tree* parser_parse_string( + Parser* self, + Tree* old_tree, + char* string, + uint length +) @extern("ts_parser_parse_string"); + +/** + * Use the parser to parse some source code stored in one contiguous buffer with + * a given encoding. The first four parameters work the same as in the + * [`ts_parser_parse_string`] method above. The final parameter indicates whether + * the text is encoded as UTF8 or UTF16. + */ +fn Tree* parser_parse_string_encoding( + Parser* self, + Tree* old_tree, + char* string, + uint length, + InputEncoding encoding +) @extern("ts_parser_parse_string_encoding"); + +/** + * Instruct the parser to start the next parse from the beginning. + * + * If the parser previously failed because of a timeout or a cancellation, then + * by default, it will resume where it left off on the next call to + * [`ts_parser_parse`] or other parsing functions. If you don't want to resume, + * and instead intend to use this parser to parse some other document, you must + * call [`ts_parser_reset`] first. + */ +fn void parser_reset(Parser* self) @extern("ts_parser_reset"); + +/** + * Set the maximum duration in microseconds that parsing should be allowed to + * take before halting. + * + * If parsing takes longer than this, it will halt early, returning NULL. + * See [`ts_parser_parse`] for more information. 
+ */ +fn void parser_set_timeout_micros(Parser* self, ulong timeout_micros) @extern("ts_parser_set_timeout_micros"); + +/** + * Get the duration in microseconds that parsing is allowed to take. + */ +fn ulong parser_timeout_micros(Parser* self) @extern("ts_parser_timeout_micros"); + +/** + * Set the parser's current cancellation flag pointer. + * + * If a non-null pointer is assigned, then the parser will periodically read + * from this pointer during parsing. If it reads a non-zero value, it will + * halt early, returning NULL. See [`ts_parser_parse`] for more information. + */ +fn void parser_set_cancellation_flag(Parser* self, usz* flag) @extern("ts_parser_set_cancellation_flag"); + +/** + * Get the parser's current cancellation flag pointer. + */ +fn usz* parser_cancellation_flag(Parser* self) @extern("ts_parser_cancellation_flag"); + +/** + * Set the logger that a parser should use during parsing. + * + * The parser does not take ownership over the logger payload. If a logger was + * previously assigned, the caller is responsible for releasing any memory + * owned by the previous logger. + */ +fn void parser_set_logger(Parser* self, Logger logger) @extern("ts_parser_set_logger"); + +/** + * Get the parser's current logger. + */ +fn Logger parser_logger(Parser* self) @extern("ts_parser_logger"); + +/** + * Set the file descriptor to which the parser should write debugging graphs + * during parsing. The graphs are formatted in the DOT language. You may want + * to pipe these graphs directly to a `dot(1)` process in order to generate + * SVG output. You can turn off this logging by passing a negative number. + */ +fn void parser_print_dot_graphs(Parser* self, CInt fd) @extern("ts_parser_print_dot_graphs"); + +/*-**************-*/ +/* Section - Tree */ +/*-**************-*/ + +/** + * Create a shallow copy of the syntax tree. This is very fast. 
+ * + * You need to copy a syntax tree in order to use it on more than one thread at + * a time, as syntax trees are not thread safe. + */ +fn Tree* tree_copy(Tree* self) @extern("ts_tree_copy"); + +/** + * Delete the syntax tree, freeing all of the memory that it used. + */ +fn void tree_delete(Tree* self) @extern("ts_tree_delete"); + +/** + * Get the root node of the syntax tree. + */ +fn Node tree_root_node(Tree* self) @extern("ts_tree_root_node"); + +/** + * Get the root node of the syntax tree, but with its position + * shifted forward by the given offset. + */ +fn Node tree_root_node_with_offset( + Tree* self, + uint offset_bytes, + Point offset_extent +) @extern("ts_tree_root_node_with_offset"); + +/** + * Get the language that was used to parse the syntax tree. + */ +fn Language* tree_language(Tree* self) @extern("ts_tree_language"); + +/** + * Get the array of included ranges that was used to parse the syntax tree. + * + * The returned pointer must be freed by the caller. + */ +fn Range* tree_included_ranges(Tree* self, uint* length) @extern("ts_tree_included_ranges"); + +/** + * Edit the syntax tree to keep it in sync with source code that has been + * edited. + * + * You must describe the edit both in terms of byte offsets and in terms of + * (row, column) coordinates. + */ +fn void tree_edit(Tree* self, InputEdit* edit) @extern("ts_tree_edit"); + +/** + * Compare an old edited syntax tree to a new syntax tree representing the same + * document, returning an array of ranges whose syntactic structure has changed. + * + * For this to work correctly, the old syntax tree must have been edited such + * that its ranges match up to the new tree. Generally, you'll want to call + * this function right after calling one of the [`ts_parser_parse`] functions. + * You need to pass the old tree that was passed to parse, as well as the new + * tree that was returned from that function. 
+ * + * The returned array is allocated using `malloc` and the caller is responsible + * for freeing it using `free`. The length of the array will be written to the + * given `length` pointer. + */ +fn Range* tree_get_changed_ranges( + Tree* old_tree, + Tree* new_tree, + uint* length +) @extern("ts_tree_get_changed_ranges"); + +/** + * Write a DOT graph describing the syntax tree to the given file. + */ +fn void tree_print_dot_graph(Tree* self, CInt file_descriptor) @extern("ts_tree_print_dot_graph"); + +/*-**************-*/ +/* Section - Node */ +/*-**************-*/ + +/** + * Get the node's type as a null-terminated string. + */ +fn ZString node_type(Node self) @extern("ts_node_type"); + +/** + * Get the node's type as a numerical id. + */ +fn Symbol node_symbol(Node self) @extern("ts_node_symbol"); + +/** + * Get the node's language. + */ +fn Language* node_language(Node self) @extern("ts_node_language"); + +/** + * Get the node's type as it appears in the grammar ignoring aliases as a + * null-terminated string. + */ +fn ZString node_grammar_type(Node self) @extern("ts_node_grammar_type"); + +/** + * Get the node's type as a numerical id as it appears in the grammar ignoring + * aliases. This should be used in [`ts_language_next_state`] instead of + * [`ts_node_symbol`]. + */ +fn Symbol node_grammar_symbol(Node self) @extern("ts_node_grammar_symbol"); + +/** + * Get the node's start byte. + */ +fn uint node_start_byte(Node self) @extern("ts_node_start_byte"); + +/** + * Get the node's start position in terms of rows and columns. + */ +fn Point node_start_point(Node self) @extern("ts_node_start_point"); + +/** + * Get the node's end byte. + */ +fn uint node_end_byte(Node self) @extern("ts_node_end_byte"); + +/** + * Get the node's end position in terms of rows and columns. + */ +fn Point node_end_point(Node self) @extern("ts_node_end_point"); + +/** + * Get an S-expression representing the node as a string. 
+ * + * This string is allocated with `malloc` and the caller is responsible for + * freeing it using `free`. + */ +fn ZString node_string(Node self) @extern("ts_node_string"); + +/** + * Check if the node is null. Functions like [`ts_node_child`] and + * [`ts_node_next_sibling`] will return a null node to indicate that no such node + * was found. + */ +fn bool node_is_null(Node self) @extern("ts_node_is_null"); + +/** + * Check if the node is *named*. Named nodes correspond to named rules in the + * grammar, whereas *anonymous* nodes correspond to string literals in the + * grammar. + */ +fn bool node_is_named(Node self) @extern("ts_node_is_named"); + +/** + * Check if the node is *missing*. Missing nodes are inserted by the parser in + * order to recover from certain kinds of syntax errors. + */ +fn bool node_is_missing(Node self) @extern("ts_node_is_missing"); + +/** + * Check if the node is *extra*. Extra nodes represent things like comments, + * which are not required by the grammar, but can appear anywhere. + */ +fn bool node_is_extra(Node self) @extern("ts_node_is_extra"); + +/** + * Check if a syntax node has been edited. + */ +fn bool node_has_changes(Node self) @extern("ts_node_has_changes"); + +/** + * Check if the node is a syntax error or contains any syntax errors. + */ +fn bool node_has_error(Node self) @extern("ts_node_has_error"); + +/** + * Check if the node is a syntax error. +*/ +fn bool node_is_error(Node self) @extern("ts_node_is_error"); + +/** + * Get this node's parse state. +*/ +fn StateId node_parse_state(Node self) @extern("ts_node_parse_state"); + +/** + * Get the parse state after this node. +*/ +fn StateId node_next_parse_state(Node self) @extern("ts_node_next_parse_state"); + +/** + * Get the node's immediate parent. + * Prefer [`ts_node_child_containing_descendant`] for + * iterating over the node's ancestors. + */ +fn Node node_parent(Node self) @extern("ts_node_parent"); + +/** + * Get the node's child that contains `descendant`. 
+ */ +fn Node node_child_containing_descendant(Node self, Node descendant) @extern("ts_node_child_containing_descendant"); + +/** + * Get the node's child at the given index, where zero represents the first + * child. + */ +fn Node node_child(Node self, uint child_index) @extern("ts_node_child"); + +/** + * Get the field name for node's child at the given index, where zero represents + * the first child. Returns NULL, if no field is found. + */ +fn ZString node_field_name_for_child(Node self, uint child_index) @extern("ts_node_field_name_for_child"); + +/** + * Get the node's number of children. + */ +fn uint node_child_count(Node self) @extern("ts_node_child_count"); + +/** + * Get the node's *named* child at the given index. + * + * See also [`ts_node_is_named`]. + */ +fn Node node_named_child(Node self, uint child_index) @extern("ts_node_named_child"); + +/** + * Get the node's number of *named* children. + * + * See also [`ts_node_is_named`]. + */ +fn uint node_named_child_count(Node self) @extern("ts_node_named_child_count"); + +/** + * Get the node's child with the given field name. + */ +fn Node node_child_by_field_name( + Node self, + ZString name, + uint name_length +) @extern("ts_node_child_by_field_name"); + +/** + * Get the node's child with the given numerical field id. + * + * You can convert a field name to an id using the + * [`ts_language_field_id_for_name`] function. + */ +fn Node node_child_by_field_id(Node self, FieldId field_id) @extern("ts_node_child_by_field_id"); + +/** + * Get the node's next / previous sibling. + */ +fn Node node_next_sibling(Node self) @extern("ts_node_next_sibling"); +fn Node node_prev_sibling(Node self) @extern("ts_node_prev_sibling"); + +/** + * Get the node's next / previous *named* sibling. 
+ */ +fn Node node_next_named_sibling(Node self) @extern("ts_node_next_named_sibling"); +fn Node node_prev_named_sibling(Node self) @extern("ts_node_prev_named_sibling"); + +/** + * Get the node's first child that extends beyond the given byte offset. + */ +fn Node node_first_child_for_byte(Node self, uint byte) @extern("ts_node_first_child_for_byte"); + +/** + * Get the node's first named child that extends beyond the given byte offset. + */ +fn Node node_first_named_child_for_byte(Node self, uint byte) @extern("ts_node_first_named_child_for_byte"); + +/** + * Get the node's number of descendants, including one for the node itself. + */ +fn uint node_descendant_count(Node self) @extern("ts_node_descendant_count"); + +/** + * Get the smallest node within this node that spans the given range of bytes + * or (row, column) positions. + */ +fn Node node_descendant_for_byte_range(Node self, uint start, uint end) @extern("ts_node_descendant_for_byte_range"); +fn Node node_descendant_for_point_range(Node self, Point start, Point end) @extern("ts_node_descendant_for_point_range"); + +/** + * Get the smallest named node within this node that spans the given range of + * bytes or (row, column) positions. + */ +fn Node node_named_descendant_for_byte_range(Node self, uint start, uint end) @extern("ts_node_named_descendant_for_byte_range"); +fn Node node_named_descendant_for_point_range(Node self, Point start, Point end) @extern("ts_node_named_descendant_for_point_range"); + +/** + * Edit the node to keep it in-sync with source code that has been edited. + * + * This function is only rarely needed. When you edit a syntax tree with the + * [`ts_tree_edit`] function, all of the nodes that you retrieve from the tree + * afterward will already reflect the edit. You only need to use [`ts_node_edit`] + * when you have a [`TSNode`] instance that you want to keep and continue to use + * after an edit. 
+ */ +fn void node_edit(Node* self, InputEdit* edit) @extern("ts_node_edit"); // C takes `TSNode *self`: the node is updated in place + +/** + * Check if two nodes are identical. + */ +fn bool node_eq(Node self, Node other) @extern("ts_node_eq"); + +/*-********************-*/ +/* Section - TreeCursor */ +/*-********************-*/ + +/** + * Create a new tree cursor starting from the given node. + * + * A tree cursor allows you to walk a syntax tree more efficiently than is + * possible using the [`TSNode`] functions. It is a mutable object that is always + * on a certain syntax node, and can be moved imperatively to different nodes. + */ +fn TreeCursor tree_cursor_new(Node node) @extern("ts_tree_cursor_new"); + +/** + * Delete a tree cursor, freeing all of the memory that it used. + */ +fn void tree_cursor_delete(TreeCursor* self) @extern("ts_tree_cursor_delete"); + +/** + * Re-initialize a tree cursor to start at a different node. + */ +fn void tree_cursor_reset(TreeCursor* self, Node node) @extern("ts_tree_cursor_reset"); + +/** + * Re-initialize a tree cursor to the same position as another cursor. + * + * Unlike [`ts_tree_cursor_reset`], this will not lose parent information and + * allows reusing already created cursors. +*/ +fn void tree_cursor_reset_to(TreeCursor* dst, TreeCursor* src) @extern("ts_tree_cursor_reset_to"); + +/** + * Get the tree cursor's current node. + */ +fn Node tree_cursor_current_node(TreeCursor* self) @extern("ts_tree_cursor_current_node"); + +/** + * Get the field name of the tree cursor's current node. + * + * This returns `NULL` if the current node doesn't have a field. + * See also [`ts_node_child_by_field_name`]. + */ +fn ZString tree_cursor_current_field_name(TreeCursor* self) @extern("ts_tree_cursor_current_field_name"); + +/** + * Get the field id of the tree cursor's current node. + * + * This returns zero if the current node doesn't have a field. + * See also [`ts_node_child_by_field_id`], [`ts_language_field_id_for_name`]. 
+ */ +fn FieldId tree_cursor_current_field_id(TreeCursor* self) @extern("ts_tree_cursor_current_field_id"); + +/** + * Move the cursor to the parent of its current node. + * + * This returns `true` if the cursor successfully moved, and returns `false` + * if there was no parent node (the cursor was already on the root node). + */ +fn bool tree_cursor_goto_parent(TreeCursor* self) @extern("ts_tree_cursor_goto_parent"); + +/** + * Move the cursor to the next sibling of its current node. + * + * This returns `true` if the cursor successfully moved, and returns `false` + * if there was no next sibling node. + */ +fn bool tree_cursor_goto_next_sibling(TreeCursor* self) @extern("ts_tree_cursor_goto_next_sibling"); + +/** + * Move the cursor to the previous sibling of its current node. + * + * This returns `true` if the cursor successfully moved, and returns `false` if + * there was no previous sibling node. + * + * Note that this function may be slower than + * [`ts_tree_cursor_goto_next_sibling`] due to how node positions are stored. In + * the worst case, this will need to iterate through all the children up to the + * previous sibling node to recalculate its position. + */ +fn bool tree_cursor_goto_previous_sibling(TreeCursor* self) @extern("ts_tree_cursor_goto_previous_sibling"); + +/** + * Move the cursor to the first child of its current node. + * + * This returns `true` if the cursor successfully moved, and returns `false` + * if there were no children. + */ +fn bool tree_cursor_goto_first_child(TreeCursor* self) @extern("ts_tree_cursor_goto_first_child"); + +/** + * Move the cursor to the last child of its current node. + * + * This returns `true` if the cursor successfully moved, and returns `false` if + * there were no children. + * + * Note that this function may be slower than [`ts_tree_cursor_goto_first_child`] + * because it needs to iterate through all the children to compute the child's + * position. 
+ */ +fn bool tree_cursor_goto_last_child(TreeCursor* self) @extern("ts_tree_cursor_goto_last_child"); + +/** + * Move the cursor to the node that is the nth descendant of + * the original node that the cursor was constructed with, where + * zero represents the original node itself. + */ +fn void tree_cursor_goto_descendant(TreeCursor* self, uint goal_descendant_index) @extern("ts_tree_cursor_goto_descendant"); + +/** + * Get the index of the cursor's current node out of all of the + * descendants of the original node that the cursor was constructed with. + */ +fn uint tree_cursor_current_descendant_index(TreeCursor* self) @extern("ts_tree_cursor_current_descendant_index"); + +/** + * Get the depth of the cursor's current node relative to the original + * node that the cursor was constructed with. + */ +fn uint tree_cursor_current_depth(TreeCursor* self) @extern("ts_tree_cursor_current_depth"); + +/** + * Move the cursor to the first child of its current node that extends beyond + * the given byte offset or point. + * + * This returns the index of the child node if one was found, and returns -1 + * if no such child was found. + */ +fn long tree_cursor_goto_first_child_for_byte(TreeCursor* self, uint goal_byte) @extern("ts_tree_cursor_goto_first_child_for_byte"); +fn long tree_cursor_goto_first_child_for_point(TreeCursor* self, Point goal_point) @extern("ts_tree_cursor_goto_first_child_for_point"); + +fn TreeCursor tree_cursor_copy(TreeCursor* cursor) @extern("ts_tree_cursor_copy"); + +/*-***************-*/ +/* Section - Query */ +/*-***************-*/ + +/** + * Create a new query from a string containing one or more S-expression + * patterns. The query is associated with a particular language, and can + * only be run on syntax nodes parsed with that language. + * + * If all of the given patterns are valid, this returns a [`TSQuery`]. + * If a pattern is invalid, this returns `NULL`, and provides two pieces + * of information about the problem: + * 1. 
The byte offset of the error is written to the `error_offset` parameter. + * 2. The type of error is written to the `error_type` parameter. + */ +fn Query* query_new( + Language* language, + char* source, + uint source_len, + uint* error_offset, + QueryError* error_type +) @extern("ts_query_new"); + +/** + * Delete a query, freeing all of the memory that it used. + */ +fn void query_delete(Query* self) @extern("ts_query_delete"); + +/** + * Get the number of patterns, captures, or string literals in the query. + */ +fn uint query_pattern_count(Query* self) @extern("ts_query_pattern_count"); +fn uint query_capture_count(Query* self) @extern("ts_query_capture_count"); +fn uint query_string_count(Query* self) @extern("ts_query_string_count"); + +/** + * Get the byte offset where the given pattern starts in the query's source. + * + * This can be useful when combining queries by concatenating their source + * code strings. + */ +fn uint query_start_byte_for_pattern(Query *self, uint pattern_index) @extern("ts_query_start_byte_for_pattern"); + +/** + * Get the byte offset where the given pattern ends in the query's source. + * + * This can be useful when combining queries by concatenating their source + * code strings. + */ +fn uint query_end_byte_for_pattern(Query *self, uint pattern_index) @extern("ts_query_end_byte_for_pattern"); + +/** + * Get all of the predicates for the given pattern in the query. + * + * The predicates are represented as a single array of steps. There are three + * types of steps in this array, which correspond to the three legal values for + * the `type` field: + * - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names + * of captures. Their `value_id` can be used with the + * [`ts_query_capture_name_for_id`] function to obtain the name of the capture. + * - `TSQueryPredicateStepTypeString` - Steps with this type represent literal + * strings. 
Their `value_id` can be used with the + * [`ts_query_string_value_for_id`] function to obtain their string value. + * - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels* + * that represent the end of an individual predicate. If a pattern has two + * predicates, then there will be two steps with this `type` in the array. + */ +fn QueryPredicateStep* query_predicates_for_pattern( + Query* self, + uint pattern_index, + uint* step_count +) @extern("ts_query_predicates_for_pattern"); + +/** + * Check if the given pattern in the query has a single root node. + */ +fn bool query_is_pattern_rooted(Query* self, uint pattern_index) @extern("ts_query_is_pattern_rooted"); + +/** + * Check if the given pattern in the query is 'non local'. + * + * A non-local pattern has multiple root nodes and can match within a + * repeating sequence of nodes, as specified by the grammar. Non-local + * patterns disable certain optimizations that would otherwise be possible + * when executing a query on a specific range of a syntax tree. + */ +fn bool query_is_pattern_non_local(Query* self, uint pattern_index) @extern("ts_query_is_pattern_non_local"); + +/** + * Check if a given pattern is guaranteed to match once a given step is reached. + * The step is specified by its byte offset in the query's source code. + */ +fn bool query_is_pattern_guaranteed_at_step(Query* self, uint byte_offset) @extern("ts_query_is_pattern_guaranteed_at_step"); + +/** + * Get the name and length of one of the query's captures, or one of the + * query's string literals. Each capture and string is associated with a + * numeric id based on the order that it appeared in the query's source. + */ +fn ZString query_capture_name_for_id( + Query* self, + uint index, + uint* length +) @extern("ts_query_capture_name_for_id"); + +/** + * Get the quantifier of the query's captures. Each capture is associated + * with a numeric id based on the order that it appeared in the query's source. 
+ */
+fn Quantifier query_capture_quantifier_for_id(
+    Query* self,
+    uint pattern_index,
+    uint capture_index
+) @extern("ts_query_capture_quantifier_for_id");
+
+fn ZString query_string_value_for_id(
+    Query* self,
+    uint index,
+    uint* length
+) @extern("ts_query_string_value_for_id");
+
+/**
+ * Disable a certain capture within a query.
+ *
+ * This prevents the capture from being returned in matches, and also avoids
+ * any resource usage associated with recording the capture. Currently, there
+ * is no way to undo this.
+ */
+fn void query_disable_capture(Query* self, ZString name, uint length) @extern("ts_query_disable_capture");
+
+/**
+ * Disable a certain pattern within a query.
+ *
+ * This prevents the pattern from matching and removes most of the overhead
+ * associated with the pattern. Currently, there is no way to undo this.
+ */
+fn void query_disable_pattern(Query* self, uint pattern_index) @extern("ts_query_disable_pattern");
+
+/**
+ * Create a new cursor for executing a given query.
+ *
+ * The cursor stores the state that is needed to iteratively search
+ * for matches. To use the query cursor, first call [`ts_query_cursor_exec`]
+ * to start running a given query on a given syntax node. Then, there are
+ * two options for consuming the results of the query:
+ * 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the
+ *    *matches* in the order that they were found. Each match contains the
+ *    index of the pattern that matched, and an array of captures. Because
+ *    multiple patterns can match the same set of nodes, one match may contain
+ *    captures that appear *before* some of the captures from a previous match.
+ * 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the
+ *    individual *captures* in the order that they appear. This is useful if you
+ *    don't care about which pattern matched, and just want a single ordered
+ *    sequence of captures.
+ *
+ * If you don't care about consuming all of the results, you can stop calling
+ * [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point.
+ * You can then start executing another query on another node by calling
+ * [`ts_query_cursor_exec`] again.
+ */
+fn QueryCursor* query_cursor_new() @extern("ts_query_cursor_new");
+
+/**
+ * Delete a query cursor, freeing all of the memory that it used.
+ */
+fn void query_cursor_delete(QueryCursor* self) @extern("ts_query_cursor_delete");
+
+/**
+ * Start running a given query on a given node.
+ */
+fn void query_cursor_exec(QueryCursor* self, Query* query, Node node) @extern("ts_query_cursor_exec");
+
+/**
+ * Manage the maximum number of in-progress matches allowed by this query
+ * cursor.
+ *
+ * Query cursors have an optional maximum capacity for storing lists of
+ * in-progress captures. If this capacity is exceeded, then the
+ * earliest-starting match will silently be dropped to make room for further
+ * matches. This maximum capacity is optional — by default, query cursors allow
+ * any number of pending matches, dynamically allocating new space for them as
+ * needed as the query is executed.
+ */
+fn bool query_cursor_did_exceed_match_limit(QueryCursor* self) @extern("ts_query_cursor_did_exceed_match_limit");
+fn uint query_cursor_match_limit(QueryCursor* self) @extern("ts_query_cursor_match_limit");
+fn void query_cursor_set_match_limit(QueryCursor* self, uint limit) @extern("ts_query_cursor_set_match_limit");
+
+/**
+ * Set the range of bytes or (row, column) positions in which the query
+ * will be executed.
+ */
+fn void query_cursor_set_byte_range(QueryCursor* self, uint start_byte, uint end_byte) @extern("ts_query_cursor_set_byte_range");
+fn void query_cursor_set_point_range(QueryCursor* self, Point start_point, Point end_point) @extern("ts_query_cursor_set_point_range");
+
+/**
+ * Advance to the next match of the currently running query.
+ *
+ * If there is a match, write it to `*match` and return `true`.
+ * Otherwise, return `false`.
+ */
+fn bool query_cursor_next_match(QueryCursor* self, QueryMatch* match) @extern("ts_query_cursor_next_match");
+fn void query_cursor_remove_match(QueryCursor* self, uint match_id) @extern("ts_query_cursor_remove_match");
+
+/**
+ * Advance to the next capture of the currently running query.
+ *
+ * If there is a capture, write its match to `*match` and its index within
+ * the match's capture list to `*capture_index`. Otherwise, return `false`.
+ */
+fn bool query_cursor_next_capture(
+    QueryCursor* self,
+    QueryMatch* match,
+    uint* capture_index
+) @extern("ts_query_cursor_next_capture");
+
+/**
+ * Set the maximum start depth for a query cursor.
+ *
+ * This prevents cursors from exploring children nodes at a certain depth.
+ * Note if a pattern includes many children, then they will still be checked.
+ *
+ * A max start depth of zero is a special case: it helps to destructure a
+ * subtree by staying on a node and using captures for the parts of interest.
+ * Note that a zero max start depth only limits the search depth for a
+ * pattern's root node; other nodes that are part of the pattern may still be
+ * searched at any depth, as defined by the pattern structure.
+ *
+ * Set to `uint.max` (`UINT32_MAX` in the C API) to remove the maximum start depth.
+ */
+fn void query_cursor_set_max_start_depth(QueryCursor* self, uint max_start_depth) @extern("ts_query_cursor_set_max_start_depth");
+
+/*-******************-*/
+/* Section - Language */
+/*-******************-*/
+
+/**
+ * Get another reference to the given language.
+ */
+fn Language* language_copy(Language* self) @extern("ts_language_copy");
+
+/**
+ * Free any dynamically-allocated resources for this language, if
+ * this is the last reference.
+ */
+fn void language_delete(Language* self) @extern("ts_language_delete");
+
+/**
+ * Get the number of distinct node types in the language.
+ */
+fn uint language_symbol_count(Language* self) @extern("ts_language_symbol_count");
+
+/**
+ * Get the number of valid states in this language.
+*/
+fn uint language_state_count(Language* self) @extern("ts_language_state_count");
+
+/**
+ * Get a node type string for the given numerical id.
+ */
+fn ZString language_symbol_name(Language* self, Symbol symbol) @extern("ts_language_symbol_name");
+
+/**
+ * Get the numerical id for the given node type string.
+ */
+fn Symbol language_symbol_for_name(
+    Language* self,
+    ZString string,
+    uint length,
+    bool is_named
+) @extern("ts_language_symbol_for_name");
+
+/**
+ * Get the number of distinct field names in the language.
+ */
+fn uint language_field_count(Language* self) @extern("ts_language_field_count");
+
+/**
+ * Get the field name string for the given numerical id.
+ */
+fn ZString language_field_name_for_id(Language* self, FieldId id) @extern("ts_language_field_name_for_id");
+
+/**
+ * Get the numerical id for the given field name string.
+ */
+fn FieldId language_field_id_for_name(Language* self, char* name, uint name_length) @extern("ts_language_field_id_for_name");
+
+/**
+ * Check whether the given node type id belongs to named nodes, anonymous nodes,
+ * or hidden nodes.
+ *
+ * See also [`ts_node_is_named`]. Hidden nodes are never returned from the API.
+ */
+fn SymbolType language_symbol_type(Language* self, Symbol symbol) @extern("ts_language_symbol_type");
+
+/**
+ * Get the ABI version number for this language. This version number is used
+ * to ensure that languages were generated by a compatible version of
+ * Tree-sitter.
+ *
+ * See also [`ts_parser_set_language`].
+ */
+fn uint language_version(Language* self) @extern("ts_language_version");
+
+/**
+ * Get the next parse state. Combine this with lookahead iterators to generate
+ * completion suggestions or valid symbols in error nodes. Use
+ * [`ts_node_grammar_symbol`] for valid symbols.
+*/
+fn StateId language_next_state(Language* self, StateId state, Symbol symbol) @extern("ts_language_next_state");
+
+/*-****************************-*/
+/* Section - Lookahead Iterator */
+/*-****************************-*/
+
+/**
+ * Create a new lookahead iterator for the given language and parse state.
+ *
+ * This returns `null` if state is invalid for the language.
+ *
+ * Repeatedly using [`ts_lookahead_iterator_next`] and
+ * [`ts_lookahead_iterator_current_symbol`] will generate valid symbols in the
+ * given parse state. Newly created lookahead iterators will contain the `ERROR`
+ * symbol.
+ *
+ * Lookahead iterators can be useful to generate suggestions and improve syntax
+ * error diagnostics. To get symbols valid in an ERROR node, use the lookahead
+ * iterator on its first leaf node state. For `MISSING` nodes, a lookahead
+ * iterator created on the previous non-extra leaf node may be appropriate.
+*/
+fn LookaheadIterator* lookahead_iterator_new(Language* self, StateId state) @extern("ts_lookahead_iterator_new");
+
+/**
+ * Delete a lookahead iterator, freeing all the memory used.
+*/
+fn void lookahead_iterator_delete(LookaheadIterator* self) @extern("ts_lookahead_iterator_delete");
+
+/**
+ * Reset the lookahead iterator to another state.
+ *
+ * This returns `true` if the iterator was reset to the given state and `false`
+ * otherwise.
+*/
+fn bool lookahead_iterator_reset_state(LookaheadIterator* self, StateId state) @extern("ts_lookahead_iterator_reset_state");
+
+/**
+ * Reset the lookahead iterator.
+ *
+ * This returns `true` if the language was set successfully and `false`
+ * otherwise.
+*/
+fn bool lookahead_iterator_reset(LookaheadIterator* self, Language* language, StateId state) @extern("ts_lookahead_iterator_reset");
+
+/**
+ * Get the current language of the lookahead iterator.
+*/
+fn Language* lookahead_iterator_language(LookaheadIterator* self) @extern("ts_lookahead_iterator_language");
+
+/**
+ * Advance the lookahead iterator to the next symbol.
+ *
+ * This returns `true` if there is a new symbol and `false` otherwise.
+*/
+fn bool lookahead_iterator_next(LookaheadIterator* self) @extern("ts_lookahead_iterator_next");
+
+/**
+ * Get the current symbol of the lookahead iterator.
+*/
+fn Symbol lookahead_iterator_current_symbol(LookaheadIterator* self) @extern("ts_lookahead_iterator_current_symbol");
+
+/**
+ * Get the current symbol type of the lookahead iterator as a null-terminated
+ * string.
+*/
+fn ZString lookahead_iterator_current_symbol_name(LookaheadIterator* self) @extern("ts_lookahead_iterator_current_symbol_name");
+
+/*-*********************************-*/
+/* Section - WebAssembly Integration */
+/*-*********************************-*/
+
+distinct WasmEngine = void;
+distinct WasmStore = void;
+
+enum WasmErrorKind : CInt {
+    NONE,
+    PARSE,
+    COMPILE,
+    INSTANTIATE,
+    ALLOCATE,
+}
+
+struct WasmError {
+    WasmErrorKind kind;
+    ZString message;
+}
+
+/**
+ * Create a Wasm store.
+ */
+fn WasmStore* wasm_store_new(
+    WasmEngine* engine,
+    WasmError* error
+) @extern("ts_wasm_store_new");
+
+/**
+ * Free the memory associated with the given Wasm store.
+ */
+fn void wasm_store_delete(WasmStore*) @extern("ts_wasm_store_delete");
+
+/**
+ * Create a language from a buffer of Wasm. The resulting language behaves
+ * like any other Tree-sitter language, except that in order to use it with
+ * a parser, that parser must have a Wasm store. Note that the language
+ * can be used with any Wasm store, it doesn't need to be the same store that
+ * was used to originally load it.
+ */
+fn Language* wasm_store_load_language(
+    WasmStore*,
+    ZString name,
+    ZString wasm,
+    uint wasm_len,
+    WasmError* error
+) @extern("ts_wasm_store_load_language");
+
+/**
+ * Get the number of languages instantiated in the given wasm store.
+ */
+fn usz wasm_store_language_count(WasmStore*) @extern("ts_wasm_store_language_count");
+
+/**
+ * Check if the language came from a Wasm module. If so, then in order to use
+ * this language with a Parser*, that parser must have a Wasm store assigned.
+ */
+fn bool language_is_wasm(Language*) @extern("ts_language_is_wasm");
+
+/**
+ * Assign the given Wasm store to the parser. A parser must have a Wasm store
+ * in order to use Wasm languages.
+ */
+fn void parser_set_wasm_store(Parser*, WasmStore*) @extern("ts_parser_set_wasm_store");
+
+/**
+ * Remove the parser's current Wasm store and return it. This returns `null` if
+ * the parser doesn't have a Wasm store.
+ */
+fn WasmStore* parser_take_wasm_store(Parser*) @extern("ts_parser_take_wasm_store");
+
+/*-******************************-*/
+/* Section - Global Configuration */
+/*-******************************-*/
+
+/**
+ * Set the allocation functions used by the library.
+ *
+ * By default, Tree-sitter uses the standard libc allocation functions,
+ * but aborts the process when an allocation fails. This function lets
+ * you supply alternative allocation functions at runtime.
+ *
+ * If you pass `null` for any parameter, Tree-sitter will switch back to
+ * its default implementation of that function.
+ *
+ * If you call this function after the library has already been used, then
+ * you must ensure that either:
+ * 1. All the existing objects have been freed.
+ * 2. The new allocator shares its state with the old one, so it is capable
+ *    of freeing memory that was allocated by the old allocator.
+ */
+
+def Malloc_Fn = fn void* (usz);
+def Calloc_Fn = fn void* (usz, usz);
+def Realloc_Fn = fn void* (void*, usz); // C `realloc` shape: (old pointer, new size)
+def Free_Fn = fn void (void*);
+
+fn void set_allocator(
+    Malloc_Fn new_malloc,
+    Calloc_Fn new_calloc,
+    Realloc_Fn new_realloc,
+    Free_Fn new_free
+) @extern("ts_set_allocator");
diff --git a/libraries/tree_sitter.c3l/manifest.json b/libraries/tree_sitter.c3l/manifest.json
new file mode 100644
index 0000000..e3a8deb
--- /dev/null
+++ b/libraries/tree_sitter.c3l/manifest.json
@@ -0,0 +1,32 @@
+{
+    "provides" : "tree_sitter",
+    "targets" : {
+        "linux-aarch64" : {
+            "linked-libraries" : ["tree-sitter"]
+        },
+        "linux-riscv32" : {
+            "linked-libraries" : ["tree-sitter"]
+        },
+        "linux-riscv64" : {
+            "linked-libraries" : ["tree-sitter"]
+        },
+        "linux-x86" : {
+            "linked-libraries" : ["tree-sitter"]
+        },
+        "linux-x64" : {
+            "linked-libraries" : ["tree-sitter"]
+        },
+        "macos-aarch64" : {
+            "linked-libraries" : ["tree-sitter"]
+        },
+        "macos-x64" : {
+            "linked-libraries" : ["tree-sitter"]
+        },
+        "netbsd-x64" : {
+            "linked-libraries" : ["tree-sitter"]
+        },
+        "openbsd-x64" : {
+            "linked-libraries" : ["tree-sitter"]
+        },
+    },
+}
diff --git a/libraries/tree_sitter.c3l/wrappers.c3 b/libraries/tree_sitter.c3l/wrappers.c3
new file mode 100644
index 0000000..e09f479
--- /dev/null
+++ b/libraries/tree_sitter.c3l/wrappers.c3
@@ -0,0 +1,149 @@
+////////////////////////////////
+// Parser
+// -----------------------------
+
+module ts::parser;
+
+fault ParserFault {
+    INCOMPATIBLE_LANGUAGE_VERSION,
+}
+
+/**
+ * Create a parser and assign `language` to it.
+ *
+ * If the language is rejected, the freshly created parser is deleted before
+ * the fault is returned, so a failed call does not leak it.
+ **/
+fn Parser*! new_with_language(Language* language) {
+    Parser* parser = ts::parser_new();
+    if (!ts::parser_set_language(parser, language)) {
+        ts::parser_delete(parser);
+        return ParserFault.INCOMPATIBLE_LANGUAGE_VERSION?;
+    }
+    return parser;
+}
+
+fn void delete(Parser* parser) {
+    ts::parser_delete(parser);
+}
+
+fn Tree* parse_string(Parser* parser, Tree* old_tree, String string) {
+    return ts::parser_parse_string(parser, old_tree, string.ptr, string.len);
+}
+
+////////////////////////////////
+// Node
+// -----------------------------
+
+module ts::node;
+
+fn bool Node.is_null(self) @inline => ts::node_is_null(self);
+fn usz Node.start_byte(self) @inline => ts::node_start_byte(self);
+fn usz Node.end_byte(self) @inline => ts::node_end_byte(self);
+// Slice by start + length: `start .. end - 1` wraps around when end_byte() is 0 (empty node at offset 0).
+fn String Node.get_text(self, char[] data) @inline => (String)data[self.start_byte() : self.end_byte() - self.start_byte()];
+
+////////////////////////////////
+// Query
+// -----------------------------
+
+module ts::query;
+
+fault QueryFault {
+    SYNTAX,
+    NODE_TYPE,
+    FIELD,
+    CAPTURE,
+    STRUCTURE,
+    LANGUAGE,
+}
+
+// Map the error code reported by `ts::query_new` to the matching fault.
+fn QueryFault fault_from_error(QueryError err) @private {
+    switch (err) {
+        case SYNTAX: return QueryFault.SYNTAX;
+        case NODE_TYPE: return QueryFault.NODE_TYPE;
+        case FIELD: return QueryFault.FIELD;
+        case CAPTURE: return QueryFault.CAPTURE;
+        case STRUCTURE: return QueryFault.STRUCTURE;
+        case LANGUAGE: return QueryFault.LANGUAGE;
+        default: unreachable();
+    }
+}
+
+/**
+ * Create a new query.
+ * @param language "Tree-sitter language to use."
+ * @param [in] query "The query string containing one or more S-expression patterns."
+ * @param [out] error_offset "Offset of error in query. May be null if the offset is not needed."
+ **/
+fn Query*! new(Language* language, String query, uint* error_offset = null) {
+    // `ts_query_new` stores the error offset through its pointer without a
+    // null check, so substitute scratch storage when the caller passes null.
+    uint scratch_offset;
+    uint* offset_out = error_offset ? error_offset : &scratch_offset;
+    QueryError error_type;
+    Query* result = ts::query_new(language, query.ptr, query.len, offset_out, &error_type);
+    if (!result) {
+        return fault_from_error(error_type)?;
+    }
+    return result;
+}
+
+/**
+ * Delete a query, freeing all of the memory that it used.
+ */
+fn void delete(Query* query) {
+    ts::query_delete(query);
+}
+
+// Query Match
+// -----------------------------
+
+/**
+ * Try to get a QueryCapture from a QueryMatch by name.
+ *
+ * Returns the first capture of the match whose name equals `name`, or
+ * `SearchResult.MISSING` if no capture has that name.
+ **/
+fn QueryCapture! QueryMatch.get_capture(&match, String name, Query* query) {
+    for (uint i = 0; i < match.capture_count; i++) {
+        QueryCapture capture = match.captures[i];
+
+        String capture_name = {|
+            uint length;
+            ZString str = ts::query_capture_name_for_id(query, capture.index, &length);
+            return (String)str[:length];
+        |};
+
+        if (name == capture_name) {
+            return capture;
+        }
+    }
+
+    return SearchResult.MISSING?;
+}
+
+// Query Capture
+// -----------------------------
+
+fn usz QueryCapture.start_byte(&self) @inline => self.node.start_byte();
+fn usz QueryCapture.end_byte(&self) @inline => self.node.end_byte();
+fn String QueryCapture.get_text(&self, char[] data) @inline => self.node.get_text(data);
+
+////////////////////////////////
+// Query Cursor
+// -----------------------------
+
+module ts::query_cursor;
+
+fn QueryCursor* new() => ts::query_cursor_new();
+fn void delete(QueryCursor* cursor) => ts::query_cursor_delete(cursor);
+
+// Run `query` on `node` and invoke the body once per match, in cursor order.
+macro void @exec(QueryCursor* cursor, Query* query, Node node; @body(QueryMatch* match)) {
+    ts::query_cursor_exec(cursor, query, node);
+    for (QueryMatch match; ts::query_cursor_next_match(cursor, &match);) {
+        @body(&match);
+    }
+}