Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support named capture groups #112

Open
wants to merge 30 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
8cda4b6
Update .ocamlformat
dyzsr Aug 25, 2022
62b678a
Add field `alias` to Sedlex.node
dyzsr Sep 3, 2022
73e31fa
Update `regexp_of_pattern` to analyze aliases
dyzsr Sep 3, 2022
2144578
Refactor tracking path
dyzsr Sep 5, 2022
d2b29d9
Implement path tracing
dyzsr Sep 18, 2022
5f3dea9
Fix location messages
dyzsr Sep 19, 2022
8f741b2
Fix version number
dyzsr Sep 19, 2022
ad3d0a0
Bug fixes & Add testcases
dyzsr Sep 20, 2022
8e706d9
Optimize traces generation
dyzsr Sep 20, 2022
214f689
Make allow_alias a mandatory argument
dyzsr Sep 21, 2022
dc02370
Add testcases
dyzsr Sep 22, 2022
9081c14
Fix mark and backtrack
dyzsr Sep 22, 2022
fad0f51
Maintain alias starts/stops instead of pos/len
dyzsr Sep 22, 2022
e02fd3c
Fix path tracing & Use expect test
dyzsr Sep 22, 2022
86f42a8
Fix path tracing
dyzsr Sep 23, 2022
53488c6
Fix char set guard
dyzsr Sep 23, 2022
9154025
Optimize trace cases generation
dyzsr Sep 24, 2022
18f54cf
Dup case fixes
dyzsr Sep 24, 2022
18d7062
Merge alias offsets
dyzsr Sep 26, 2022
0e5dbd4
Fix merging actions
dyzsr Sep 26, 2022
72e6f3f
Optimize error message
dyzsr Sep 26, 2022
1a2961a
Update gen_alias & gen_offsets
dyzsr Sep 27, 2022
23bf119
Update actions
dyzsr Sep 27, 2022
e7258ef
Updates
dyzsr Sep 27, 2022
5ae1589
Remove nodes_idx
dyzsr Sep 27, 2022
5b63621
Optimize tracking path
dyzsr Sep 27, 2022
c8224bf
Update tracking path
dyzsr Sep 28, 2022
c63ff90
Fix the use of try with
dyzsr Sep 28, 2022
5f0bad9
Update tests
dyzsr Sep 28, 2022
f54e94d
Fix is_relevant
dyzsr Sep 29, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .ocamlformat
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version=0.19.0
#version=0.19.0
profile = conventional
break-separators = after
space-around-lists = false
Expand Down
12 changes: 8 additions & 4 deletions src/lib/sedlexing.ml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ type lexbuf = {
mutable marked_bol : int;
mutable marked_line : int;
mutable marked_val : int;
mutable marked_path : int list;
mutable filename : string;
mutable finished : bool;
}
Expand All @@ -58,6 +59,7 @@ let empty_lexbuf =
marked_bol = 0;
marked_line = 0;
marked_val = 0;
marked_path = [];
filename = "";
finished = false;
}
Expand Down Expand Up @@ -154,23 +156,24 @@ let __private__next_int lexbuf : int =
Uchar.to_int ret
end

let mark lexbuf i =
let mark lexbuf i path =
lexbuf.marked_pos <- lexbuf.pos;
lexbuf.marked_bol <- lexbuf.curr_bol;
lexbuf.marked_line <- lexbuf.curr_line;
lexbuf.marked_val <- i
lexbuf.marked_val <- i;
lexbuf.marked_path <- path

let start lexbuf =
lexbuf.start_pos <- lexbuf.pos;
lexbuf.start_bol <- lexbuf.curr_bol;
lexbuf.start_line <- lexbuf.curr_line;
mark lexbuf (-1)
mark lexbuf (-1) []

let backtrack lexbuf =
lexbuf.pos <- lexbuf.marked_pos;
lexbuf.curr_bol <- lexbuf.marked_bol;
lexbuf.curr_line <- lexbuf.marked_line;
lexbuf.marked_val
(lexbuf.marked_val, lexbuf.marked_path)

let rollback lexbuf =
lexbuf.pos <- lexbuf.start_pos;
Expand All @@ -189,6 +192,7 @@ let lexeme lexbuf =
Array.sub lexbuf.buf lexbuf.start_pos (lexbuf.pos - lexbuf.start_pos)

let lexeme_char lexbuf pos = lexbuf.buf.(lexbuf.start_pos + pos)
let lexeme_code lexbuf pos = Uchar.to_int lexbuf.buf.(lexbuf.start_pos + pos)

let lexing_positions lexbuf =
let start_p =
Expand Down
16 changes: 10 additions & 6 deletions src/lib/sedlexing.mli
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ val lexeme : lexbuf -> Uchar.t array
the matched string. *)
val lexeme_char : lexbuf -> int -> Uchar.t

(** [Sedlexing.lexeme_code lexbuf pos] returns the integer value of code
point number [pos] in the matched string. *)
val lexeme_code : lexbuf -> int -> int

(** [Sedlexing.sub_lexeme lexbuf pos len] returns a substring of the string
matched by the regular expression as an array of Unicode code points. *)
val sub_lexeme : lexbuf -> int -> int -> Uchar.t array
Expand Down Expand Up @@ -151,19 +155,19 @@ val next : lexbuf -> Uchar.t option
lexer buffer and increments the current position. If the input stream
is exhausted, the function returns -1.
If a ['\n'] is encountered, the tracked line number is incremented.

This is a private API; it should not be used by code using this module's
API and can be removed at any time. *)
val __private__next_int : lexbuf -> int

(** [mark lexbuf i] stores the integer [i] in the internal
slot. The backtrack position is set to the current position. *)
val mark : lexbuf -> int -> unit
(** [mark lexbuf i path] stores the integer [i] and the list [path] in the
internal slot. The backtrack position is set to the current position. *)
val mark : lexbuf -> int -> int list -> unit

(** [backtrack lexbuf] returns the value stored in the
(** [backtrack lexbuf] returns the value and path stored in the
internal slot of the buffer, and performs backtracking
(the current position is set to the value of the backtrack position). *)
val backtrack : lexbuf -> int
val backtrack : lexbuf -> int * int list

(** [with_tokenizer tokenizer lexbuf] given a lexer and a lexbuf,
returns a generator of tokens annotated with positions.
Expand Down
Loading