Skip to content

Commit

Permalink
Merge pull request #138 from nberth/keep-source-case
Browse files Browse the repository at this point in the history
Preserve case of original source text
  • Loading branch information
nberth authored Jan 22, 2024
2 parents 973be89 + 39e0cc2 commit 0738b47
Show file tree
Hide file tree
Showing 28 changed files with 1,673 additions and 1,629 deletions.
21 changes: 17 additions & 4 deletions src/lsp/cobol_indent/indent_check.ml
Original file line number Diff line number Diff line change
Expand Up @@ -548,12 +548,14 @@ and check_statement (text:text) state ifcheck =

(*TODO: find a better way to distinguish PERFORM(inline) and PERFORM_CLOSED(out-of-line)*)
(*A bug here, TODO.md for details*)
| {payload = TextWord "PERFORM"; loc } :: _ :: {payload = TextWord "TIMES"; _} :: wordlist
| {payload = TextWord "PERFORM"; loc } :: {payload = TextWord ("UNTIL"|"VARYING"|"WITH"|"TEST"); _}
:: wordlist ->
| { payload = TextWord "PERFORM"; loc } :: _ ::
{ payload = TextWord "TIMES"; _} :: wordlist
| { payload = TextWord "PERFORM"; loc } ::
{ payload = TextWord ("UNTIL"|"VARYING"|"WITH"|"TEST"); _} :: wordlist ->
handle_open_scope PERFORM loc wordlist state ifcheck

| {payload = TextWord "ELSE"; loc} :: {payload = TextWord "IF"; _} :: wordlist ->
| { payload = TextWord "ELSE"; loc } ::
{ payload = TextWord "IF"; _ } :: wordlist ->
let context = exp_scope_termination THEN context in
begin match context with
| (THEN, _) :: (IF, _) :: context' ->
Expand Down Expand Up @@ -1228,6 +1230,17 @@ and check_fun = function


let check_indentation (text:text) (state:indent_state) =
let text =
(* Note: the above code assumes upper-cased text words, so that's what we
need to feed it. *)
let open Cobol_common.Srcloc.INFIX in
let open Cobol_preproc.Text.TYPES in
EzList.tail_map begin fun tw -> match ~&tw with
| TextWord w -> TextWord (String.uppercase_ascii w) &@<- tw
| CDirWord w -> CDirWord (String.uppercase_ascii w) &@<- tw
| _ -> tw
end text
in
match state.range with
| None ->
check_fun state.scope text state true
Expand Down
18 changes: 12 additions & 6 deletions src/lsp/cobol_indent/indent_keywords.ml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
(* *)
(**************************************************************************)

(* Mappings to/from tokens and keyword strings are very redundant with what's
(auto-generated!) in `cobol_parser/text_keywords.ml` (and a little bit of
manual mapping from `combined_tokens` in `cobol_parser/text_tokenizer.ml`.
TODO: use the aforementioned associations instead of redefining them
below. *)

open Indent_type

(*data division *)
Expand Down Expand Up @@ -42,12 +49,11 @@ let keyword_stmt =
"TERMINATE"; "UNLOCK"; "UNSTRING"; "USE"; "VALIDATE"; "WRITE";
(*standard 1985*)
"ALTER"; "DISABLE"; "ENABLE"; "PURGE"; "RECEIVE"; "SEND";]
let keyword_stmt_tbl = Hashtbl.create 16
let () =
List.iter (fun x ->
Hashtbl.add keyword_stmt_tbl x ())
keyword_stmt
let is_statement = Hashtbl.mem keyword_stmt_tbl
let is_statement =
let keyword_stmt_tbl = Hashtbl.create 16 in
List.iter (fun x -> Hashtbl.add keyword_stmt_tbl x ()) keyword_stmt;
fun word ->
Hashtbl.mem keyword_stmt_tbl word


let str_proc_keyword_tbl = Hashtbl.create 16
Expand Down
6 changes: 0 additions & 6 deletions src/lsp/cobol_lsp/lsp_lookup.ml
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,6 @@ open TYPES

(* --- *)

let bare name : Cobol_ptree.qualname = Cobol_ptree.Name name
let qual name : Cobol_ptree.qualname option -> Cobol_ptree.qualname = function
| None -> Cobol_ptree.Name name
| Some qn -> Cobol_ptree.Qual (name, qn)
let simple_name : Cobol_ptree.qualname -> string = function
| Qual (n, _) | Name n -> ~&n
let baseloc_of_qualname: Cobol_ptree.qualname -> srcloc = function
| Name name
| Qual (name, _) -> ~@name
Expand Down
3 changes: 2 additions & 1 deletion src/lsp/cobol_parser/text_categorizer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ rule token = parse
| (sign? digit* as n) (['.' ','] as sep) (digit+ as d)
{ Numeric (n, Some (sep, d, None)) }

| (sign? digit* as n) (['.' ','] as sep) (digit+ as d) 'E' (exponent as e)
| (sign? digit* as n) (['.' ','] as sep) (digit+ as d) ['E' 'e']
(exponent as e)
{ Numeric (n, Some (sep, d, Some e)) }

| ident as s (* 31 characters max *)
Expand Down
2 changes: 1 addition & 1 deletion src/lsp/cobol_parser/text_lexer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ let reserve_words lexer : Cobol_config.words_spec -> unit =
(* --- *)

let token_of_keyword { token_of_keyword; _ } s =
match Hashtbl.find token_of_keyword s with
match Hashtbl.find token_of_keyword (String.uppercase_ascii s) with
| { token; enabled = true; reserved = true } -> token
| _ -> raise Not_found

Expand Down
2 changes: 1 addition & 1 deletion src/lsp/cobol_preproc/preproc_state.ml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ let find_phrase first_word ?(prefix = `Same) text : _ result =
Ok { prefix; phrase; suffix }

(** [find_full_phrase words ~search_deep ~try_hard ~prefix text] looks for a
pharse comprised of all words in [words] and termiates with a period in
phrase comprised of all words in [words] that termiates with a period in
[text]. If [prefix = `Rev] and upon success, the prefix is reveresed in the
returned structure.
Expand Down
4 changes: 2 additions & 2 deletions src/lsp/cobol_preproc/src_lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,7 @@ and cdtoken = parse
{ cdtoken lexbuf }

| (nonblank+ as s)
{ CDTok (try Hashtbl.find cdtoken_of_keyword s
{ CDTok (try Hashtbl.find cdtoken_of_keyword (String.uppercase_ascii s)
with Not_found -> TEXT_WORD s) }

| eof
Expand All @@ -599,7 +599,7 @@ and pptoken = parse
| '.' { PPTok PERIOD }

| (([^ '(' ')']+) as s)
{ PPTok (try Hashtbl.find pptoken_of_keyword s
{ PPTok (try Hashtbl.find pptoken_of_keyword (String.uppercase_ascii s)
with Not_found -> TEXT_WORD s) }

| eof
Expand Down
7 changes: 3 additions & 4 deletions src/lsp/cobol_preproc/src_lexing.ml
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,8 @@ let unexpected (type k) (kind: k c)

(* --- *)

(* Switch to upper-cased representation already: *)
let textword s =
TextWord (String.uppercase_ascii ~&s) &@<- s
TextWord ~&s &@<- s

let cdirword ?marker ~start_pos ~end_pos s state =
let s = remove_blanks s in
Expand All @@ -238,7 +237,7 @@ let cdirword ?marker ~start_pos ~end_pos s state =
s, start_pos
in
let loc = raw_loc ~start_pos ~end_pos state in
emit (CDirWord (String.uppercase_ascii s) &@ loc) state
emit (CDirWord s &@ loc) state


let rev_pseudotext: text -> _ state -> pseudotext * _ state = fun text state ->
Expand Down Expand Up @@ -392,7 +391,7 @@ let unclosed_ebcdics =
let symc = "[0-9][0-9, ]*" in (* symbolic EBCDIC *)
let dblq = "\\([^\"]\\|\"" ^ symc ^ "\"\\|\"\"\\)*\"" ^ symc in
let splq = "\\([^']\\|'" ^ symc ^ "\"\\|''\\)*'" ^ symc in
let re = Str.regexp ("^\\(" ^ dblq ^ "\\|" ^ splq ^ "\\)$") in
let re = Str.regexp_case_fold ("^\\(" ^ dblq ^ "\\|" ^ splq ^ "\\)$") in
fun str -> Str.string_match re str 0

let quoted_alphanum ?(fitting = Nominal) ~knd
Expand Down
10 changes: 7 additions & 3 deletions src/lsp/cobol_preproc/text.ml
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,13 @@ include FMT
(* Various predicates on source-flagged character strings *)

let textwordp t = match ~&t with TextWord _ -> true | _ -> false
let textword_eqp ~eq:w t = match ~&t with TextWord t -> t = w | _ -> false
let textword_eqp ~eq:w t = match ~&t with
| TextWord t -> String.uppercase_ascii t = String.uppercase_ascii w
| _ -> false
let cdirp t = match ~&t with CDirWord _ -> true | _ -> false
let cdir_eqp ~eq:w t = match ~&t with CDirWord t -> t = w | _ -> false
let cdir_eqp ~eq:w t = match ~&t with
| CDirWord t -> String.uppercase_ascii t = String.uppercase_ascii w
| _ -> false

(* Manipulating pseudo-words and text *)

Expand All @@ -108,7 +112,7 @@ let split_pseudo_string w =
(PwText t &@ tloc) :: acc, wloc
| Str.Delim d ->
let dloc, wloc = split_loc wloc d in
(PwDelim (d, Str.regexp (Str.quote d)) &@ dloc) :: acc, wloc
(PwDelim (d, Str.regexp_case_fold (Str.quote d)) &@ dloc) :: acc, wloc
end ([], ~@w)
in
List.rev pseudoword_items
Expand Down
2 changes: 2 additions & 0 deletions src/lsp/cobol_preproc/text.mli
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ include module type of Text_types
and type comment = Text_types.comment
and type comments = Text_types.comments

(** [*_eqp] functions below perform (ASCII) case-insensitive comparisons. *)

val textwordp : text_word with_loc -> bool
val textword_eqp : eq:string -> text_word with_loc -> bool

Expand Down
2 changes: 1 addition & 1 deletion src/lsp/cobol_preproc/text_processor.ml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ let partial_word (type k) (req: k partial_word_request) words : (k, _) result =
let partial_subst (k: partial_replacing) ({ payload = pat; _ } as repl_from) =
{ partial_subst_dir = k.repl_dir;
partial_subst_len = String.length pat;
partial_subst_regexp = Str.regexp @@ match k.repl_dir with
partial_subst_regexp = Str.regexp_case_fold @@ match k.repl_dir with
| Leading when k.repl_strict -> "^" ^ Str.quote pat ^ "\\(.+\\)$"
| Leading -> "^" ^ Str.quote pat ^ "\\(.*\\)$"
| Trailing when k.repl_strict -> "^\\(.+\\)" ^ Str.quote pat ^ "$"
Expand Down
Loading

0 comments on commit 0738b47

Please sign in to comment.