From d967b65193eba20c569a003cd5c35299b9e79ffd Mon Sep 17 00:00:00 2001 From: David Declerck Date: Wed, 2 Oct 2024 10:31:15 +0200 Subject: [PATCH] Merge SVN 5024 --- cobc/ChangeLog | 30 ++++++++ cobc/cobc.c | 15 ++-- cobc/codegen.c | 68 ++++++++---------- cobc/parser.y | 111 +++++++++++++++------------- cobc/pplex.l | 188 ++++++++++++++++++++++++++++++------------------ cobc/reserved.c | 2 - cobc/scanner.l | 113 +++++++++++++++-------------- cobc/tree.h | 1 - 8 files changed, 309 insertions(+), 219 deletions(-) diff --git a/cobc/ChangeLog b/cobc/ChangeLog index c6100d62f..68dbb33b9 100644 --- a/cobc/ChangeLog +++ b/cobc/ChangeLog @@ -36,6 +36,28 @@ * typeck.c (cb_emit_sort_init): generate call to cob_file_sort_options * help.c (cobc_print_usage_dialect): extended -fregister help +2023-04-17 Simon Sobisch + + * scanner.l: dropped last_token_is_dot and integer_is_label by checking + last_token instead + * scanner.l, parser.y, reserved.c, tree.h: dropped cobc_force_literal by + introducing second_last_token and checking the tokens instead + * scanner.l: early consume spaces after comma and semicolon + * scanner.l: simplify check for redundant periods by splitting its rule + * cobc.c (file_basename): directly use filename, + only handle backslash on win32 + * codegen.c: minor refactoring to drop global string_buffer + * codegen.c: generate COB_EXT_IMPORT and COB_EXT_EXPORT instead of extern + * codegen.c (codegen): minor refactoring + * parser.y (setup_registers): extracted + * parser.y: minor refactoring + +2023-04-14 Simon Sobisch + + * pplex.l (ppopen_get_file): extracted from ppopen and minor refactor + * pplex.l (ppecho): disabled performance-dropping fflush until found + necessary + 2023-03-29 Simon Sobisch * typeck.c (count_pic_edited): renamed from count_pic_alphanumeric_edited @@ -78,6 +100,8 @@ * typeck.c (cb_build_cond_fields): optimize comparison between field and ZEROES up to COB_ZEROES_ALPHABETIC_BYTE_LENGTH + * pplex.l (ppopen): fixes for auto-detection of reference-format FR #45 + handling tabs, dos eol and empty lines correctly 2023-02-28 Simon Sobisch @@ -207,6 +231,7 @@ 2023-01-28 Fabrice Le Fessant + FR #45: auto-detection of reference-format * cobc.c (main): initialize cb_config_text_column to 72 to avoid a race condition in config files. * pplex.l (ppopen): try to autodetect the format of a file after @@ -229,6 +254,10 @@ * cobc.c, flag.def, cobc.h: handle external table for "ebcdic-table" * codegen.c: generate code using the external table +2023-01-20 Ron Norman + + * codeoptim.c: fix cob_check_subscript_inline for min subscript value + 2023-01-20 Simon Sobisch fixed bug #704: ANY LENGTH cannot have ref-mod, POS not context-sensitive @@ -1269,6 +1298,7 @@ 2022-01-25 Nicolas Berthier + FR #137: relax syntax for partial replace * pplex.l, ppparse.y, config.h: support COPY and REPLACE statements with partial REPLACING operands specified using literals diff --git a/cobc/cobc.c b/cobc/cobc.c index 3c01e299b..2757e70ac 100644 --- a/cobc/cobc.c +++ b/cobc/cobc.c @@ -2761,7 +2761,6 @@ file_stripext (char *buff) static char * file_basename (const char *filename, const char *strip_ext) { - const char *p; const char *startp; const char *endp; size_t len; @@ -2775,19 +2774,23 @@ file_basename (const char *filename, const char *strip_ext) /* LCOV_EXCL_STOP */ /* Remove directory name */ - startp = NULL; - for (p = filename; *p; p++) { - if (*p == '/' || *p == '\\') { - startp = p; + startp = strrchr (filename, '/'); +#if defined(_WIN32) || defined(__CYGWIN__) + { + const char *slash = strrchr (filename, '\\'); + if (slash + && (!startp || startp < slash)) { + startp = slash; } } +#endif if (startp) { startp++; } else { startp = filename; } - /* Remove extension */ + /* Remove extension (= after last '.') */ if (!strip_ext || strcmp (strip_ext, COB_BASENAME_KEEP_EXT)) { endp = strrchr (filename, '.'); } else { diff --git a/cobc/codegen.c b/cobc/codegen.c index abacefac3..56b82edb4 100644 --- a/cobc/codegen.c +++ b/cobc/codegen.c @@ -153,7 +153,6 @@ static struct base_list *globext_cache = NULL; static struct base_list *local_base_cache = NULL; static struct string_list *string_cache = NULL; static struct string_list *source_cache = NULL; -static char *string_buffer = NULL; static struct label_list *label_cache = NULL; static struct ml_tree_list *ml_tree_cache = NULL; @@ -581,7 +580,6 @@ clear_local_codegen_vars (void) local_base_cache = NULL; local_field_cache = NULL; static_call_cache = NULL; - string_buffer = NULL; string_cache = NULL; ml_tree_cache = NULL; @@ -13395,6 +13393,7 @@ output_internal_function (struct cb_program *prog, cb_tree parameter_list) for (l = prog->file_list; l; l = CB_CHAIN (l)) { f = CB_FILE (CB_VALUE (l))->record; if (f->flag_external) { + char string_buffer[COB_MINI_BUFF]; strcpy (string_buffer, f->name); for (p = string_buffer; *p; p++) { if (*p == '-' || *p == ' ') { @@ -13891,9 +13890,9 @@ static void output_entry_function (struct cb_program *prog, cb_tree entry, cb_tree parameter_list, const int gencode) { - const char *entry_name; - cb_tree using_list; - cb_tree l; + const char *entry_name = CB_LABEL (CB_PURPOSE (entry))->name; + cb_tree using_list = CB_VALUE (CB_VALUE (entry)); + cb_tree l = CB_PURPOSE (CB_VALUE (entry)); /* entry convention */ cb_tree l1; cb_tree l2; struct cb_field *f; @@ -13909,11 +13908,6 @@ output_entry_function (struct cb_program *prog, cb_tree entry, int sticky_nonp[MAX_CALL_FIELD_PARAMS] = { 0 }; int entry_convention = 0; - entry_name = CB_LABEL (CB_PURPOSE (entry))->name; - using_list = CB_VALUE (CB_VALUE (entry)); - - /* entry convention */ - l = CB_PURPOSE (CB_VALUE (entry)); /* LCOV_EXCL_START */ if (!l || !(CB_INTEGER_P (l) || CB_NUMERIC_LITERAL_P (l))) { /* not translated as it is a highly unlikely internal abort */ @@ -13921,6 +13915,7 @@ output_entry_function (struct cb_program *prog, cb_tree entry, COBC_ABORT (); } /* LCOV_EXCL_STOP */ + if (CB_INTEGER_P (l)) { entry_convention = CB_INTEGER (l)->val; } else if (CB_NUMERIC_LITERAL_P (l)) { @@ -13930,18 +13925,16 @@ output_entry_function (struct cb_program *prog, cb_tree entry, if (gencode) { output_line ("/* ENTRY '%s' */", entry_name); output_newline (); -#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(__clang__) } else { if (!prog->nested_level) { - output ("__declspec(dllexport) "); + output ("COB_EXT_EXPORT "); } -#endif } /* Output return type. */ - if ((prog->nested_level && !prog->flag_void) - || (prog->flag_main && !prog->flag_recursive - && !strcmp(prog->program_id, entry_name))) { + if ( (prog->nested_level && !prog->flag_void) + || (prog->flag_main && !prog->flag_recursive + && !strcmp (prog->program_id, entry_name))) { output ("static "); } if (prog->flag_void) { @@ -14271,7 +14264,7 @@ output_function_prototypes (struct cb_program *prog) may use the same but not-default function */ if (strcmp (prog->extfh, extfh_value) != 0 && strcmp ("EXTFH", extfh_value) != 0) { - output_line ("extern int %s (unsigned char *opcode, FCD3 *fcd);", + output_line ("COB_EXT_IMPORT int %s (unsigned char *opcode, FCD3 *fcd);", extfh_value); } } @@ -14292,7 +14285,7 @@ output_function_prototypes (struct cb_program *prog) /* prototype for general EXTFH function */ if (prog->file_list && prog->extfh && strcmp ("EXTFH", prog->extfh) != 0) { - output ("extern int %s (unsigned char *opcode, FCD3 *fcd);", prog->extfh); + output ("COB_EXT_IMPORT int %s (unsigned char *opcode, FCD3 *fcd);", prog->extfh); output_newline (); } @@ -14382,31 +14375,30 @@ emit_base_symbols (struct cb_program *prog) void codegen (struct cb_program *prog, const char *translate_name) { + const int set_xref = cb_listing_xref; int subsequent_call = 0; codegen_init (prog, translate_name); + /* Temporarily disable cross-reference during C generation */ + cb_listing_xref = 0; + for (;;) { - /* Temporarily disable cross-reference during C generation */ - if (cb_listing_xref) { - cb_listing_xref = 0; - codegen_internal (current_program, subsequent_call); - cb_listing_xref = 1; - } else { - codegen_internal (current_program, subsequent_call); - } + codegen_internal (current_program, subsequent_call); if (!current_program->next_program) { break; } - if (current_program->flag_file_global && current_program->next_program->nested_level) { + subsequent_call = 1; + if (current_program->flag_file_global + && current_program->next_program->nested_level) { has_global_file = 1; } else { has_global_file = 0; } current_program = current_program->next_program; - subsequent_call = 1; } current_program = prog; + cb_listing_xref = set_xref; codegen_finalize (); } @@ -14414,6 +14406,8 @@ codegen (struct cb_program *prog, const char *translate_name) void codegen_init (struct cb_program *prog, const char *translate_name) { + char timestamp_buffer[COB_MINI_BUFF]; + current_program = prog; current_section = NULL; current_paragraph = NULL; @@ -14422,6 +14416,7 @@ codegen_init (struct cb_program *prog, const char *translate_name) output_line_number = 1; output_name = (char*)translate_name; + /* escape output name for C string */ if (strchr (output_name, '\\')) { char buff[COB_MEDIUM_BUFF]; int pos = 0; @@ -14456,32 +14451,27 @@ codegen_init (struct cb_program *prog, const char *translate_name) } } - if (!string_buffer) { - string_buffer = cobc_main_malloc ((size_t)COB_MINI_BUFF); - } - - strftime (string_buffer, (size_t)COB_MINI_MAX, + strftime (timestamp_buffer, (size_t)COB_MINI_MAX, "%b %d %Y %H:%M:%S", ¤t_compile_tm); output_target = yyout; - output_header (string_buffer, NULL); + output_header (timestamp_buffer, NULL); output_target = cb_storage_file; - output_header (string_buffer, NULL); + output_header (timestamp_buffer, NULL); { - struct cb_program* cp; + struct cb_program *cp; for (cp = prog; cp; cp = cp->next_program) { if (cp->flag_prototype) { continue; } output_target = cp->local_include->local_fp; - output_header (string_buffer, cp); + output_header (timestamp_buffer, cp); } } output_target = yyout; output_standard_includes (prog); - /* string_buffer has formatted date from above */ - output_gnucobol_defines (string_buffer); + output_gnucobol_defines (timestamp_buffer); output_newline (); #if defined(HAVE_SIGACTION) && !defined(_WIN32) diff --git a/cobc/parser.y b/cobc/parser.y index 4502eab1a..d09de33a5 100644 --- a/cobc/parser.y +++ b/cobc/parser.y @@ -152,7 +152,6 @@ unsigned int cobc_in_procedure = 0; unsigned int cobc_in_data_division = 0; unsigned int cobc_in_usage = 0; unsigned int cobc_in_repository = 0; -unsigned int cobc_force_literal = 0; unsigned int cobc_cs_check = 0; unsigned int cobc_allow_program_name = 0; unsigned int cobc_in_xml_generate_body = 0; @@ -469,8 +468,8 @@ begin_implicit_statement (void) # if 0 /* activate only for debugging purposes for attribs FIXME: Replace by DEBUG_LOG function */ -static -void print_bits (cob_flags_t num) +static void +print_bits (cob_flags_t num) { unsigned int size = sizeof (cob_flags_t); unsigned int max_pow = 1 << (size * 8 - 1); @@ -487,16 +486,16 @@ void print_bits (cob_flags_t num) /* functions for storing current position and assigning it to a cb_tree after its parsing is finished */ -static COB_INLINE -void backup_current_pos (void) +static COB_INLINE void +backup_current_pos (void) { backup_source_file = cb_source_file; backup_source_line = cb_source_line; } #if 0 /* currently not used */ -static COB_INLINE -void set_pos_from_backup (cb_tree x) +static COB_INLINE void +set_pos_from_backup (cb_tree x) { x->source_file = backup_source_file; x->source_line = backup_source_line; @@ -538,7 +537,9 @@ emit_entry (const char *name, const int encode, cb_tree using_list, cb_tree conv for (l = current_program->entry_list; l; l = CB_CHAIN (l)) { struct cb_label *check = CB_LABEL (CB_PURPOSE (l)); if (strcmp (name, check->name) == 0) { - cb_error_x (CB_TREE (current_statement), + cb_error_x (current_statement + ? CB_TREE (current_statement) + : CB_TREE (current_program), _("ENTRY '%s' duplicated"), name); } } @@ -551,7 +552,7 @@ emit_entry (const char *name, const int encode, cb_tree using_list, cb_tree conv current_program->entry_list = cb_list_append (current_program->entry_list, - CB_BUILD_PAIR (label, CB_BUILD_PAIR(entry_conv, using_list))); + CB_BUILD_PAIR (label, CB_BUILD_PAIR (entry_conv, using_list))); } /* Main entry point and the number of its main parameters */ @@ -1131,7 +1132,6 @@ clear_initial_values (void) cobc_in_procedure = 0; cobc_in_data_division = 0; cobc_in_repository = 0; - cobc_force_literal = 0; cobc_in_xml_generate_body = 0; cobc_in_json_generate_body = 0; non_const_word = 0; @@ -1232,7 +1232,8 @@ end_scope_of_program_name (struct cb_program *program, const unsigned char type) /* Remove any subprograms */ l = CB_LIST (defined_prog_list); while (l) { - if (CB_PROGRAM (l->value)->nested_level > program->nested_level) { + const struct cb_program *lprog = CB_PROGRAM (l->value); + if (lprog->nested_level > program->nested_level) { remove_program_name (l, prev); } else { prev = l; @@ -1248,11 +1249,11 @@ end_scope_of_program_name (struct cb_program *program, const unsigned char type) if (!program->flag_common) { l = CB_LIST (defined_prog_list); while (l) { + const struct cb_program *lprog = CB_PROGRAM (l->value); /* The nested_level check is for the pathological case where two nested programs have the same name */ - if (0 == strcmp (program->orig_program_id, - CB_PROGRAM (l->value)->orig_program_id) - && program->nested_level == CB_PROGRAM (l->value)->nested_level) { + if (program->nested_level == lprog->nested_level + && !strcmp (program->orig_program_id, lprog->orig_program_id)) { remove_program_name (l, prev); if (prev && prev->chain != NULL) { l = CB_LIST (prev->chain); @@ -1272,6 +1273,17 @@ end_scope_of_program_name (struct cb_program *program, const unsigned char type) } } +static void +setup_registers (void) +{ + backup_source_file = cb_source_file; + cb_source_file = "register-definition"; + cb_set_intr_when_compiled (); + cb_build_registers (); + cb_add_external_defined_registers (); + cb_source_file = backup_source_file; +} + static void setup_program_start (void) { @@ -1289,29 +1301,31 @@ setup_program_start (void) } static int -setup_program (cb_tree id, cb_tree as_literal, const unsigned char type, const int prototype) +setup_program (cb_tree id, cb_tree as_literal, const enum cob_module_type type, const int prototype) { const char *external_name = NULL; setup_program_start (); - /* finish last program/function */ - if (!first_prog) { + if (first_prog) { + /* in this case we had setup an "empty" current_program + along with registers before (on start) and now only "add" + to that */ + first_prog = 0; + } else { + /* finish last program/function */ if (!current_program->flag_validated) { current_program->flag_validated = 1; cb_validate_program_body (current_program); } + /* setup new */ clear_initial_values (); current_program = cb_build_program (current_program, depth); if (depth) { build_words_for_nested_programs(); } - cb_set_intr_when_compiled (); - cb_build_registers (); - cb_add_external_defined_registers (); - } else { - first_prog = 0; + setup_registers (); } /* set internal name */ @@ -1320,6 +1334,7 @@ setup_program (cb_tree id, cb_tree as_literal, const unsigned char type, const i } else { current_program->program_name = CB_NAME (id); } + stack_progid[depth] = current_program->program_name; current_program->prog_type = type; current_program->flag_prototype = prototype; @@ -3462,17 +3477,14 @@ set_record_size (cb_tree min, cb_tree max) start: { - clear_initial_values (); defined_prog_list = NULL; cobc_cs_check = 0; main_flag_set = 0; + clear_initial_values (); current_program = cb_build_program (NULL, 0); - backup_source_file = cb_source_file; - cb_set_intr_when_compiled (); - cb_build_registers (); - cb_add_external_defined_registers (); + setup_registers (); } compilation_group { @@ -4327,7 +4339,7 @@ repository_name: | PROGRAM WORD _as_literal { if ($2 != cb_error_node - && cb_verify (cb_program_prototypes, _("PROGRAM phrase"))) { + && cb_verify (cb_program_prototypes, _("PROGRAM phrase"))) { setup_prototype ($2, $3, COB_MODULE_TYPE_PROGRAM, 0); } } @@ -11007,13 +11019,13 @@ procedure_param_list: procedure_param: _procedure_type _size_optional _procedure_optional WORD _acu_size { - cb_tree x; - struct cb_field *f; - - x = cb_build_identifier ($4, 0); - if ($3 == cb_int1 && CB_VALID_TREE (x) && cb_ref (x) != cb_error_node) { - f = CB_FIELD (cb_ref (x)); - f->flag_is_pdiv_opt = 1; + cb_tree x = cb_build_identifier ($4, 0); + if ($3 == cb_int1 + && CB_VALID_TREE (x)) { + cb_tree fx = cb_ref (x); + if (fx != cb_error_node) { + CB_FIELD (fx)->flag_is_pdiv_opt = 1; + } } $$ = CB_BUILD_PAIR (cb_int (call_mode), x); @@ -11464,10 +11476,8 @@ statement_list: statements: { - cb_tree label; - if (!current_section) { - label = cb_build_reference ("MAIN SECTION"); + cb_tree label = cb_build_reference ("MAIN SECTION"); current_section = CB_LABEL (cb_build_label (label, NULL)); current_section->flag_section = 1; current_section->flag_dummy_section = 1; @@ -11477,12 +11487,12 @@ statements: emit_statement (CB_TREE (current_section)); } if (!current_paragraph) { - label = cb_build_reference ("MAIN PARAGRAPH"); + cb_tree label = cb_build_reference ("MAIN PARAGRAPH"); current_paragraph = CB_LABEL (cb_build_label (label, NULL)); - CB_TREE (current_paragraph)->source_file - = CB_TREE (current_section)->source_file; - CB_TREE (current_paragraph)->source_line - = CB_TREE (current_section)->source_line; + current_paragraph->common.source_file + = current_section->common.source_file; + current_paragraph->common.source_line + = current_section->common.source_line; current_paragraph->flag_declaratives = !!in_declaratives; current_paragraph->flag_skip_label = !!skip_statements; current_paragraph->flag_dummy_paragraph = 1; @@ -20091,8 +20101,9 @@ scope_terminator: _dot: TOK_DOT | { - if (! cb_verify (cb_missing_period, _("optional period"))) + if (! cb_verify (cb_missing_period, _("optional period"))) { YYERROR; + } } ; @@ -20100,8 +20111,9 @@ _dot_or_else_end_of_file_control: TOK_DOT | _file_control_end_delimiter { - if (! cb_verify (cb_missing_period, _("optional period"))) + if (! cb_verify (cb_missing_period, _("optional period"))) { YYERROR; + } cobc_repeat_last_token = 1; } ; @@ -20120,8 +20132,9 @@ _dot_or_else_end_of_file_description: | level_number_in_area_a | _file_description_end_delimiter { - if (! cb_verify (cb_missing_period, _("optional period"))) + if (! cb_verify (cb_missing_period, _("optional period"))) { YYERROR; + } cobc_repeat_last_token = 1; } ; @@ -20140,8 +20153,9 @@ _dot_or_else_end_of_record_description: | level_number_in_area_a | _record_description_end_delimiter { - if (! cb_verify (cb_missing_period, _("optional period"))) + if (! cb_verify (cb_missing_period, _("optional period"))) { YYERROR; + } cobc_repeat_last_token = 1; } ; @@ -20160,8 +20174,9 @@ _dot_or_else_area_a: /* in PROCEDURE DIVISION */ TOK_DOT | TOKEN_EOF { - if (! cb_verify (cb_missing_period, _("optional period"))) + if (! cb_verify (cb_missing_period, _("optional period"))) { YYERROR; + } } | WORD_IN_AREA_A { diff --git a/cobc/pplex.l b/cobc/pplex.l index 73c46128d..beef152ae 100644 --- a/cobc/pplex.l +++ b/cobc/pplex.l @@ -1183,120 +1183,165 @@ is_fixed_indicator (char c){ } } -/* open file (source or coypbook) for further processing */ -int -ppopen (const char *name, struct cb_replace_list *replacing_list) +/* open file with the specified 'name', then check for BOM (skipped) and if + in reference-format "auto" also for "likely free-format" */ +static FILE * +ppopen_get_file (const char *name) { struct copy_info *current_copy_info; - char *dname; - cb_tree x = NULL; - - if (ppin) { - for (; newline_count > 0; newline_count--) { - ungetc ('\n', ppin); - } - } - /* Open copy/source file, or use stdin */ + /* special case stdin; + note that we cannot handle BOM or deduce source format in this case + as rewind() clears the input buffer if used on stdin (and output in + console has normally no BOM at all), therefore compile from stdin + _has to_ specify free format if needed (or, more reasonable, use a CDF + directive to specify that) */ if (strcmp (name, COB_DASH) == 0) { - ppin = stdin; - } else { - for (current_copy_info = copy_stack; current_copy_info; current_copy_info = current_copy_info->next) { - /* FIXME: for WIN32 compare with cleaning / and \ (COPY "lib/file" vs COPY "lib\file"), - * ideally open first, then check if we have thy physical same file - * (could also fix symlinked files) */ - if (!strcmp (name, current_copy_info->dname)) { - struct cb_tree_common xc; - x = &xc; - for (current_copy_info = current_copy_info->next; current_copy_info; current_copy_info = current_copy_info->prev) { - int line; - if (current_copy_info->prev) { - line = current_copy_info->prev->line; - } else { - line = cb_source_line; - } - cb_inclusion_note (current_copy_info->dname, line); + return stdin; + } + + /* check for recursive inclusion */ + for (current_copy_info = copy_stack; current_copy_info; current_copy_info = current_copy_info->next) { + /* FIXME: for WIN32 compare with cleaning / and \ (COPY "lib/file" vs COPY "lib\file"), + * ideally open first, then check if we have the same physical file + * (would also fix recursion check for symlinked files) */ + if (!strcmp (name, current_copy_info->dname)) { + struct cb_tree_common loc; + for (current_copy_info = current_copy_info->next; current_copy_info; current_copy_info = current_copy_info->prev) { + int line; + if (current_copy_info->prev) { + line = current_copy_info->prev->line; + } else { + line = cb_source_line; } - x->source_file = name; - x->source_line = -1; - cb_error_x (x, _("recursive inclusion")); - break; + cb_inclusion_note (current_copy_info->dname, line); } - + loc.source_file = name; + loc.source_line = -1; + cb_error_x (&loc, _("recursive inclusion")); + return 0; } - if (!x) { + } + + /* try to open the file with the given name */ #ifdef __OS400__ - ppin = fopen (name, "r"); + ppin = fopen (name, "r"); #else - ppin = fopen (name, "rb"); + ppin = fopen (name, "rb"); #endif - } else { - ppin = 0; - } - } - - if (!ppin && !x) { + if (!ppin) { cb_error ("%s: %s", name, cb_get_strerror ()); /* Note: postpone error exit as we need the saved buffers later on */ + return 0; } - /* Check for BOM - *not* for input from stdin as rewind() clears the input - buffer if used on stdin and output in console has normally no BOM at all */ - if (ppin && strcmp (name, COB_DASH) != 0) { + /* Check for BOM and, if source-format was not specified, also for free-form */ + { int fseek_to = 0 ; #define COBC_LOOKAHEAD 20 unsigned char buffer[COBC_LOOKAHEAD]; int nread = fread (buffer, 1, COBC_LOOKAHEAD, ppin); - int pos = 0; + + /* check for and skip UTF-8 BOM */ if (nread >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF) { fseek_to = 3; - pos = 3; } - if (source_format == CB_FORMAT_AUTO){ - /* If indicator is wrong on first line, switch to free format */ + + /* try to deduce source format */ + if (source_format == CB_FORMAT_AUTO) { + int pos = fseek_to; + /* if indicator is wrong on first line with source, switch to free format */ /* skip empty lines */ + char last_pos_7 = ' '; int amount_of_0a_seen = 0; - while (nread-pos > 7 && (buffer[pos] == '\r' || buffer[pos] == '\n')){ - if (buffer[pos] == '\n') amount_of_0a_seen++; + int line_pos = 0; + while (nread - pos > 7) { + switch (buffer[pos]) { + case '\r': + break; + case '\n': + amount_of_0a_seen++; + line_pos = 0; + break; + case '\t': + buffer[pos] = ' '; + line_pos++; + while (line_pos % cb_tab_width != 0) { + line_pos++; + } + break; + default: + line_pos++; + break; + } + if (line_pos >= 7) { + last_pos_7 = buffer[pos]; + break; + } pos++; } /* check tab or indicator */ - if ( nread-pos > 7 && buffer[pos] != '\t' && !is_fixed_indicator (buffer[pos+6]) ){ + if (!is_fixed_indicator (last_pos_7)) { struct cb_tree_common loc; loc.source_file = name; loc.source_line = 1 + amount_of_0a_seen; loc.source_column = 7; cb_note_x (COB_WARNOPT_NONE, &loc, _("free format detected")); - (void) cobc_deciph_source_format ("FREE"); + (void)cobc_deciph_source_format ("FREE"); } } - fseek(ppin, fseek_to, SEEK_SET); + fseek (ppin, fseek_to, SEEK_SET); + } + + return ppin; +} + +/* open file (source or coypbook) for further processing */ +int +ppopen (const char *name, struct cb_replace_list *replacing_list) +{ + struct copy_info *current_copy_info; + char *dname; + + if (ppin) { + for (; newline_count > 0; newline_count--) { + ungetc ('\n', ppin); + } } - if (source_format==CB_FORMAT_AUTO) + /* open copy/source file, or use stdin */ + ppin = ppopen_get_file (name); + + /* note: detection of free format in ppopen_get_file above (not for stdin) */ + if (source_format == CB_FORMAT_AUTO) { cobc_set_source_format (CB_FORMAT_FIXED); + } - if (cb_current_file){ - if (cb_current_file->source_format==CB_FORMAT_AUTO) - cb_current_file->source_format=cobc_get_source_format (); - /* This is delayed until after format detection */ + /* store listing information if requested */ + if (cb_current_file) { + if (cb_current_file->source_format == CB_FORMAT_AUTO) { + cb_current_file->source_format = cobc_get_source_format (); + } + /* this must be delayed until after format detection */ cobc_set_listing_header_code (); /* Save name for listing */ - if (!cb_current_file->name) + if (!cb_current_file->name) { cb_current_file->name = cobc_strdup (name); + } + cb_current_file->copy_line = cb_source_line; } - /* Add to dependency list */ + /* add opened file to dependency list */ if (cb_depend_file) { cb_depend_list = pp_text_list_add (cb_depend_list, name, strlen (name)); } - /* Preserve the current buffer */ + /* preserve the current buffer */ current_copy_info = cobc_malloc (sizeof (struct copy_info)); current_copy_info->file = cb_source_file; current_copy_info->buffer = YY_CURRENT_BUFFER; - /* Save variables */ + /* save variables */ current_copy_info->replacing = current_replace_list; current_copy_info->line = cb_source_line; current_copy_info->quotation_mark = quotation_mark; @@ -1309,11 +1354,7 @@ ppopen (const char *name, struct cb_replace_list *replacing_list) } copy_stack = current_copy_info; - if (cb_current_file) { - cb_current_file->copy_line = cb_source_line; - } - - /* Set replacing list */ + /* set replacing list */ if (replacing_list) { if (current_replace_list) { replacing_list->last->next = current_replace_list; @@ -1339,11 +1380,13 @@ ppopen (const char *name, struct cb_replace_list *replacing_list) } #endif - /* Switch to new buffer */ + /* switch to new buffer */ switch_to_buffer (1, dname, yy_create_buffer (ppin, YY_BUF_SIZE)); /* postponed errror handling */ - if (!ppin) return -1; + if (!ppin) { + return -1; + } return 0; } @@ -2610,8 +2653,11 @@ ppecho (const char *text, const cob_u32_t alt_space, const int textlen) struct cb_text_list *save_ptr_text_queue; int status, save_status; +#if 0 /* Simon: disabled until found necessary, as this takes together with frwite + a big part of the parsing phase of cobc, increasing the IO cost by numbers */ /* ensure nothing is in the stream buffer */ fflush (ppout); +#endif /* Check for replacement text before outputting */ if (alt_space) { diff --git a/cobc/reserved.c b/cobc/reserved.c index f55e89d9a..1e126aaff 100644 --- a/cobc/reserved.c +++ b/cobc/reserved.c @@ -4953,14 +4953,12 @@ lookup_reserved_word (const char *name) if (p->token == FUNCTION_ID) { cobc_cs_check = 0; - cobc_force_literal = 1; } else if (p->token == INTRINSIC) { if (!cobc_in_repository) { return NULL; } } else if (p->token == PROGRAM_ID) { cobc_cs_check = CB_CS_PROGRAM_ID; - cobc_force_literal = 1; } else if (p->token == REPOSITORY) { cobc_in_repository = 1; } diff --git a/cobc/scanner.l b/cobc/scanner.l index dcbf5348b..0b2af1cb3 100644 --- a/cobc/scanner.l +++ b/cobc/scanner.l @@ -115,6 +115,7 @@ static int yywrap (void) { #define RETURN_TOK(expr) \ do { \ last_yylval = yylval; \ + second_last_token = last_token; \ last_token = (expr); \ return last_token; \ } ONCE_COB @@ -156,8 +157,12 @@ enum cb_sym_ebcdic_state { }; /* Local variables */ -static cb_tree last_yylval; -static int last_token; + +/* local history */ +static cb_tree last_yylval; /* for repeating the last token*/ +static enum yytokentype last_token; +static enum yytokentype second_last_token; /* for history purposes */ + static struct cb_level_78 *top_78_ptr = NULL; static struct cb_level_78 *const_78_ptr = NULL; static struct cb_level_78 *lev_78_ptr = NULL; @@ -168,8 +173,6 @@ static char *pic_buff2 = NULL; static size_t plex_size; static size_t pic1_size; static size_t pic2_size; -static unsigned int last_token_is_dot = 0; -static unsigned int integer_is_label = 0; static unsigned int inside_bracket = 0; static char err_msg[COB_MINI_BUFF]; @@ -215,14 +218,6 @@ AREA_A "#AREA_A"\n return last_token; } - /* We treat integer literals immediately after '.' as labels; - that is, they must be level numbers or section names. */ - if (last_token_is_dot) { - integer_is_label = 1; - last_token_is_dot = 0; - } else { - integer_is_label = 0; - } cobc_in_area_a = 0; %} @@ -414,7 +409,6 @@ AREA_A "#AREA_A"\n [''""] { /* String literal */ - cobc_force_literal = 0; read_literal (yytext[0], CB_LITERAL_DEFAULT); RETURN_TOK (LITERAL); } @@ -422,13 +416,11 @@ AREA_A "#AREA_A"\n X"\'"[^''\n]*"\'" | X"\""[^""\n]*"\"" { /* X string literal */ - cobc_force_literal = 0; RETURN_TOK (scan_x (yytext + 2, "X")); } N[''""] { - /* N national string literal */ - cobc_force_literal = 0; + /* N national string (UTF16) literal */ /* TODO: national string - needs different handling */ read_literal (yytext [1], CB_LITERAL_N); RETURN_TOK (LITERAL); @@ -437,7 +429,6 @@ N[''""] { NC[''""] { /* NC national character string literal (extension, but same handling as COBOL 2002 national string literal) */ - cobc_force_literal = 0; /* TODO: national string - needs different handling */ read_literal (yytext [2], CB_LITERAL_NC); RETURN_TOK (LITERAL); @@ -446,13 +437,11 @@ NC[''""] { NX"\'"[^''\n]*"\'" | NX"\""[^""\n]*"\"" { /* NX string literal */ - cobc_force_literal = 0; RETURN_TOK (scan_x (yytext + 3, "NX")); } U[''""] { - /* N national string literal */ - cobc_force_literal = 0; + /* UTF8 string literal */ /* TODO: utf8 string - needs different handling */ read_literal (yytext [1], CB_LITERAL_U); RETURN_TOK (LITERAL); @@ -461,42 +450,36 @@ U[''""] { UX"\'"[^''\n]*"\'" | UX"\""[^""\n]*"\"" { /* UX string literal */ - cobc_force_literal = 0; RETURN_TOK (scan_x (yytext + 3, "UX")); } Z"\'"[^''\n]*"\'" | Z"\""[^""\n]*"\"" { /* Z string literal */ - cobc_force_literal = 0; RETURN_TOK (scan_z (yytext + 2, "Z")); } L"\'"[^''\n]*"\'" | L"\""[^""\n]*"\"" { /* L string literal */ - cobc_force_literal = 0; RETURN_TOK (scan_z (yytext + 2, "L")); } H"\'"[^''\n]*"\'" | H"\""[^""\n]*"\"" { /* H hexadecimal/numeric literal */ - cobc_force_literal = 0; RETURN_TOK (scan_h (yytext + 2, "H")); } B"\'"[^''\n]*"\'" | B"\""[^""\n]*"\"" { /* B boolean/numeric literal */ - cobc_force_literal = 0; RETURN_TOK (scan_b (yytext + 2, "B")); } BX"\'"[^''\n]*"\'" | BX"\""[^""\n]*"\"" { /* BX boolean hexadecimal string literal */ - cobc_force_literal = 0; RETURN_TOK (scan_x (yytext + 3, "BX")); } @@ -507,19 +490,16 @@ B#[0-9]+ { they are processed. */ /* ACUCOBOL binary numeric literal */ - cobc_force_literal = 0; RETURN_TOK (scan_b (yytext + 2, "B#")); } O#[0-9]+ { /* ACUCOBOL octal numeric literal */ - cobc_force_literal = 0; RETURN_TOK (scan_o (yytext + 2, "O#")); } %[0-9]+ { /* HP-COBOL octal numeric literal */ - cobc_force_literal = 0; RETURN_TOK (scan_o (yytext + 1, "%")); } @@ -528,7 +508,6 @@ H#[0-9A-Za-z]+ { /* ACUCOBOL hexadecimal numeric literal */ char type[3] = "x#"; type[0] = yytext [0]; - cobc_force_literal = 0; RETURN_TOK (scan_h (yytext + 2, type)); } @@ -538,7 +517,7 @@ H#[0-9A-Za-z]+ { } \) { - if (inside_bracket > 0) { + if (inside_bracket) { inside_bracket--; } RETURN_TOK (TOK_CLOSE_PAREN); @@ -547,8 +526,9 @@ H#[0-9A-Za-z]+ { [0-9][0-9]? { int value; - cobc_force_literal = 0; - if (integer_is_label || cobc_in_area_a) { + /* We treat integer literals immediately after '.' as labels; + that is, they must be level numbers or section names. */ + if (last_token == TOK_DOT || cobc_in_area_a) { yylval = cb_build_reference (yytext); if (!cobc_in_procedure) { @@ -590,8 +570,9 @@ H#[0-9A-Za-z]+ { [0-9]+ { - cobc_force_literal = 0; - if (integer_is_label || cobc_in_area_a) { + /* We treat integer literals immediately after '.' as labels; + that is, they must be level numbers or section names. */ + if (last_token == TOK_DOT || cobc_in_area_a) { /* Integer label */ yylval = cb_build_reference (yytext); if (cobc_in_area_a) { @@ -618,7 +599,7 @@ H#[0-9A-Za-z]+ { /* Ignore */ } -<*>;+ { +<*>;+[ ]* { if (inside_bracket) { RETURN_TOK (SEMI_COLON); } @@ -640,7 +621,7 @@ H#[0-9A-Za-z]+ { RETURN_TOK (scan_numeric (yytext)); } -,+ { +,+[ ]* { if (inside_bracket) { RETURN_TOK (COMMA_DELIM); } @@ -666,6 +647,13 @@ H#[0-9A-Za-z]+ { unput (','); } +,[ ]+ { + if (inside_bracket) { + RETURN_TOK (COMMA_DELIM); + } + /* Ignore */ +} + , { if (inside_bracket) { RETURN_TOK (COMMA_DELIM); @@ -674,13 +662,11 @@ H#[0-9A-Za-z]+ { } "END"[ ,;\n]+"PROGRAM"/[ .,;\n] { - cobc_force_literal = 1; count_lines (yytext); RETURN_TOK (END_PROGRAM); } "END"[ ,;\n]+"FUNCTION"/[ .,;\n] { - cobc_force_literal = 1; count_lines (yytext); RETURN_TOK (END_FUNCTION); } @@ -1015,7 +1001,6 @@ H#[0-9A-Za-z]+ { struct cb_text_list *tlp; cb_tree x; cb_tree l; - struct cb_program *program; cb_check_word_length ((unsigned int)yyleng, yytext); @@ -1051,9 +1036,11 @@ H#[0-9A-Za-z]+ { } /* Bail early for (END) PROGRAM-ID when not a literal */ - if (cobc_force_literal) { + if ((second_last_token == PROGRAM_ID && last_token == TOK_DOT) + || (second_last_token == FUNCTION_ID && last_token == TOK_DOT) + || last_token == END_PROGRAM + || last_token == END_FUNCTION) { /* Force PROGRAM-ID / END PROGRAM */ - cobc_force_literal = 0; if (cb_fold_call) { yylval = cb_build_reference (yytext); RETURN_TOK (PROGRAM_NAME); @@ -1110,7 +1097,7 @@ H#[0-9A-Za-z]+ { /* Check user programs */ if (cobc_in_id) { - program = cb_find_defined_program_by_name (yytext); + const struct cb_program *program = cb_find_defined_program_by_name (yytext); if (program) { yylval = cb_build_reference (yytext); RETURN_TOK (PROGRAM_NAME); @@ -1214,16 +1201,41 @@ H#[0-9A-Za-z]+ { RETURN_TOK (EXPONENTIATION); } -"."([ \n]*".")* { - if (last_token_is_dot || strlen (yytext) > 1) { - cb_warning (COBC_WARN_FILLER, _("ignoring redundant .")); +"."([ \n]*".")+ { + /* note: according to the COBOL standard that is wrong as + a dot is only TOK_DOT if it is followed by [ \n] or EOF + CHECKME: switching to this would make parsing of decimals + easier - how do other compilers handle this? */ + /* note: we explicit consume multiple . here to get the right position */ + cb_warning (COBC_WARN_FILLER, _("ignoring redundant .")); + +#if 0 /* it seems there is no case because of the consume above where + the last token would be a dot */ + if (last_token != TOK_DOT) { + yylval = NULL; + RETURN_TOK (TOK_DOT); } +#else + yylval = NULL; + RETURN_TOK (TOK_DOT); +#endif +} - if (!last_token_is_dot) { - last_token_is_dot = 1; +"." { + /* note: according to the COBOL standard that is wrong as + a dot is only TOK_DOT if it is followed by [ \n] or EOF + CHECKME: switching to this would make parsing of decimals + easier - how do other compilers handle this? */ +#if 0 /* it seems there is no case because of the consume above where + the last token would be a dot, _possibly_ EOF? */ + if (last_token != TOK_DOT) { yylval = NULL; RETURN_TOK (TOK_DOT); } +#else + yylval = NULL; + RETURN_TOK (TOK_DOT); +#endif } "&" { @@ -1326,11 +1338,9 @@ H#[0-9A-Za-z]+ { <> { /* At EOF - Clear variables */ clear_constants (); - last_token_is_dot = 0; + last_token = second_last_token = 0; cobc_in_area_a = 0; - integer_is_label = 0; inside_bracket = 0; - cobc_force_literal = 0; yyterminate (); } @@ -2016,7 +2026,6 @@ scan_o (const char *text, const char *type) #if 0 /* activate to have all %literals to be alphanumeric */ char xbuff[19]; sprintf ((char *)&xbuff, "'%X'", (unsigned int)val); - cobc_force_literal = 0; RETURN_TOK (scan_x ((const char *)&xbuff + 1, "X")); #endif } diff --git a/cobc/tree.h b/cobc/tree.h index 4fad7772f..2d244aa02 100644 --- a/cobc/tree.h +++ b/cobc/tree.h @@ -2301,7 +2301,6 @@ extern unsigned int cobc_in_procedure; extern unsigned int cobc_in_data_division; extern unsigned int cobc_in_usage; extern unsigned int cobc_in_repository; -extern unsigned int cobc_force_literal; extern unsigned int cobc_cs_check; extern unsigned int cobc_allow_program_name; extern unsigned int cobc_in_xml_generate_body;