diff --git a/.gitignore b/.gitignore index b76c8926..78b3fb16 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ Makefile.main build/ vendor/ java/lib/ +.vscode/ diff --git a/internal/tokenizer/common.go b/internal/tokenizer/common.go new file mode 100644 index 00000000..1c47ee33 --- /dev/null +++ b/internal/tokenizer/common.go @@ -0,0 +1,7 @@ +// Package tokenizer implements file tokenization used by the enry content +// classifier. This package is an implementation detail of enry and should not +// be imported by other packages. +package tokenizer + +// ByteLimit defines the maximum prefix of an input text that will be tokenized. +const ByteLimit = 100000 diff --git a/internal/tokenizer/flex/lex.linguist_yy.c b/internal/tokenizer/flex/lex.linguist_yy.c new file mode 100644 index 00000000..1cdb9d0e --- /dev/null +++ b/internal/tokenizer/flex/lex.linguist_yy.c @@ -0,0 +1,2226 @@ + +#line 3 "lex.linguist_yy.c" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 5 +#define YY_FLEX_SUBMINOR_VERSION 35 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include <stdio.h> /* FILE, getc, fread, fwrite, ferror, clearerr, stdin, stdout, EOF */ +#include <string.h> /* strrchr, strcmp, strdup used by the rule actions */ +#include <errno.h> /* errno and EINTR, used by YY_INPUT */ +#include <stdlib.h> /* NOTE(review): presumably for the allocation helpers defined later in the file - confirm */ + +/* end standard C headers. */ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. 
+ */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include <inttypes.h> /* supplies the intN_t / uintN_t types aliased by the typedefs that follow */ +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +typedef uint64_t flex_uint64_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; /* NOTE(review): this non-C99 branch defines no flex_uint64_t, unlike the C99 branch */ +#endif /* ! C99 */ + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#endif /* ! FLEXINT_H */ + +#ifdef __cplusplus + +/* The "const" storage-class-modifier is valid. */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +/* C99 requires __STDC__ to be defined as 1. */ +#if defined (__STDC__) + +#define YY_USE_CONST + +#endif /* defined (__STDC__) */ +#endif /* ! __cplusplus */ + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an unsigned + * integer for use as an array index. If the signed char is negative, + * we want to instead treat it as an 8-bit unsigned char, hence the + * double cast. + */ +#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) + +/* An opaque pointer. 
*/ +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void* yyscan_t; /* the scanner function casts this handle to struct yyguts_t * */ +#endif + +/* For convenience, these vars (plus the bison vars far below) + are macros in the reentrant scanner. */ +#define yyin yyg->yyin_r /* this and the following macros need a local `yyg` in scope at the point of use */ +#define yyout yyg->yyout_r +#define yyextra yyg->yyextra_r +#define yyleng yyg->yyleng_r +#define yytext yyg->yytext_r +#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) +#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) +#define yy_flex_debug yyg->yy_flex_debug_r + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN yyg->yy_start = 1 + 2 * + +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START ((yyg->yy_start - 1) / 2) /* inverse of the 1 + 2 * n encoding stored by BEGIN */ +#define YYSTATE YY_START + +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE linguist_yyrestart(yyin ,yyscanner ) + +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#define YY_BUF_SIZE 16384 +#endif + +/* The state buf must be large enough to hold one state per character in the main buffer. + */ +#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + + #define YY_LESS_LINENO(n) /* expands to nothing in this scanner */ + +/* Return all but the first "n" matched characters back to the input stream. 
*/ +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + *yy_cp = yyg->yy_hold_char; \ + YY_RESTORE_YY_MORE_OFFSET \ + yyg->yy_c_buf_p = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) + +#define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner ) + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; /* input stream associated with this buffer */ + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + yy_size_t yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; /* read through YY_AT_BOL() and added to the start state by the scanner */ + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; /* holds one of the YY_BUFFER_* values defined just below */ + +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. 
We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. + * + * When we actually see the EOF, we change the status to "new" + * (via linguist_yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + * + * Returns the top of the stack, or NULL. + */ +#define YY_CURRENT_BUFFER ( yyg->yy_buffer_stack \ + ? yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] \ + : NULL) + +/* Same as previous macro, but useful when we know that the buffer stack is not + * NULL or when we need an lvalue. For internal use only. + */ +#define YY_CURRENT_BUFFER_LVALUE yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] + +void linguist_yyrestart (FILE *input_file ,yyscan_t yyscanner ); /* externally visible buffer-management API; every call takes the scanner handle last */ +void linguist_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +YY_BUFFER_STATE linguist_yy_create_buffer (FILE *file,int size ,yyscan_t yyscanner ); +void linguist_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void linguist_yy_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void linguist_yypush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +void linguist_yypop_buffer_state (yyscan_t yyscanner ); + +static void linguist_yyensure_buffer_stack (yyscan_t yyscanner ); /* file-local helpers */ +static void linguist_yy_load_buffer_state (yyscan_t yyscanner ); +static void linguist_yy_init_buffer (YY_BUFFER_STATE b,FILE *file ,yyscan_t yyscanner ); + +#define YY_FLUSH_BUFFER linguist_yy_flush_buffer(YY_CURRENT_BUFFER ,yyscanner) + +YY_BUFFER_STATE linguist_yy_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); +YY_BUFFER_STATE linguist_yy_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); +YY_BUFFER_STATE linguist_yy_scan_bytes (yyconst char 
*bytes,yy_size_t len ,yyscan_t yyscanner ); + +void *linguist_yyalloc (yy_size_t ,yyscan_t yyscanner ); +void *linguist_yyrealloc (void *,yy_size_t ,yyscan_t yyscanner ); +void linguist_yyfree (void * ,yyscan_t yyscanner ); + +#define yy_new_buffer linguist_yy_create_buffer + +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! YY_CURRENT_BUFFER ){ /* no current buffer yet: create a default one on yyin first */ \ + linguist_yyensure_buffer_stack (yyscanner); \ + YY_CURRENT_BUFFER_LVALUE = \ + linguist_yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ + } + +#define yy_set_bol(at_bol) \ + { \ + if ( ! YY_CURRENT_BUFFER ){ /* same lazy buffer creation as yy_set_interactive */ \ + linguist_yyensure_buffer_stack (yyscanner); \ + YY_CURRENT_BUFFER_LVALUE = \ + linguist_yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ + } + +#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) + +/* Begin user sect3 */ + +typedef unsigned char YY_CHAR; + +typedef int yy_state_type; + +#define yytext_ptr yytext_r + +static yy_state_type yy_get_previous_state (yyscan_t yyscanner ); +static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner); +static int yy_get_next_buffer (yyscan_t yyscanner ); +static void yy_fatal_error (yyconst char msg[] ,yyscan_t yyscanner ); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. + */ +#define YY_DO_BEFORE_ACTION \ + yyg->yytext_ptr = yy_bp; \ + yyleng = (yy_size_t) (yy_cp - yy_bp); \ + yyg->yy_hold_char = *yy_cp; /* save the byte that the NUL terminator overwrites */ \ + *yy_cp = '\0'; \ + yyg->yy_c_buf_p = yy_cp; + +#define YY_NUM_RULES 31 +#define YY_END_OF_BUFFER 32 /* YY_NUM_RULES + 1 */ +/* This struct is not used in this scanner, + but its presence is necessary. 
*/ +struct yy_trans_info + { + flex_int32_t yy_verify; + flex_int32_t yy_nxt; + }; +static yyconst flex_int16_t yy_accept[114] = /* indexed by DFA state: the action number chosen for that state; 0 marks a non-accepting state (the scanner then backs up) */ + { 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 32, 30, 17, 28, + 29, 29, 18, 27, 27, 28, 28, 19, 19, 29, + 27, 29, 30, 17, 28, 29, 29, 28, 26, 24, + 25, 9, 9, 9, 9, 9, 9, 9, 16, 28, + 29, 16, 6, 4, 19, 28, 19, 19, 28, 20, + 29, 5, 29, 0, 0, 0, 0, 3, 0, 28, + 24, 23, 10, 0, 12, 13, 0, 0, 7, 8, + 19, 19, 19, 3, 0, 2, 0, 2, 2, 21, + 22, 23, 11, 14, 15, 19, 19, 19, 19, 19, + + 19, 0, 2, 0, 2, 0, 0, 0, 0, 0, + 1, 0, 0 + } ; + +static yyconst flex_int32_t yy_ec[256] = /* maps each input byte value to the equivalence class used as the column index in the match loop */ + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 4, 5, 6, 7, 1, 8, 9, 10, 11, + 12, 13, 14, 1, 15, 16, 17, 18, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 1, 20, 21, + 22, 23, 24, 25, 26, 26, 26, 26, 27, 28, + 29, 29, 29, 29, 29, 30, 29, 29, 29, 29, + 29, 29, 29, 29, 31, 29, 29, 29, 29, 29, + 32, 1, 33, 1, 29, 1, 26, 26, 26, 26, + + 34, 28, 29, 29, 29, 29, 29, 30, 29, 35, + 29, 29, 29, 29, 29, 29, 31, 36, 29, 37, + 29, 29, 38, 39, 40, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static yyconst flex_int32_t yy_meta[41] = /* remaps an equivalence class when the match loop falls back to a default state numbered 114 or above */ + { 0, + 1, 2, 3, 2, 4, 5, 6, 1, 1, 5, + 1, 1, 6, 1, 4, 7, 8, 9, 9, 1, + 4, 10, 1, 4, 6, 9, 9, 9, 9, 9, + 9, 1, 1, 9, 9, 9, 9, 1, 1, 1 + } ; + +static yyconst flex_int16_t yy_base[125] = /* per-state offset into yy_nxt / yy_chk; the class number is added to it */ + { 0, + 0, 39, 39, 59, 214, 213, 210, 209, 208, 207, + 208, 207, 213, 212, 207, 206, 215, 358, 208, 0, + 358, 204, 202, 197, 358, 0, 194, 90, 112, 185, + 190, 165, 145, 38, 44, 199, 184, 38, 358, 174, + 358, 358, 178, 179, 151, 176, 180, 174, 
174, 0, + 358, 168, 358, 0, 0, 38, 31, 140, 137, 140, + 358, 358, 358, 0, 154, 139, 133, 0, 155, 141, + 122, 54, 358, 118, 358, 358, 131, 113, 358, 358, + 53, 84, 174, 0, 164, 96, 62, 100, 28, 358, + 358, 74, 358, 358, 358, 157, 71, 0, 0, 77, + + 0, 88, 14, 94, 108, 96, 207, 131, 175, 134, + 243, 165, 358, 280, 290, 295, 301, 302, 312, 321, + 326, 328, 337, 347 + } ; + +static yyconst flex_int16_t yy_def[125] = /* fallback state tried when yy_chk does not confirm the transition for the current state */ + { 0, + 113, 1, 114, 114, 115, 115, 115, 115, 115, 115, + 115, 115, 115, 115, 115, 115, 113, 113, 113, 116, + 113, 113, 113, 113, 113, 116, 116, 113, 116, 117, + 113, 113, 113, 113, 116, 113, 113, 116, 113, 118, + 113, 113, 113, 113, 113, 113, 113, 113, 113, 116, + 113, 113, 113, 116, 29, 116, 116, 116, 116, 117, + 113, 113, 113, 33, 113, 113, 113, 119, 120, 116, + 118, 121, 113, 113, 113, 113, 113, 113, 113, 113, + 113, 116, 29, 119, 120, 85, 122, 85, 88, 113, + 113, 121, 113, 113, 113, 113, 113, 116, 83, 83, + + 83, 122, 88, 122, 88, 122, 123, 122, 123, 124, + 123, 124, 0, 113, 113, 113, 113, 113, 113, 113, + 113, 113, 113, 113 + } ; + +static yyconst flex_int16_t yy_nxt[399] = /* next-state entries, read at yy_base[state] + class; state 113 ends the match loop */ + { 0, + 18, 18, 18, 18, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 21, 21, 20, 27, 28, 29, 25, + 30, 18, 18, 18, 20, 20, 20, 20, 20, 20, + 20, 25, 25, 20, 20, 20, 20, 31, 32, 25, + 33, 68, 33, 49, 34, 35, 36, 68, 69, 105, + 54, 81, 81, 37, 70, 38, 40, 40, 57, 90, + 57, 41, 103, 91, 40, 40, 40, 40, 40, 40, + 96, 96, 40, 40, 40, 40, 40, 40, 102, 113, + 97, 41, 97, 113, 40, 40, 40, 40, 40, 40, + 81, 81, 40, 40, 40, 40, 50, 113, 97, 113, + + 97, 113, 50, 113, 102, 55, 50, 55, 55, 107, + 102, 107, 102, 98, 50, 50, 56, 57, 50, 57, + 58, 104, 95, 56, 50, 50, 59, 55, 106, 55, + 55, 108, 107, 88, 107, 107, 94, 107, 56, 57, + 93, 57, 58, 72, 68, 56, 64, 102, 64, 65, + 65, 65, 65, 65, 83, 83, 85, 68, 85, 66, + 113, 67, 83, 83, 83, 85, 107, 85, 107, 82, + 83, 86, 87, 87, 96, 96, 113, 80, 113, 79, + 86, 87, 87, 78, 97, 77, 97, 76, 89, 99, + 75, 99, 99, 74, 73, 72, 110, 89, 65, 99, + + 100, 101, 68, 
63, 62, 61, 54, 100, 107, 53, + 107, 52, 51, 49, 113, 48, 48, 47, 47, 46, + 46, 45, 45, 44, 44, 43, 43, 113, 110, 113, + 113, 113, 111, 111, 111, 111, 111, 111, 113, 113, + 111, 111, 111, 111, 113, 113, 113, 113, 113, 113, + 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, + 113, 113, 113, 113, 110, 113, 113, 113, 111, 111, + 111, 111, 111, 111, 113, 113, 111, 111, 111, 111, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + + 50, 50, 50, 50, 60, 113, 113, 60, 60, 60, + 71, 71, 84, 84, 113, 84, 84, 84, 84, 84, + 84, 84, 88, 113, 113, 113, 113, 113, 88, 88, + 92, 113, 113, 113, 92, 87, 87, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 17, 113, 113, + 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, + 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, + 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, + 113, 113, 113, 113, 113, 113, 113, 113 + + } ; + +static yyconst flex_int16_t yy_chk[399] = /* ownership check: a yy_nxt slot is used only when the entry here equals the current state */ + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 34, 2, 34, 2, 2, 2, 35, 35, 103, + 38, 56, 56, 2, 38, 2, 3, 3, 57, 72, + 57, 3, 89, 72, 3, 3, 3, 3, 3, 3, + 81, 81, 3, 3, 3, 3, 4, 4, 87, 92, + 81, 4, 81, 92, 4, 4, 4, 4, 4, 4, + 100, 100, 4, 4, 4, 4, 28, 86, 97, 86, + + 97, 88, 28, 88, 102, 28, 28, 28, 28, 105, + 104, 105, 106, 82, 28, 28, 28, 28, 28, 28, + 28, 102, 78, 28, 28, 28, 28, 29, 104, 29, + 29, 106, 108, 88, 108, 110, 77, 110, 29, 29, + 74, 29, 29, 71, 70, 29, 33, 108, 33, 67, + 33, 33, 33, 66, 59, 59, 69, 65, 69, 33, + 60, 33, 59, 59, 59, 85, 112, 85, 112, 58, + 59, 69, 69, 69, 96, 96, 109, 52, 109, 49, + 85, 85, 85, 48, 96, 47, 96, 46, 69, 83, + 45, 83, 83, 44, 43, 40, 109, 85, 37, 83, + + 83, 83, 36, 32, 31, 30, 27, 83, 107, 24, + 107, 23, 22, 19, 17, 16, 15, 14, 13, 12, + 11, 10, 9, 8, 7, 6, 5, 0, 107, 0, + 0, 0, 107, 107, 107, 107, 107, 107, 0, 0, + 107, 107, 107, 107, 111, 0, 111, 
0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 111, 0, 0, 0, 111, 111, + 111, 111, 111, 111, 0, 0, 111, 111, 111, 111, + 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, + 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, + + 116, 116, 116, 116, 117, 0, 0, 117, 117, 117, + 118, 118, 119, 119, 0, 119, 119, 119, 119, 119, + 119, 119, 120, 0, 0, 0, 0, 0, 120, 120, + 121, 0, 0, 0, 121, 122, 122, 123, 123, 123, + 123, 123, 123, 123, 123, 123, 123, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 113, 113, 113, + 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, + 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, + 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, + 113, 113, 113, 113, 113, 113, 113, 113 + + } ; + +/* The intent behind this definition is that it'll catch + * any uses of REJECT which flex missed. + */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +#line 1 "tokenizer.l" +#line 2 "tokenizer.l" + +#include "linguist.h" + +#define feed_token(tok, typ) do { \ + yyextra->token = (tok); \ + yyextra->type = (typ); \ + } while (0) /* feed_token: stores the token and its type in the scanner's extra data; the caller decides when to return */ + +#define eat_until_eol() do { \ + int c; \ + while ((c = input(yyscanner)) != '\n' && c != EOF && c); \ + if (c == EOF || !c) \ + return 0; \ + } while (0) /* eat_until_eol: discards input through the next newline; makes the enclosing function return 0 if EOF or a NUL byte comes first */ + +#define eat_until_unescaped(q) do { \ + int c; \ + while ((c = input(yyscanner)) != EOF && c) { \ + if (c == '\n') \ + break; \ + if (c == '\\') { \ + c = input(yyscanner); \ + if (c == EOF || !c) \ + return 0; \ + } else if (c == q) \ + break; \ + } \ + if (c == EOF || !c) \ + return 0; \ + } while (0) /* eat_until_unescaped: discards input up to a newline or an unescaped q (a backslash skips the next character); returns 0 from the enclosing function on EOF or NUL. NOTE(review): q is not parenthesized in the expansion */ + + +#line 589 "lex.linguist_yy.c" + +#define INITIAL 0 /* start-condition numbers; the rule actions pass these names to BEGIN */ +#define sgml 1 +#define c_comment 2 +#define xml_comment 3 +#define haskell_comment 4 +#define ocaml_comment 5 +#define python_dcomment 6 +#define python_scomment 7 + +#ifndef YY_NO_UNISTD_H +/* Special case for "unistd.h", since it is non-ANSI. 
We include it way + * down here because we want the user's section 1 to have been scanned first. + * The user has a chance to override it with an option. + */ +#include <unistd.h> +#endif + +#define YY_EXTRA_TYPE struct tokenizer_extra * /* struct tokenizer_extra is not defined in the visible part of this file - presumably it comes from linguist.h; confirm */ + +/* Holds the entire state of the reentrant scanner. */ +struct yyguts_t + { + + /* User-defined. Not touched by flex. */ + YY_EXTRA_TYPE yyextra_r; + + /* The rest are the same as the globals declared in the non-reentrant scanner. */ + FILE *yyin_r, *yyout_r; + size_t yy_buffer_stack_top; /**< index of top of stack. */ + size_t yy_buffer_stack_max; /**< capacity of stack. */ + YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */ + char yy_hold_char; /* byte saved by YY_DO_BEFORE_ACTION before it NUL-terminates yytext */ + yy_size_t yy_n_chars; + yy_size_t yyleng_r; + char *yy_c_buf_p; + int yy_init; /* set to 1 by the first call of the scanner function */ + int yy_start; /* encoded start condition, 1 + 2 * n as written by BEGIN */ + int yy_did_buffer_switch_on_eof; + int yy_start_stack_ptr; + int yy_start_stack_depth; + int *yy_start_stack; + yy_state_type yy_last_accepting_state; /* back-up point recorded by the match loop */ + char* yy_last_accepting_cpos; /* buffer position that goes with yy_last_accepting_state */ + + int yylineno_r; + int yy_flex_debug_r; + + char *yytext_r; + int yy_more_flag; + int yy_more_len; + + }; /* end struct yyguts_t */ + +static int yy_init_globals (yyscan_t yyscanner ); + +int linguist_yylex_init (yyscan_t* scanner); + +int linguist_yylex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner); + +/* Accessor methods to globals. + These are made visible to non-reentrant scanners for convenience. 
*/ + +int linguist_yylex_destroy (yyscan_t yyscanner ); + +int linguist_yyget_debug (yyscan_t yyscanner ); + +void linguist_yyset_debug (int debug_flag ,yyscan_t yyscanner ); + +YY_EXTRA_TYPE linguist_yyget_extra (yyscan_t yyscanner ); + +void linguist_yyset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner ); + +FILE *linguist_yyget_in (yyscan_t yyscanner ); + +void linguist_yyset_in (FILE * in_str ,yyscan_t yyscanner ); + +FILE *linguist_yyget_out (yyscan_t yyscanner ); + +void linguist_yyset_out (FILE * out_str ,yyscan_t yyscanner ); + +yy_size_t linguist_yyget_leng (yyscan_t yyscanner ); + +char *linguist_yyget_text (yyscan_t yyscanner ); + +int linguist_yyget_lineno (yyscan_t yyscanner ); + +void linguist_yyset_lineno (int line_number ,yyscan_t yyscanner ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int linguist_yywrap (yyscan_t yyscanner ); +#else +extern int linguist_yywrap (yyscan_t yyscanner ); /* only declared here; the definition must come from elsewhere */ +#endif +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner); +#endif + +#ifndef YY_NO_INPUT + +#ifdef __cplusplus +static int yyinput (yyscan_t yyscanner ); +#else +static int input (yyscan_t yyscanner ); /* called by the eat_until_eol / eat_until_unescaped macros */ +#endif + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* Copy whatever the last rule matched to the standard output. */ +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO fwrite( yytext, yyleng, 1, yyout ) /* the fwrite return value is not checked */ +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". 
+ */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ + { \ + int c = '*'; /* any starting value other than EOF and '\n' */ \ + yy_size_t n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else \ + { \ + errno=0; /* cleared so the EINTR test below only sees an error from this fread */ \ + while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \ + { \ + if( errno != EINTR) \ + { \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + break; \ + } \ + errno=0; /* interrupted read: clear the error state and retry */ \ + clearerr(yyin); \ + } \ + }\ +\ + +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg , yyscanner) +#endif + +/* end tables serialization structures and prototypes */ + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int linguist_yylex (yyscan_t yyscanner); + +#define YY_DECL int linguist_yylex (yyscan_t yyscanner) /* signature of the scanner function whose body follows the YY_DECL use */ +#endif /* !YY_DECL */ + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. 
*/ +#ifndef YY_BREAK +#define YY_BREAK break; +#endif + +#define YY_RULE_SETUP \ + if ( yyleng > 0 ) \ + YY_CURRENT_BUFFER_LVALUE->yy_at_bol = \ + (yytext[yyleng - 1] == '\n'); \ + YY_USER_ACTION + +/** The main scanner function which does all the work. + */ +YY_DECL +{ + register yy_state_type yy_current_state; + register char *yy_cp, *yy_bp; + register int yy_act; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + +#line 38 "tokenizer.l" + + +#line 819 "lex.linguist_yy.c" + + if ( !yyg->yy_init ) + { + yyg->yy_init = 1; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! yyg->yy_start ) + yyg->yy_start = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( ! YY_CURRENT_BUFFER ) { + linguist_yyensure_buffer_stack (yyscanner); + YY_CURRENT_BUFFER_LVALUE = + linguist_yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); + } + + linguist_yy_load_buffer_state(yyscanner ); + } + + while ( 1 ) /* loops until end-of-file is reached */ + { + yy_cp = yyg->yy_c_buf_p; + + /* Support of yytext. */ + *yy_cp = yyg->yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. 
+ */ + yy_bp = yy_cp; + + yy_current_state = yyg->yy_start; + yy_current_state += YY_AT_BOL(); +yy_match: + do + { + register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 114 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + ++yy_cp; + } + while ( yy_current_state != 113 ); + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + +yy_find_action: + yy_act = yy_accept[yy_current_state]; + + YY_DO_BEFORE_ACTION; + +do_action: /* This label is used only to access EOF actions. */ + + switch ( yy_act ) + { /* beginning of action switch */ + case 0: /* must back up */ + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = yyg->yy_hold_char; + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + goto yy_find_action; + +case 1: +/* rule 1 can match eol */ +YY_RULE_SETUP +#line 40 "tokenizer.l" +{ + const char *off = strrchr(yytext, ' '); + if (!off) + off = yytext; + else + ++off; + feed_token(strdup(off), SHEBANG_TOKEN); + eat_until_eol(); + return 1; + } + YY_BREAK +case 2: +YY_RULE_SETUP +#line 51 "tokenizer.l" +{ + const char *off = strrchr(yytext, '/'); + if (!off) + off = yytext; + else + ++off; + if (strcmp(off, "env") == 0) { + eat_until_eol(); + } else { + feed_token(strdup(off), SHEBANG_TOKEN); + eat_until_eol(); + return 1; + } + } + YY_BREAK +case 3: +YY_RULE_SETUP +#line 66 "tokenizer.l" +{ /* nothing */ } + YY_BREAK +case 4: +YY_RULE_SETUP +#line 68 "tokenizer.l" +{ BEGIN(c_comment); } + YY_BREAK +/* See below for xml_comment start. 
*/ +case 5: +YY_RULE_SETUP +#line 70 "tokenizer.l" +{ BEGIN(haskell_comment); } + YY_BREAK +case 6: +YY_RULE_SETUP +#line 71 "tokenizer.l" +{ BEGIN(ocaml_comment); } + YY_BREAK +case 7: +YY_RULE_SETUP +#line 72 "tokenizer.l" +{ BEGIN(python_dcomment); } + YY_BREAK +case 8: +YY_RULE_SETUP +#line 73 "tokenizer.l" +{ BEGIN(python_scomment); } + YY_BREAK +case 9: +/* rule 9 can match eol */ +YY_RULE_SETUP +#line 75 "tokenizer.l" +{ /* nothing */ } + YY_BREAK +case 10: +YY_RULE_SETUP +#line 76 "tokenizer.l" +{ BEGIN(INITIAL); } + YY_BREAK +case 11: +YY_RULE_SETUP +#line 77 "tokenizer.l" +{ BEGIN(INITIAL); } + YY_BREAK +case 12: +YY_RULE_SETUP +#line 78 "tokenizer.l" +{ BEGIN(INITIAL); } + YY_BREAK +case 13: +YY_RULE_SETUP +#line 79 "tokenizer.l" +{ BEGIN(INITIAL); } + YY_BREAK +case 14: +YY_RULE_SETUP +#line 80 "tokenizer.l" +{ BEGIN(INITIAL); } + YY_BREAK +case 15: +YY_RULE_SETUP +#line 81 "tokenizer.l" +{ BEGIN(INITIAL); } + YY_BREAK +case 16: +YY_RULE_SETUP +#line 83 "tokenizer.l" +{ /* nothing */ } + YY_BREAK +case 17: +YY_RULE_SETUP +#line 84 "tokenizer.l" +{ eat_until_unescaped('"'); } + YY_BREAK +case 18: +YY_RULE_SETUP +#line 85 "tokenizer.l" +{ eat_until_unescaped('\''); } + YY_BREAK +case 19: +YY_RULE_SETUP +#line 86 "tokenizer.l" +{ /* nothing */ } + YY_BREAK +case 20: +YY_RULE_SETUP +#line 87 "tokenizer.l" +{ + if (strcmp(yytext, "