diff --git a/Dev/src/fry_script/token_generator.cpp b/Dev/src/fry_script/token_generator.cpp
index 989990c..432070e 100644
--- a/Dev/src/fry_script/token_generator.cpp
+++ b/Dev/src/fry_script/token_generator.cpp
@@ -4,71 +4,186 @@
 using namespace foundation;
-static const char CharsToIgnore[] =
+namespace fry_script
 {
-    // Whitespaces
-    ' ',
-    '\t',
-    '\n',
-    '\r'
-};
+    /// Internal version of namespace, clients don't need to know this
+    namespace token_generator
+    {
+        struct GeneratorState;
-#include
+        /// Signature of the callbacks run when a parsing state is entered or left
+        typedef void (*StatChange)( GeneratorState& Generator, const char* Code, size_t Idx );
+        /// Signature of the callback run for each character while a state is current
+        typedef void (*ParsingFunction)( GeneratorState& Generator, const char* Code, size_t Idx );
+
+        /// The callbacks that make up one state of the tokenizer
+        struct ParsingState
+        {
+            StatChange BeginState; // run by ChangeState when the state becomes current (may be NULL)
+            StatChange EndState; // run by ChangeState when the state is left (may be NULL)
+
+            ParsingFunction ParseToken; // run by GenerateTokens for each character while this state is current
+        };
+
+        /// Indices into TokenGenerationStates; the order must match that table
+        enum EParsingState
+        {
+            PS_ParseWhitespace,
+            PS_ParseToken,
+            PS_DONE
+        };
+
+
+
+        void BeginParseWhitespace( GeneratorState& State, const char* Code, size_t Idx );
+        void EndParseWhitespace( GeneratorState& State, const char* Code, size_t Idx );
+        void ParseWhitespace( GeneratorState& State, const char* Code, size_t Idx );
+
+        void BeginParseToken( GeneratorState& State, const char* Code, size_t Idx );
+        void EndParseToken( GeneratorState& State, const char* Code, size_t Idx );
+        void ParseToken( GeneratorState& State, const char* Code, size_t Idx );
+
+        /// State table indexed by EParsingState; the PS_DONE entry has no callbacks
+        ParsingState TokenGenerationStates[] =
+        {
+            { BeginParseWhitespace, EndParseWhitespace, ParseWhitespace },
+            { BeginParseToken, EndParseToken, ParseToken },
+            { NULL, NULL, NULL }
+        };
+
+        void ChangeState( GeneratorState& State, EParsingState NewState, const char* Code, size_t Idx );
+
+        /// Mutable state handed to every callback during one GenerateTokens call
+        struct GeneratorState
+        {
+            /// No default constructor (declared only; EmittedTokens is a reference and must be bound)
+            GeneratorState();
+
+            GeneratorState( Array& TokensToEmit ) :
+                LastFoundIdx( 0 ),
+                CurrentParsingState( &TokenGenerationStates[PS_ParseWhitespace] ),
+                EmittedTokens( TokensToEmit )
+            {
+            }
+
+            size_t LastFoundIdx; // index in Code where the token being parsed starts
+            ParsingState* CurrentParsingState; // entry of TokenGenerationStates currently in effect
+            Array& EmittedTokens; // array that EndParseToken appends to
+        };
+    }
+
+
-namespace fry_script
-{
     namespace token_generator
     {
-        bool GenerateTokens( const char* Code, Array& out_Tokens )
+        /// Entering the whitespace state needs no setup
+        void BeginParseWhitespace( GeneratorState& State, const char* Code, size_t Idx )
         {
-            GeneratedToken Tok;
-            size_t LastFoundIdx = 0;
+        }
-            Tok.Location = Code;
-            Tok.Length = 0;
+        /// Leaving the whitespace state emits nothing
+        void EndParseWhitespace( GeneratorState& State, const char* Code, size_t Idx )
+        {
+        }
-            size_t Idx = 0;
-            for( ; Idx < strlen( Code ); ++Idx )
+        /// Skips whitespace; the first non-whitespace character switches to PS_ParseToken
+        void ParseWhitespace( GeneratorState& State, const char* Code, size_t Idx )
+        {
+            if( !IsWhitespace( Code[Idx] ) )
             {
-                if( Code[Idx] == ' ' )
-                {
-                    Tok.Length = Idx - LastFoundIdx;
+                ChangeState( State, PS_ParseToken, Code, Idx );
+            }
         }
-                    array::push_back( out_Tokens, Tok );
+        /// Remembers the index at which the token starts
+        void BeginParseToken( GeneratorState& State, const char* Code, size_t Idx )
+        {
+            State.LastFoundIdx = Idx;
+        }
+
+        /// Emits the token that spans Code[LastFoundIdx] up to, but not including, Code[Idx]
+        void EndParseToken( GeneratorState& State, const char* Code, size_t Idx )
+        {
+            GeneratedToken Token;
+
+            Token.Length = Idx - State.LastFoundIdx;
+            Token.Location = &Code[State.LastFoundIdx];
+
+            array::push_back( State.EmittedTokens, Token );
+        }
+
+        /// Consumes token characters; the first whitespace character switches back to PS_ParseWhitespace
+        void ParseToken( GeneratorState& State, const char* Code, size_t Idx )
+        {
+            if( IsWhitespace( Code[Idx] ) )
+            {
+                ChangeState( State, PS_ParseWhitespace, Code, Idx );
+            }
+        }
-                    LastFoundIdx = Idx;
-                    Tok.Location = &Code[Idx+1];
-                    Tok.Length = 0;
-                }
+        /// Runs the current state's EndState, makes NewState current, then runs its BeginState (NULL callbacks are skipped)
+        void ChangeState( GeneratorState& State, EParsingState NewState, const char* Code, size_t Idx )
+        {
+            if( State.CurrentParsingState->EndState != NULL )
+            {
+                State.CurrentParsingState->EndState( State, Code, Idx );
             }
+            State.CurrentParsingState = &TokenGenerationStates[NewState];
+            if( State.CurrentParsingState->BeginState != NULL )
+            {
+                State.CurrentParsingState->BeginState( State, Code, Idx );
+            }
+        }
-            Tok.Length = Idx - LastFoundIdx;
-            if( Tok.Length > 0 )
+        /// Splits Code into whitespace-separated tokens and appends them to out_Tokens
+        /// \param Code - NUL-terminated text to tokenize; the emitted tokens point into it
+        /// \param out_Tokens - receives one GeneratedToken per token found
+        /// \return always true
+        bool GenerateTokens( const char* Code, Array& out_Tokens )
+        {
+            GeneratorState State( out_Tokens );
+
+            const size_t CodeLength = strlen( Code );
+            size_t Idx = 0;
+            for( ; Idx < CodeLength; ++Idx )
             {
-                array::push_back( out_Tokens, Tok );
+                State.CurrentParsingState->ParseToken( State, Code, Idx );
             }
+            ChangeState( State, PS_DONE, Code, Idx ); // ends the last state, which emits a token that runs up to the end of Code
             return true;
         }
+        /// Copies the token at the generator's current index into its temporary buffer
+        /// \param out_NextToken - set to the NUL-terminated copy; only written when true is returned
+        /// \return false once every token in Tokens has been handed out
         bool GetNextToken( TokenGenerator* Generator, Array& Tokens, const char** out_NextToken )
         {
-            size_t CurrentIndex = Generator->CurrentIndex++;
+            size_t CurrentIndex = Generator->_CurrentIndex++;
             if( CurrentIndex >= array::size( Tokens ) )
             {
-                Generator->CurrentIndex = -1;
-                memory::mem_zero( Generator->TempToken, sizeof( Generator->TempToken ) );
+                Generator->_CurrentIndex = -1; // -1 converts to the largest size_t on the next call, so the size check keeps failing
+                memory::mem_zero( Generator->_TempToken, sizeof( Generator->_TempToken ) );
                 return false;
             }
             const GeneratedToken& CurrentToken = Tokens[CurrentIndex];
-            memcpy_s( Generator->TempToken, sizeof( Generator->TempToken ), CurrentToken.Location, CurrentToken.Length );
-            Generator->TempToken[CurrentToken.Length] = '\0';
-            *out_NextToken = Generator->TempToken;
+            // Clamp the copy so the terminator below always lands inside _TempToken; a token
+            // at least as long as the buffer is truncated instead of overrunning it
+            const size_t BufferSize = sizeof( Generator->_TempToken );
+            const size_t CopyLength = CurrentToken.Length < BufferSize ? CurrentToken.Length : BufferSize - 1;
+            memcpy_s( Generator->_TempToken, BufferSize, CurrentToken.Location, CopyLength );
+            Generator->_TempToken[CopyLength] = '\0';
+            *out_NextToken = Generator->_TempToken;
             return true;
         }
+
+        /// Returns true for space, tab, newline and carriage return
+        bool IsWhitespace( char Token )
+        {
+            return Token == ' ' || Token == '\t' || Token == '\n' || Token == '\r';
+        }
     }
 }
\ No newline at end of file
diff --git a/Dev/src/fry_script/token_generator.h b/Dev/src/fry_script/token_generator.h
index 9d01ea9..5417d1a 100644
--- a/Dev/src/fry_script/token_generator.h
+++ b/Dev/src/fry_script/token_generator.h
@@ -14,8 +14,11 @@ namespace fry_script
         /// \param out_Tokens - the tokes generated from Code
         bool GenerateTokens( const char* Code, foundation::Array& out_Tokens );
-        /// Copies a single token
+        /// Copies the next token in Tokens into Generator's buffer and points out_NextToken at it; returns false when no token is left
         bool GetNextToken( TokenGenerator* Generator, foundation::Array& Tokens, const char** out_NextToken );
+
+        /// Returns true if Token is a whitespace character (space, tab, newline or carriage return)
+        bool IsWhitespace( char Token ); // not inline: the only definition lives in token_generator.cpp
     }
 }
diff --git a/Dev/src/fry_script/token_generator_types.h b/Dev/src/fry_script/token_generator_types.h
index 8c1ea66..a38d88d 100644
--- a/Dev/src/fry_script/token_generator_types.h
+++ b/Dev/src/fry_script/token_generator_types.h
@@ -4,7 +4,7 @@
 namespace fry_script
 {
     /// Largest size of a allowed token
-    const s32 MaxTokenSize = 255;
+    const s32 MAX_TOKEN_SIZE = 255; // sizes TokenGenerator::_TempToken, which must also hold the '\0' GetNextToken appends
     /// Struct that describes the tokens generated from GenerateTokens
     /// It contains a pointer into the char* Code that is sent into the
@@ -15,16 +15,18 @@ namespace fry_script
         u32 Length;
     };
-    /// The state of a TokenGenerator (for multithread support)
+    /// Iteration state used by GetNextToken, kept in a struct the caller passes in (for multithreading support)
     struct TokenGenerator
     {
         TokenGenerator() :
-            CurrentIndex( 0 )
+            _CurrentIndex( 0 ) // iteration starts at the first token
         {
         }
-        int CurrentIndex;
-        char TempToken[MaxTokenSize];
+        /// Index of the token the next GetNextToken call returns; GetNextToken sets it to -1 once the tokens are exhausted
+        int _CurrentIndex;
+        /// Buffer GetNextToken copies the current token into and NUL-terminates; out_NextToken points at it
+        char _TempToken[MAX_TOKEN_SIZE];
     };
 }
diff --git a/Dev/src/fry_script_test/token_generator_tests.h b/Dev/src/fry_script_test/token_generator_tests.h
index 165df55..98e756d 100644
--- a/Dev/src/fry_script_test/token_generator_tests.h
+++ b/Dev/src/fry_script_test/token_generator_tests.h
@@ -5,7 +5,21 @@
 using namespace fry_script;
 using namespace memory;
+/// Set to 1 to print every token next to its expected value, which helps to find the tokens that failed to parse
+#define PRINT_TOKEN_COMPARISON 0
+
+#if PRINT_TOKEN_COMPARISON // the include below is only needed for the printing macro
 #include
+#endif // PRINT_TOKEN_COMPARISON
+
+#if PRINT_TOKEN_COMPARISON
+    #define PRINT_COMPARISON( CURRENT_TOKEN, EXPECTED_RESULT ) \
+    { \
+        std::cout << "NextToken='" << CURRENT_TOKEN << "' EXPECTED_RESULT='" << EXPECTED_RESULT << "'" << std::endl; \
+    }
+#else // printing disabled: PRINT_COMPARISON expands to nothing
+    #define PRINT_COMPARISON( CURRENT_TOKEN, EXPECTED_RESULT )
+#endif // PRINT_TOKEN_COMPARISON
 /// Helper that compares a char*[] with a array, so that we can run tests easily
 #define CHECK_ARRAY_STRING_EQUAL( EXPECTED_RESULT, ACTUAL_RESULT ) \
@@ -19,6 +33,7 @@ using namespace memory;
         size_t Idx = 0; \
         while( token_generator::GetNextToken( &Generator, ACTUAL_RESULT, &NextToken ) ) \
         { \
+            PRINT_COMPARISON( NextToken, EXPECTED_RESULT[Idx] ); \
             CHECK_EQUAL( 0, strcmp( NextToken, EXPECTED_RESULT[Idx++] ) ); \
         } \
     } \
@@ -36,6 +51,7 @@ struct TokenGeneratorCleanSetup
         array::clear( GeneratedTokens );
     }
+    TokenGenerator Generator; // NOTE(review): presumably the Generator that CHECK_ARRAY_STRING_EQUAL hands to GetNextToken - confirm the macro declares none of its own
     Array GeneratedTokens;
 };
@@ -79,6 +95,7 @@ SUITE( TokenGenerator )
         CHECK_ARRAY_STRING_EQUAL( ExpectedResult, GeneratedTokens );
     }
+    // Regression test for a trailing space: the initial code was known to be vulnerable to this, so make sure it won't come up again
     TEST_FIXTURE( TokenGeneratorCleanSetup, SpaceAtEnd )
     {
         const char* Code = "FirstToken SecondToken ";
@@ -93,4 +110,20 @@ SUITE( TokenGenerator )
         CHECK( Result );
         CHECK_ARRAY_STRING_EQUAL( ExpectedResult, GeneratedTokens );
     }
+
+    // Regression test for a leading space: the initial code was known to be vulnerable to this, so make sure it won't come up again
+    TEST_FIXTURE( TokenGeneratorCleanSetup, SpaceAtBeginning )
+    {
+        const char* Code = " FirstToken SecondToken";
+        const char* ExpectedResult[] =
+        {
+            "FirstToken",
+            "SecondToken"
+        };
+
+        bool Result = token_generator::GenerateTokens( Code, GeneratedTokens );
+
+        CHECK( Result );
+        CHECK_ARRAY_STRING_EQUAL( ExpectedResult, GeneratedTokens );
+    }
 }
\ No newline at end of file