Skip to content

Commit

Permalink
* Improved token generator stability and made it a state-machine int…
Browse files Browse the repository at this point in the history
…ernally
  • Loading branch information
MarkusRannare committed Feb 17, 2013
1 parent 97e743b commit 79f6059
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 40 deletions.
159 changes: 125 additions & 34 deletions Dev/src/fry_script/token_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,71 +4,162 @@

using namespace foundation;

static const char CharsToIgnore[] =
namespace fry_script
{
// Whitespaces
' ',
'\t',
'\n',
'\r'
};
/// Internal version of namespace, clients don't need to know this
namespace token_generator
{
struct GeneratorState;

#include <iostream>
/// Callback invoked by ChangeState when a parsing state is entered or left.
/// NOTE(review): "StatChange" looks like a typo for "StateChange" - confirm before renaming.
typedef void (*StatChange)( GeneratorState& Generator, const char* Code, size_t Idx );
/// Callback invoked by GenerateTokens once per character index of Code while a state is current.
typedef void (*ParsingFunction)( GeneratorState& Generator, const char* Code, size_t Idx );

/// State for parsing tokens
/// The member order matters: TokenGenerationStates initialises these positionally.
struct ParsingState
{
/// Called by ChangeState when this state becomes the current one (skipped when NULL)
StatChange BeginState;
/// Called by ChangeState when this state stops being the current one (skipped when NULL)
StatChange EndState;

/// Called by GenerateTokens for every character index while this state is current
ParsingFunction ParseToken;
};

/// Identifies a row in TokenGenerationStates.
/// The enumerator values are used as array indices, so their order must match the table.
enum EParsingState
{
/// Between tokens; this is the state a GeneratorState starts in
PS_ParseWhitespace,
/// Inside a token; the token is emitted when this state is left
PS_ParseToken,
/// Entered by GenerateTokens after the last character; its table row has no callbacks
PS_DONE
};



void BeginParseWhitespace( GeneratorState& State, const char* Code, size_t Idx );
void EndParseWhitespace( GeneratorState& State, const char* Code, size_t Idx );
void ParseWhitespace( GeneratorState& State, const char* Code, size_t Idx );

void BeginParseToken( GeneratorState& State, const char* Code, size_t Idx );
void EndParseToken( GeneratorState& State, const char* Code, size_t Idx );
void ParseToken( GeneratorState& State, const char* Code, size_t Idx );

/// Callback table of the state machine, indexed by EParsingState.
/// Each row is { BeginState, EndState, ParseToken }; the row order must match the enumerator order.
ParsingState TokenGenerationStates[] =
{
// PS_ParseWhitespace
{ BeginParseWhitespace, EndParseWhitespace, ParseWhitespace },
// PS_ParseToken
{ BeginParseToken, EndParseToken, ParseToken },
// PS_DONE - no callbacks
{ NULL, NULL, NULL }
};

void ChangeState( GeneratorState& State, EParsingState NewState, const char* Code, size_t Idx );

/// Mutable state that is handed to every callback of the token state machine.
struct GeneratorState
{
    /// Creates a state that starts in PS_ParseWhitespace.
    ///
    /// There is intentionally no default constructor. Declaring this constructor already
    /// suppresses the implicit one, so an accidental default construction is rejected by
    /// the compiler instead of surfacing later as an unresolved symbol at link time.
    /// The constructor is explicit so an Array is never silently converted into a state.
    ///
    /// \param TokensToEmit - array that finished tokens are appended to; it is held by
    ///                       reference and therefore has to outlive this object
    explicit GeneratorState( Array<GeneratedToken>& TokensToEmit ) :
        LastFoundIdx( 0 ),
        CurrentParsingState( &TokenGenerationStates[PS_ParseWhitespace] ),
        EmittedTokens( TokensToEmit )
    {
    }

    /// Index in Code where the token that is currently being parsed starts
    size_t LastFoundIdx;
    /// Row of TokenGenerationStates whose callbacks are currently active
    ParsingState* CurrentParsingState;
    /// Destination for the tokens emitted by EndParseToken
    Array<GeneratedToken>& EmittedTokens;
};
}



namespace fry_script
{
namespace token_generator
{
bool GenerateTokens( const char* Code, Array<GeneratedToken>& out_Tokens )
/// Entry hook of the whitespace state, called by ChangeState.
/// Whitespace needs no bookkeeping, so this is intentionally a no-op; it only exists so
/// that every row of TokenGenerationStates has the same shape.
void BeginParseWhitespace( GeneratorState& State, const char* Code, size_t Idx )
{
    // Nothing to set up when a run of whitespace starts.
}

Tok.Location = Code;
Tok.Length = 0;
/// Exit hook of the whitespace state, called by ChangeState.
/// Intentionally empty: leaving a run of whitespace emits nothing.
void EndParseWhitespace( GeneratorState& State, const char* Code, size_t Idx )
{
}

size_t Idx = 0;
for( ; Idx < strlen( Code ); ++Idx )
/// Per-character handler of the whitespace state.
/// Stays in the state while Code[Idx] is whitespace and switches to PS_ParseToken at the
/// first character that is not, so that BeginParseToken records Idx as the token start.
///
/// \param State - state machine to advance
/// \param Code - text being tokenized
/// \param Idx - index of the character to inspect
void ParseWhitespace( GeneratorState& State, const char* Code, size_t Idx )
{
    if( !IsWhitespace( Code[Idx] ) )
    {
        ChangeState( State, PS_ParseToken, Code, Idx );
    }
}

array::push_back( out_Tokens, Tok );
/// Entry hook of the token state, called by ChangeState.
/// Remembers Idx as the start of the token so that EndParseToken can compute its extent.
void BeginParseToken( GeneratorState& State, const char* Code, size_t Idx )
{
State.LastFoundIdx = Idx;
}

/// Exit hook of the token state, called by ChangeState.
/// Emits the token that spans from the index recorded by BeginParseToken up to, but not
/// including, Idx. The token points into Code; no characters are copied.
///
/// \param State - supplies the token start and the array the token is appended to
/// \param Code - text being tokenized
/// \param Idx - index one past the last character of the token
void EndParseToken( GeneratorState& State, const char* Code, size_t Idx )
{
    const size_t TokenStart = State.LastFoundIdx;

    GeneratedToken Finished;
    Finished.Location = &Code[TokenStart];
    Finished.Length = Idx - TokenStart;

    array::push_back( State.EmittedTokens, Finished );
}

/// Per-character handler of the token state.
/// Keeps consuming characters until whitespace is found, then switches back to
/// PS_ParseWhitespace, which makes ChangeState run EndParseToken for the finished token.
///
/// \param State - state machine to advance
/// \param Code - text being tokenized
/// \param Idx - index of the character to inspect
void ParseToken( GeneratorState& State, const char* Code, size_t Idx )
{
    const bool bStillInsideToken = !IsWhitespace( Code[Idx] );
    if( bStillInsideToken )
    {
        return;
    }

    ChangeState( State, PS_ParseWhitespace, Code, Idx );
}

LastFoundIdx = Idx;
Tok.Location = &Code[Idx+1];
Tok.Length = 0;
}
/// Moves the state machine to NewState.
/// Runs the EndState callback of the state being left, makes NewState current and then
/// runs its BeginState callback. Either callback is skipped when it is NULL.
///
/// \param State - state machine to transition
/// \param NewState - index of the row in TokenGenerationStates to switch to
/// \param Code - text being tokenized, forwarded to the callbacks
/// \param Idx - current character index, forwarded to the callbacks
void ChangeState( GeneratorState& State, EParsingState NewState, const char* Code, size_t Idx )
{
    ParsingState* const Leaving = State.CurrentParsingState;
    if( Leaving->EndState != NULL )
    {
        Leaving->EndState( State, Code, Idx );
    }

    ParsingState* const Entering = &TokenGenerationStates[NewState];
    State.CurrentParsingState = Entering;
    if( Entering->BeginState != NULL )
    {
        Entering->BeginState( State, Code, Idx );
    }
}

Tok.Length = Idx - LastFoundIdx;
if( Tok.Length > 0 )
/// Splits Code into tokens that are separated by whitespace.
///
/// Every character index is handed to the ParseToken callback of the current state; the
/// callbacks switch between the whitespace and the token state, and leaving the token
/// state appends the finished token to out_Tokens.
///
/// \param Code - null-terminated text to tokenize. The generated tokens point into this
///               buffer, so it has to stay alive for as long as the tokens are used
/// \param out_Tokens - array that the generated tokens are appended to
/// \return true when Code has been scanned, false when Code is NULL
bool GenerateTokens( const char* Code, Array<GeneratedToken>& out_Tokens )
{
    if( Code == NULL )
    {
        return false;
    }

    GeneratorState State( out_Tokens );

    const size_t CodeLength = strlen( Code );
    size_t Idx = 0;
    for( ; Idx < CodeLength; ++Idx )
    {
        State.CurrentParsingState->ParseToken( State, Code, Idx );
    }

    // Leaving the last active state flushes a token that runs up to the end of Code
    ChangeState( State, PS_DONE, Code, Idx );

    return true;
}

/// Copies the next token in Tokens into the scratch buffer of Generator as a
/// null-terminated string.
///
/// \param Generator - holds the cursor and the scratch buffer; the cursor advances on every call
/// \param Tokens - tokens to iterate over
/// \param out_NextToken - set to the scratch buffer on success and left untouched on failure.
///                        The string is overwritten by the next call that uses the same Generator
/// \return true if a token was copied, false once all tokens have been consumed
bool GetNextToken( TokenGenerator* Generator, Array<GeneratedToken>& Tokens, const char** out_NextToken )
{
    const size_t CurrentIndex = Generator->_CurrentIndex++;
    if( CurrentIndex >= array::size( Tokens ) )
    {
        // Park the cursor at -1: converted to size_t it compares greater than any array
        // size, so further calls keep ending up here and keep returning false
        Generator->_CurrentIndex = -1;
        memory::mem_zero( Generator->_TempToken, sizeof( Generator->_TempToken ) );

        return false;
    }

    const GeneratedToken& CurrentToken = Tokens[CurrentIndex];

    // Clamp the copy so that the terminator always lands inside the buffer; a token that
    // is at least as long as the buffer would otherwise be terminated out of bounds
    const size_t BufferSize = sizeof( Generator->_TempToken );
    size_t CopyLength = CurrentToken.Length;
    if( CopyLength >= BufferSize )
    {
        CopyLength = BufferSize - 1;
    }

    memcpy_s( Generator->_TempToken, BufferSize, CurrentToken.Location, CopyLength );
    Generator->_TempToken[CopyLength] = '\0';
    *out_NextToken = Generator->_TempToken;

    return true;
}

/// Tells whether Token separates tokens.
/// Only space, horizontal tab, line feed and carriage return count as whitespace.
///
/// \param Token - character to classify
/// \return true for ' ', '\t', '\n' and '\r', false for every other character
bool IsWhitespace( char Token )
{
    switch( Token )
    {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return true;

    default:
        return false;
    }
}
}
}
5 changes: 4 additions & 1 deletion Dev/src/fry_script/token_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,11 @@ namespace fry_script
/// \param out_Tokens - the tokens generated from Code
bool GenerateTokens( const char* Code, foundation::Array<GeneratedToken>& out_Tokens );

/// Copies a single token
/// Copies the next token in Tokens to out_NextToken
bool GetNextToken( TokenGenerator* Generator, foundation::Array<GeneratedToken>& Tokens, const char** out_NextToken );

/// Returns true if the token is a whitespace (space, tab, line feed or carriage return)
/// Deliberately not declared inline: the definition lives in token_generator.cpp, and an
/// inline function has to be defined in every translation unit that uses it.
bool IsWhitespace( char Token );
}
}

Expand Down
12 changes: 7 additions & 5 deletions Dev/src/fry_script/token_generator_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
namespace fry_script
{
/// Largest size of an allowed token
const s32 MaxTokenSize = 255;
const s32 MAX_TOKEN_SIZE = 255;

/// Struct that describes the tokens generated from GenerateTokens
/// It contains a pointer into the char* Code that is sent into the
Expand All @@ -15,16 +15,18 @@ namespace fry_script
u32 Length;
};

/// The state of a TokenGenerator (for multithread support)
/// The state of a TokenGenerator (for multithreading support)
struct TokenGenerator
{
TokenGenerator() :
CurrentIndex( 0 )
_CurrentIndex( 0 )
{
}

int CurrentIndex;
char TempToken[MaxTokenSize];
/// When using GetNextToken, use this to know what the next token is
int _CurrentIndex;
/// Temporary buffer that holds the current token
char _TempToken[MAX_TOKEN_SIZE];
};
}

Expand Down
33 changes: 33 additions & 0 deletions Dev/src/fry_script_test/token_generator_tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,21 @@
using namespace fry_script;
using namespace memory;

/// Useful to set to 1 when we want to check what tokens have failed to parse
#define PRINT_TOKEN_COMPARISON 0

#if PRINT_TOKEN_COMPARISON
#include <iostream>
#endif

/// Prints the token that was produced next to the token that was expected.
/// Does nothing unless PRINT_TOKEN_COMPARISON is non-zero.
/// Both variants expand to exactly one statement that needs a trailing semicolon, so the
/// macro can safely be used as the body of an unbraced if/else.
#if PRINT_TOKEN_COMPARISON
#define PRINT_COMPARISON( CURRENT_TOKEN, EXPECTED_RESULT ) \
    do \
    { \
        std::cout << "NextToken='" << ( CURRENT_TOKEN ) << "' EXPECTED_RESULT='" << ( EXPECTED_RESULT ) << "'" << std::endl; \
    } while( 0 )
#else
#define PRINT_COMPARISON( CURRENT_TOKEN, EXPECTED_RESULT ) ( (void)0 )
#endif

/// Helper that compares a char*[] with an Array<GeneratedToken>, so that we can run tests easily
#define CHECK_ARRAY_STRING_EQUAL( EXPECTED_RESULT, ACTUAL_RESULT ) \
Expand All @@ -19,6 +33,7 @@ using namespace memory;
size_t Idx = 0; \
while( token_generator::GetNextToken( &Generator, ACTUAL_RESULT, &NextToken ) ) \
{ \
PRINT_COMPARISON( NextToken, EXPECTED_RESULT[Idx] ); \
CHECK_EQUAL( 0, strcmp( NextToken, EXPECTED_RESULT[Idx++] ) ); \
} \
} \
Expand All @@ -36,6 +51,7 @@ struct TokenGeneratorCleanSetup
array::clear( GeneratedTokens );
}

TokenGenerator Generator;
Array<GeneratedToken> GeneratedTokens;
};

Expand Down Expand Up @@ -79,6 +95,7 @@ SUITE( TokenGenerator )
CHECK_ARRAY_STRING_EQUAL( ExpectedResult, GeneratedTokens );
}

// I know that my initial code is vulnerable to this, so make sure it won't come up again
TEST_FIXTURE( TokenGeneratorCleanSetup, SpaceAtEnd )
{
const char* Code = "FirstToken SecondToken ";
Expand All @@ -93,4 +110,20 @@ SUITE( TokenGenerator )
CHECK( Result );
CHECK_ARRAY_STRING_EQUAL( ExpectedResult, GeneratedTokens );
}

// Regression test: whitespace in front of the first token.
// I know that my initial code is vulnerable to this, so make sure it won't come up again
TEST_FIXTURE( TokenGeneratorCleanSetup, SpaceAtBeginning )
{
// Note the leading space before the first token
const char* Code = " FirstToken SecondToken";
// Only the two words are expected
const char* ExpectedResult[] =
{
"FirstToken",
"SecondToken"
};

bool Result = token_generator::GenerateTokens( Code, GeneratedTokens );

CHECK( Result );
CHECK_ARRAY_STRING_EQUAL( ExpectedResult, GeneratedTokens );
}
}

0 comments on commit 79f6059

Please sign in to comment.