Skip to content

Commit

Permalink
recognize GENERAL_PUNCTUATION unicode points as non-letters
Browse files Browse the repository at this point in the history
  • Loading branch information
scivey committed Nov 20, 2015
1 parent 7a4ca08 commit 4bebfab
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 7 deletions.
4 changes: 3 additions & 1 deletion src/libunicode/code_point_support.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ bool isLatin1SupplementLetter(uint32_t cp) {
if (cp == 215 || cp == 247) {
return false;
}
return false;

return true;
}

bool isMathematicalAlphanumericSymbolLetter(uint32_t cp) {
Expand All @@ -114,6 +115,7 @@ bool isLetterPoint(uint32_t cp, UnicodeBlock uBlock) {
case UnicodeBlock::LATIN_EXTENDED_D : return true;
case UnicodeBlock::LATIN_EXTENDED_E : return true;
case UnicodeBlock::LATIN_EXTENDED_ADDITIONAL : return true;
case UnicodeBlock::GENERAL_PUNCTUATION : return false;

case UnicodeBlock::ARROWS : return false;
case UnicodeBlock::BLOCK_ELEMENTS : return false;
Expand Down
26 changes: 25 additions & 1 deletion src/libunicode/test_unit/test_UnicodeBlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,34 @@ using namespace relevanced;
using namespace relevanced::libunicode;
using namespace relevanced::util;

TEST(TestGetUnicodeBlock, Simple) {
TEST(TestGetUnicodeBlock, BasicLatin) {
  // U+0041 LATIN CAPITAL LETTER A ('A', decimal 65) is expected to map to
  // the Basic Latin block.
  const uint32_t capitalA = 0x41;
  const auto block = getUnicodeBlock(capitalA);
  EXPECT_EQ(UnicodeBlock::BASIC_LATIN, block);
}

TEST(TestGetUnicodeBlock, GeneralPunctuation) {
  // U+2014 EM DASH (decimal 8212) is expected to map to the
  // General Punctuation block.
  const uint32_t emDash = 0x2014;
  const auto block = getUnicodeBlock(emDash);
  EXPECT_EQ(UnicodeBlock::GENERAL_PUNCTUATION, block);
}

TEST(TestGetUnicodeBlock, Latin1Supplement) {
  // U+00FA LATIN SMALL LETTER U WITH ACUTE (decimal 250) is expected to map
  // to the Latin-1 Supplement block.
  const uint32_t uWithAcute = 0xFA;
  const auto block = getUnicodeBlock(uWithAcute);
  EXPECT_EQ(UnicodeBlock::LATIN_1_SUPPLEMENT, block);
}

TEST(TestGetUnicodeBlock, LatinExtendedA) {
  // U+0138 LATIN SMALL LETTER KRA (decimal 312) -- it looks like a small
  // capital K -- is expected to map to the Latin Extended-A block.
  const uint32_t smallKra = 0x138;
  const auto block = getUnicodeBlock(smallKra);
  EXPECT_EQ(UnicodeBlock::LATIN_EXTENDED_A, block);
}
31 changes: 26 additions & 5 deletions src/libunicode/test_unit/test_code_point_support.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,32 @@ using namespace relevanced;
using namespace relevanced::libunicode;
using namespace relevanced::util;

TEST(TestCodePointSupport, Simple) {
uint32_t codepoint = (uint32_t) ((unsigned char) 'a');
EXPECT_TRUE(isLetterPoint(codepoint));
// Smoke test over ASCII: 'a' must be reported as a letter point and '.'
// must not.
// NOTE(review): this span comes from a diff view; the two lines using the
// lowercase `codepoint` spelling appear to be the pre-rename side of the
// change and the `codePoint` lines the post-rename side -- confirm against
// the actual file.
TEST(TestIsLetterPoint, Simple) {
// Widen the char through unsigned char before storing it as a code point.
uint32_t codePoint = (uint32_t) ((unsigned char) 'a');
EXPECT_TRUE(isLetterPoint(codePoint));

codepoint = (uint32_t) ((unsigned char) '.');
EXPECT_FALSE(isLetterPoint(codepoint));
// Reuse the same variable for the non-letter case.
codePoint = (uint32_t) ((unsigned char) '.');
EXPECT_FALSE(isLetterPoint(codePoint));
}

TEST(TestIsLetterPoint, UnicodePunctuation) {
  // U+2014 EM DASH (decimal 8212) is punctuation, so it must not be
  // reported as a letter.
  const uint32_t emDash = 0x2014;
  const bool isLetter = isLetterPoint(emDash);
  EXPECT_FALSE(isLetter);
}

TEST(TestIsLetterPoint, Numbers) {
  // The ASCII digits '0'..'9' (U+0030..U+0039) must not be classified as
  // letters. Iterate over the character literals: the raw values 0..9 are
  // the control characters U+0000..U+0009, not digits, so looping over them
  // would never exercise a single numeral.
  for (unsigned char digit = '0'; digit <= '9'; ++digit) {
    const uint32_t codePoint = digit;
    // Stream the offending code point so a failure identifies which digit
    // was misclassified.
    EXPECT_FALSE(isLetterPoint(codePoint)) << "code point " << codePoint;
  }
}

TEST(TestIsLetterPoint, GermanLetter) {
  // U+00DF LATIN SMALL LETTER SHARP S (Eszett, decimal 223) must be
  // reported as a letter.
  const uint32_t eszett = 0xDF;
  const bool isLetter = isLetterPoint(eszett);
  EXPECT_TRUE(isLetter);
}

TEST(TestIsLetterPoint, SpanishLetter) {
  // U+00D1 LATIN CAPITAL LETTER N WITH TILDE (capital "ene", decimal 209)
  // must be reported as a letter.
  const uint32_t capitalEnye = 0xD1;
  const bool isLetter = isLetterPoint(capitalEnye);
  EXPECT_TRUE(isLetter);
}

0 comments on commit 4bebfab

Please sign in to comment.