-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: add c/cpp detect rule and tests for detector/tokenizer (#23)
- Loading branch information
Showing 5 changed files with 228 additions and 7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import { describe, it, assert } from 'vitest'; | ||
import { detectLanguage } from '../../src'; | ||
|
||
// c language detection | ||
/**
 * Language-detection checks for C and C++ snippets.
 *
 * Cases 1-3 accept either 'c' or 'cpp' as the detected language; cases 4-5
 * require exactly 'cpp'.
 */
describe('c/cpp language detection', () => {
  // Language ids accepted by the cases that allow either C or C++.
  const C_FAMILY = ['c', 'cpp'];

  it('case 1', () => {
    // Complete program with a <stdio.h> include.
    const code = `
      #include <stdio.h>
      int main() {
        printf("hello world");
        return 0;
      }
    `;
    const lang = detectLanguage(code);
    assert.include(C_FAMILY, lang);
  });

  it('case 2', () => {
    // No include and no return statement: only main() and printf().
    const code = `
      int main() {
        printf("hello world");
      }
    `;
    const lang = detectLanguage(code);
    assert.include(C_FAMILY, lang);
  });

  it('case 3', () => {
    // No main() at all: a helper function that calls scanf().
    const code = `
      int foo() {
        int n;
        scanf("%d", &n);
      }
    `;
    const lang = detectLanguage(code);
    assert.include(C_FAMILY, lang);
  });

  it('case 4', () => {
    // Uses template, std:: and <iostream>.
    const code = `
      #include <iostream>
      template <typename T>
      T add(T a, T b) {
        return a + b;
      }
      int main() {
        std::cout << add<int>(10, 20);
        return 0;
      }
    `;
    const lang = detectLanguage(code);
    // Chai takes (actual, expected): the detected value goes first so that a
    // failure message reports the two values the right way round.
    assert.strictEqual(lang, 'cpp');
  });

  it('case 5', () => {
    // Uses classes, inheritance, dynamic_cast and nullptr.
    const code = `
      #include <iostream>
      class Base { virtual void dummy() {} };
      class Derived: public Base { int a; };
      int main () {
        Base * b = new Base;
        Derived * d = dynamic_cast<Derived*>(b);
        if (d==nullptr) std::cout << "null";
        else std::cout << "not null";
        return 0;
      }
    `;
    const lang = detectLanguage(code);
    // (actual, expected) order, as in case 4.
    assert.strictEqual(lang, 'cpp');
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
import { describe, it, expect } from 'vitest'; | ||
import { tokenize } from '../../src/tokenizer'; | ||
import cppRules from '../../src/rules/cpp'; | ||
|
||
/**
 * Tokenizer checks for the C/C++ rule set.
 *
 * Each case runs a small snippet through `tokenize` with `cppRules` and
 * inspects the tokens of one kind.
 */
describe('c/cpp tokenizer', () => {
  // Tokenizes `source` with the C/C++ rules and keeps only tokens of `kind`.
  const tokensOfKind = (source, kind) =>
    tokenize(source, cppRules).filter((token) => token.kind === kind);

  it('can tokenize comment', () => {
    // The string literal contains "//" and must not count as a comment.
    const source = `
      // this is comment
      const str = "// not comment";
      /*
      this is multiline comment
      */
    `;
    const comments = tokensOfKind(source, 'comment');

    expect(comments).toHaveLength(2);
    for (const text of ['this is comment', 'this is multiline comment']) {
      expect(comments.some((token) => token.value.includes(text))).toBe(true);
    }
  });

  it('can tokenize keyword', () => {
    const source = `
      int main() {
        int num = 10;
        return 0;
      }
    `;
    const keywordTokens = tokensOfKind(source, 'keyword');

    for (const expected of ['int', 'return']) {
      expect(keywordTokens.some((token) => token.value === expected)).toBe(
        true,
      );
    }
  });

  it('can tokenize number', () => {
    const source = `
      int num = 123;
    `;
    const numbers = tokensOfKind(source, 'number');

    expect(numbers.some((token) => token.value === '123')).toBe(true);
  });

  it('can tokenize operator', () => {
    const source = `
      int num = 1 + 2 - 3 * 4 / 5 % 6;
    `;
    const operatorTokens = tokensOfKind(source, 'operator');

    for (const symbol of ['=', '+', '-', '*', '/', '%']) {
      expect(operatorTokens.some((token) => token.value === symbol)).toBe(true);
    }
  });

  it('can tokenize function', () => {
    // "main" also appears inside a comment; the count check below requires
    // exactly one function token.
    const source = `
      int main() {
        // main
      }
    `;
    const expectedNames = ['main'];
    const functions = tokensOfKind(source, 'function');

    expect(functions).toHaveLength(expectedNames.length);
    for (const expected of expectedNames) {
      expect(functions.some((token) => token.value === expected)).toBe(true);
    }
  });

  it('can tokenize class', () => {
    const source = `
      class MyClass {
        public:
          void greet() {
            std::cout << "Hello, World!";
          }
      };
    `;
    const classes = tokensOfKind(source, 'class');

    expect(classes).toHaveLength(1);
    const [onlyClass] = classes;
    expect(onlyClass.value).toBe('MyClass');
  });

  it('can tokenize string', () => {
    const source = `
      #include <iostream>
      int main() {
        std::cout << "Hello, World!";
        return 0;
      }
    `;
    const strings = tokensOfKind(source, 'string');

    expect(strings).toHaveLength(1);
    const [onlyString] = strings;
    expect(onlyString.value).toBe('"Hello, World!"');
  });
});