Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support multiple charsets and custom charset #2

Merged
merged 4 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 45 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ console.log(res1, res2);

## API

### wisely(options)
### `wisely(options)`

Returns a `string` with the obfuscated text.

Expand All @@ -46,41 +46,71 @@ Type: `object`

##### text

Type: `string`
Required: `true`
- Type: `string`
- Required: `true`

The text to be obscured.

##### phrases

Type: `string[]` \
Required: `false`
- Type: `string[]`
- Required: `false`

The specific phrases to be obscured. If not specified, the whole text will be obscured.

##### caseSensitive

Type: `boolean` \
Default: `false`
- Type: `boolean`
- Default: `false`

Whether to obscure in a case-sensitive manner.

##### charSet
##### charSets

Type: `string` \
Default: `'latin'` \
Values: `'latin'` | `'latin-1'`
- Type: `(string | object)[]`
- Default: `['latin']`

The character set that will be used for obfuscation.
The character sets that will be used for obfuscation. Each item is either the **name of a** [**built-in character set**](#character-sets) or a **custom character set object**.

> In the future, we will add support for more character sets to improve the variety of the obsfucated text. Also, we will add support to define custom character sets.
A valid custom character set is an object in which **each key is a single character** and **each value is an array of single characters** that will be used to replace the key. See the example below.

```js
const customCharSet = {
a: ['@', '4'],
e: ['3'],
i: ['1', '!'],
o: ['0'],
s: ['5', '$'],
t: ['7'],
};
```

### `isCharSetValid(charSet)`

Returns a `boolean` indicating whether the character set is valid.

#### charSet

Type: `object`

The character set that will be checked.

### `mergeCharSets(...charSets)`

Returns a merged character set object.

#### charSets

Type: `string | object`

The character sets to be merged. Each argument is either the **name of a** [**built-in character set**](#character-sets) or a **custom character set object**.

## Character Sets

Below are the built-in character sets available. See the details of each character set in the [charsets](./charsets) directory.

| `charSet` | Block Name | Block Range |
| ---- | --------- | ----- |
| `charSet` Name | Block Name | Block Range |
| --- | --- | --- |
| `latin` | [Basic Latin](https://unicodeplus.com/block/0000) | \u0000 - \u007f |
| `latin-1` | [Latin-1 Supplement](https://unicodeplus.com/block/0080) | \u0080 - \u00ff |

Expand Down
2 changes: 1 addition & 1 deletion charsets/latin-1.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"A": ["\u00c0", "\u00c1", "\u00c2", "\u00c3", "\u00c4", "\u00c5"],
"a": ["\u00e0", "\u00e1", "\u00e2", "\u00e3", "\u00e4", "\u00e5", "\u00aa"],
"a": ["\u00aa", "\u00e0", "\u00e1", "\u00e2", "\u00e3", "\u00e4", "\u00e5"],
"B": ["\u00df"],
"C": ["\u00a2", "\u00a9", "\u00c7"],
"c": ["\u00e7"],
Expand Down
55 changes: 48 additions & 7 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,66 @@ import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';

export type CharSetNames = 'latin' | 'latin-1';
/** Names of the built-in character sets, exposed as named constants. */
export const CharSets = {
LATIN: 'latin',
LATIN_1: 'latin-1',
} as const;
/** Union of the built-in character set names, derived from the values of `CharSets`. */
export type CharSetNames = typeof CharSets[keyof typeof CharSets];
/** Maps a character to the list of characters that may replace it. */
export type CharSet = Record<string, string[] | undefined>;

// Directory containing this module file, computed from `import.meta.url`.
const dirname = path.dirname(fileURLToPath(import.meta.url));

/**
 * Loads one of the built-in character sets from its JSON file.
 *
 * The file is read synchronously from `../charsets/<name>.json`, resolved
 * relative to this module's directory.
 *
 * @param name - Name of the built-in character set. Defaults to `'latin'`.
 * @returns The parsed JSON content, typed as a `CharSet` (not validated here).
 * @throws Error when `name` is not one of the values of `CharSets`.
 */
function getCharSet(name: CharSetNames = 'latin'): CharSet {
  // Reject unknown names before touching the file system.
  const isKnownName = Object.values(CharSets).includes(name);
  if (!isKnownName) {
    throw new Error(`Invalid charSet name: ${name}`);
  }

  const jsonPath = path.resolve(dirname, `../charsets/${name}.json`);
  const rawJson = fs.readFileSync(jsonPath, { encoding: 'utf8' });

  return JSON.parse(rawJson) as CharSet;
}

function getChar(char: string, charSet: CharSet, caseSensitive?: boolean) {
const upperReplacements = charSet[char.toUpperCase()] ?? [];
const lowerReplacements = charSet[char.toLowerCase()] ?? [];
/**
 * Checks whether a value is a well-formed character set.
 *
 * A character set is valid when it is a non-null, non-array object in which
 * every key is exactly one character long and every value is an array whose
 * items are all strings of length one.
 *
 * @param charSet - The candidate character set.
 * @returns `true` when the structure is valid, `false` otherwise. Malformed
 *   input (including `null`) yields `false` rather than an exception.
 */
export function isCharSetValid(charSet: CharSet): boolean {
  // `typeof null` is 'object' and arrays are objects too, so both must be
  // ruled out explicitly; otherwise `Object.keys(null)` would throw and an
  // array such as `[['a']]` would be accepted as a character set.
  if (typeof charSet !== 'object' || charSet === null || Array.isArray(charSet)) {
    return false;
  }

  return Object.entries(charSet).every(([key, replacements]) => (
    key.length === 1
    && Array.isArray(replacements)
    // Guard the item type: a nested one-element array also has `length === 1`.
    && replacements.every((char) => typeof char === 'string' && char.length === 1)
  ));
}

/**
 * Merges any number of character sets into a single character set.
 *
 * Each argument is either the name of a built-in character set (loaded via
 * `getCharSet`) or a custom character set object. For every key, the
 * replacement lists are unioned, de-duplicated and sorted.
 *
 * @param charSets - Built-in character set names and/or custom character sets.
 * @returns A new character set containing the merged replacements.
 * @throws Error when a name is unknown or a character set fails `isCharSetValid`.
 */
export function mergeCharSets(...charSets: (CharSetNames | CharSet)[]): CharSet {
  return charSets.reduce<CharSet>((merged, entry) => {
    const resolved = typeof entry === 'string' ? getCharSet(entry) : entry;

    // Validate the charSet
    if (!isCharSetValid(resolved)) {
      throw new Error('Invalid charSet: each key and value must be a single character');
    }

    Object.entries(resolved).forEach(([key, replacements]) => {
      // A Set keeps first-seen order and drops duplicates; sorting afterwards
      // makes the result independent of the argument order.
      const unique = new Set<string>(merged[key] ?? []);
      (replacements ?? []).forEach((char) => unique.add(char));
      merged[key] = Array.from(unique).sort();
    });

    return merged;
  }, {});
}

function getChar(char: string, charSet: CharSet, caseSensitive?: boolean) {
const replacements = caseSensitive ? charSet[char] ?? []
: Array.from(new Set([...upperReplacements, ...lowerReplacements]));
: Array.from(new Set([
...(charSet[char.toUpperCase()] ?? []),
...(charSet[char.toLowerCase()] ?? []),
]));

if (!replacements.length) {
return char;
Expand All @@ -33,11 +74,11 @@ export type Options = {
text: string;
phrases?: string[];
caseSensitive?: boolean;
charSet?: CharSetNames;
charSets?: (CharSetNames | CharSet)[];
};

export default function wisely(options: Options): string {
const charSet = getCharSet(options.charSet);
const charSet = mergeCharSets(...(options.charSets ?? ['latin']));

const censor = (phrase: string): string => phrase.split('')
.map((char) => getChar(char, charSet, options.caseSensitive))
Expand Down
17 changes: 17 additions & 0 deletions test/charsets.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import fs from 'node:fs';
import path from 'node:path';
import { expect, test } from 'vitest';
import { CharSet, isCharSetValid } from '~/index.js';

// Each built-in charset JSON file is read from disk and must pass
// `isCharSetValid`, so a malformed entry in a charset file fails the suite.
test.each([
{ name: 'latin' },
{ name: 'latin-1' },
])('validate charSet: $name', ({ name }) => {
// Read the raw JSON file from the `charsets` directory next to `test/`.
const strJson = fs.readFileSync(
path.resolve(__dirname, `../charsets/${name}.json`),
{ encoding: 'utf8' },
);

const charSet = JSON.parse(strJson) as CharSet;
expect(isCharSetValid(charSet)).toBe(true);
});
109 changes: 104 additions & 5 deletions test/index.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,85 @@
/* eslint-disable @typescript-eslint/ban-ts-comment */
import { describe, expect, test } from 'vitest';
import wisely from '~/index.js';
import wisely, { Options, mergeCharSets } from '~/index.js';

// Tests for `mergeCharSets`: merging built-in and custom character sets,
// order independence, de-duplication, and rejection of invalid input.
describe('mergeCharSets', () => {
test('merge two built-in charSets', () => {
const mergedCharSet = mergeCharSets('latin', 'latin-1');

// Only a subset of keys is checked; replacements are expected in sorted order.
expect(mergedCharSet).toEqual(
expect.objectContaining({
A: ['4', '\u00c0', '\u00c1', '\u00c2', '\u00c3', '\u00c4', '\u00c5'],
a: ['@', '\u00aa', '\u00e0', '\u00e1', '\u00e2', '\u00e3', '\u00e4', '\u00e5'],
}),
);
});

test('merge built-in charSets with custom charSets', () => {
const customCharSet = { a: ['b', 'c'], x: ['y', 'z'] };

// Custom replacements are added to the built-in ones for the same key.
expect(mergeCharSets('latin', customCharSet)).toEqual(
expect.objectContaining({
A: ['4'],
a: ['@', 'b', 'c'],
x: ['y', 'z'],
Z: ['2'],
}),
);
});

test('merge two custom charSets', () => {
const charSet1 = { a: ['b', 'c'], x: ['y', 'z'] };
const charSet2 = { a: ['c', 'd', 'e'], X: ['Y', 'Z'] };

// The shared replacement 'c' appears once; keys are case-sensitive ('x' vs 'X').
expect(mergeCharSets(charSet1, charSet2)).toEqual({
a: ['b', 'c', 'd', 'e'],
x: ['y', 'z'],
X: ['Y', 'Z'],
});
});

test('charSet order should not affect the result', () => {
const customCharSet = { a: ['4', '@'] };

expect(mergeCharSets('latin', 'latin-1')).toEqual(mergeCharSets('latin-1', 'latin'));
expect(mergeCharSets('latin', customCharSet)).toEqual(mergeCharSets(customCharSet, 'latin'));
});

test('merge three custom charSets', () => {
const charSet1 = { a: ['b', 'c'], x: ['y', 'z'] };
const charSet2 = { a: ['c', 'd', 'e'], X: ['Y', 'Z'] };
const charSet3 = { a: ['e', 'f', 'g'], A: ['B', 'C'] };

expect(mergeCharSets(charSet1, charSet2, charSet3)).toEqual({
a: ['b', 'c', 'd', 'e', 'f', 'g'],
A: ['B', 'C'],
x: ['y', 'z'],
X: ['Y', 'Z'],
});
});

test('duplicate built-in charSets names', () => {
// Passing the same built-in name twice must not duplicate replacements.
expect(mergeCharSets('latin', 'latin')).toEqual(
expect.objectContaining({
A: ['4'], a: ['@'], B: ['8'], b: ['6'], Z: ['2'],
}),
);
});

test('unknown charSets names', () => {
// The arguments below are deliberately outside `CharSetNames`, hence the
// expected type errors; at runtime the call must throw.
// @ts-expect-error
expect(() => mergeCharSets('')).toThrow();
// @ts-expect-error
expect(() => mergeCharSets('x')).toThrow();
});

test('invalid custom charSets', () => {
// Multi-character key, multi-character replacement, and empty-string replacements.
expect(() => mergeCharSets({ aa: ['b', 'c', 'd'] })).toThrow();
expect(() => mergeCharSets({ a: ['bc'] })).toThrow();
expect(() => mergeCharSets({ a: ['b', 'c', ''] })).toThrow();
expect(() => mergeCharSets({ a: ['b', 'c', 'd', ''] })).toThrow();
});
});

describe('wisely', () => {
const text = 'Palestine will be free! Freedom is the right of ALL nations!';
Expand Down Expand Up @@ -55,10 +135,29 @@ describe('wisely', () => {
expect(wisely({ text, phrases: [] })).toEqual(text);
});

test.each([
{ testText: 'AaBbCcDdXxZz', contains: '\u00df\u00d7Zz', notContains: 'AaBbCcDdXx' },
])('with specific charSet (latin-1): $testText', ({ testText, contains, notContains }) => {
const result = wisely({ text: testText, charSet: 'latin-1' });
test.each<{ testText: string, charSets: Options['charSets'], contains: string, notContains: string }>([
{
charSets: ['latin-1'],
testText: 'AaBbCcDdXxZz',
contains: '\u00df\u00d7Zz',
notContains: 'AaBbCcDdXx',
},
{
charSets: ['latin', 'latin-1'],
testText: 'AaBbCcDdXxZz',
contains: '\u00d72',
notContains: 'AaBbCcDdXxZz',
},
{
charSets: [{ a: ['b', 'c'], x: ['y', 'z'] }],
testText: 'AaBbCcDdXxZz',
contains: 'BbCcDdZz',
notContains: 'AaXx',
},
])('with specific charSet $charSets: $testText', ({
testText, charSets, contains, notContains,
}) => {
const result = wisely({ text: testText, charSets });

contains.split('').forEach((char) => {
expect(result).contain(char);
Expand Down