Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Char highlight #62

Merged
merged 5 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ end_of_line = lf
charset = utf-8

[*.js]
indent_size = 2
insert_final_newline = true
trim_trailing_whitespace = true

Expand All @@ -19,3 +20,6 @@ indent_style = space
indent_size = 4
insert_final_newline = true
trim_trailing_whitespace = true

[*.scss]
indent_size = 2
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ It is installable and autoloadable via [Composer](https://getcomposer.org/) as [
```json
{
"require": {
"bdk/debug": "3.0",
"bdk/debug": "^3.2",
}
}
```
Expand Down
4 changes: 4 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@
"test" : [
"vendor/bin/phpunit -v"
],
"update-char-data": [
"bdk\\Debug\\Dev\\UpdateCharData::update"
],
"update-phpcs": [
"bdk\\Debug\\Dev\\ComposerScripts::updatePhpCsXml"
],
Expand All @@ -102,6 +105,7 @@
"coverage": "analyze code coverage",
"cs": "check coding standards",
"test": "run unit tests",
"update-char-data": "rebuild confusable char data from the latest unicode data",
"update-phpcs": "update phpcs.xml.dist & phpcs.slevomat.xml",
"webserver": "run php's internal webserver"
},
Expand Down
166 changes: 166 additions & 0 deletions dev/UpdateCharData.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
<?php

namespace bdk\Debug\Dev;

/**
 * Pull latest confusables.txt from unicode and save to php file
 *
 * Entry point is `update()` (wired to `composer run update-char-data`).
 *
 * @psalm-import-type charInfo from \bdk\Debug\Plugin\CharHighlight
 */
class UpdateCharData
{
    /** @var string Location of the confusables data file (anything `file()` can read) */
    public static $filepathSrc = 'https://www.unicode.org/Public/security/latest/confusables.txt';

    /** @var array<string, charInfo> */
    protected $charData = array();

    /**
     * Rebuild src/Debug/Dump/charData.php from the confusables data
     *
     * @return void
     *
     * @throws \RuntimeException if the source cannot be read or the output cannot be written
     */
    public static function update()
    {
        $filepathOut = __DIR__ . '/../src/Debug/Dump/charData.php';
        // Header is assembled line-by-line so the generated file
        // does not depend on how this source file happens to be indented
        $comment = \implode("\n", array(
            '/**',
            ' * This file is generated automatically from confusables.txt',
            ' * https://www.unicode.org/Public/security/latest/confusables.txt',
            ' *',
            ' * `composer run update-char-data`',
            ' *',
            ' * @phpcs:disable SlevomatCodingStandard.Arrays.AlphabeticallySortedByKeys',
            ' */',
        ));
        $php = '<?php // phpcs:ignore SlevomatCodingStandard.Files.FileLength' . "\n\n"
            . $comment . "\n\n"
            . 'return ' . self::varExportPretty(self::build()) . ";\n";
        // Rewrite single-character array keys:
        //   ascii keys are emitted as "\xNN" escapes, everything else stays a literal quoted char
        $php = \preg_replace_callback('/[\'"](.)[\'"] => /u', static function ($matches) {
            $char = $matches[1];
            // explicit encoding: keep in sync with the mb_chr() calls used when parsing
            $codePoint = \mb_ord($char, 'UTF-8');
            return $codePoint < 0x80
                ? '"\\x' . \dechex($codePoint) . '" => '
                : '\'' . $char . '\' => ';
        }, $php);
        if (\file_put_contents($filepathOut, $php) === false) {
            throw new \RuntimeException('Unable to write ' . $filepathOut);
        }
    }

    /**
     * Build char data
     *
     * Starts from the array returned by dev/charData.php and adds every
     * non-ascii sequence that is confusable with a single ascii char.
     * Keys already present in the starting array are never overwritten.
     *
     * @return array<string, array<string, string|bool>>
     *
     * @throws \RuntimeException if the source cannot be read
     */
    public static function build()
    {
        $rows = self::getParsedRows();

        // only interested in chars that are confusable with an ascii char
        // not interested in ascii chars that are confusable with other ascii chars
        $rows = \array_filter($rows, static function ($row) {
            $isCharAAscii = \strlen($row['charA']) === 1 && \ord($row['charA']) < 0x80;
            $isCharBAscii = \strlen($row['charB']) === 1 && \ord($row['charB']) < 0x80;
            return $isCharAAscii === false && $isCharBAscii;
        });

        // fixed processing order: if charA ever repeats, the first row (by byte order) wins below
        \usort($rows, static function ($rowA, $rowB) {
            return \strcmp($rowA['charA'], $rowB['charA']);
        });

        // rekey by character
        // NOTE(review): presumably dev/charData.php holds hand-maintained seed entries — confirm
        $rowsNew = require __DIR__ . '/charData.php';
        foreach ($rows as $row) {
            $key = $row['charA'];
            if (isset($rowsNew[$key])) {
                continue;
            }
            $rowsNew[$key] = array(
                'codePoint' => $row['charACodePoint'],
                'desc' => $row['charADesc'],
                'similarTo' => $row['charB'],
            );
        }

        \ksort($rowsNew);

        return $rowsNew;
    }

    /**
     * Return parsed data for all confusable data
     *
     * Blank lines and comment lines (first non-whitespace char is "#") are skipped.
     *
     * @return array<string, string|bool>[]
     *
     * @throws \RuntimeException if the source cannot be read
     */
    private static function getParsedRows()
    {
        $rows = \file(self::$filepathSrc);
        if ($rows === false) {
            throw new \RuntimeException('Unable to read ' . self::$filepathSrc);
        }
        $rows = \array_filter($rows, static function ($row) {
            // a UTF-8 byte-order-mark on the first line would otherwise hide its leading "#"
            if (\strncmp($row, "\xEF\xBB\xBF", 3) === 0) {
                $row = \substr($row, 3);
            }
            $row = \trim($row);
            return $row !== '' && $row[0] !== '#';
        });

        return \array_map(static function ($row) {
            return self::parseRow($row);
        }, $rows);
    }

    /**
     * Parse confusable.txt row
     *
     * Expected layout: `<source code points> ; <target code points> ; <category> # <comment>`
     *
     * @param string $row non-comment row from data file
     *
     * @return array<string,mixed> keys: charA, charACodePoint, charADesc, charB, isXid
     *
     * @throws \UnexpectedValueException if the row does not contain three fields
     */
    protected static function parseRow($row)
    {
        // fields are separated by a semicolon with optional surrounding whitespace
        //   (the data file uses " ;<tab>");  limit of 3 keeps any ";" in the comment intact
        $parts = \preg_split('/\s*;\s*/', $row, 3);
        if ($parts === false || \count($parts) !== 3) {
            throw new \UnexpectedValueException('Unable to parse confusables row: ' . \trim($row));
        }
        $parts = \array_map('trim', $parts);
        $parts = \array_combine(array('charACodePoint', 'charBCodePoint', 'comment'), $parts);

        $parts['charACodePoint'] = self::normalizeCodePoints($parts['charACodePoint']);
        $parts['charBCodePoint'] = self::normalizeCodePoints($parts['charBCodePoint']);

        $matches = array();
        \preg_match('/^(?P<category>\w+)\t#(?P<notXid>\*?)\s*(?P<example>\(.*?\))\s*(?P<charADesc>.*?) → (?P<charBDesc>.*?)(\s+#.*)?$/u', $parts['comment'], $matches);
        // defaults keep the lookups below defined when the comment does not match the pattern
        $parts = \array_merge(array(
            'charADesc' => '',
            'notXid' => '',
        ), $parts, $matches);

        return array(
            'charA' => self::codePointsToString($parts['charACodePoint']),
            'charACodePoint' => $parts['charACodePoint'],
            'charADesc' => $parts['charADesc'],

            'charB' => self::codePointsToString($parts['charBCodePoint']),
            'isXid' => empty($parts['notXid']),
        );
    }

    /**
     * Strip leading "00" pairs from each hex code point in a space separated list
     *
     * @param string $codePoints space separated hex code points
     *
     * @return string
     */
    private static function normalizeCodePoints($codePoints)
    {
        return \implode(' ', \array_map(static function ($codePoint) {
            // remove leading 00 pairs
            return \preg_replace('/^(00)+/', '', $codePoint);
        }, \explode(' ', $codePoints)));
    }

    /**
     * Convert a space separated list of hex code points to a UTF-8 string
     *
     * @param string $codePoints space separated hex code points
     *
     * @return string
     */
    private static function codePointsToString($codePoints)
    {
        return \implode('', \array_map(static function ($hex) {
            return \mb_chr(\hexdec($hex), 'UTF-8');
        }, \explode(' ', $codePoints)));
    }

    /**
     * export value as valid php
     *
     * Tidies `var_export()` output: "array(" without the space, nested arrays
     * opened on the same line as their key, and leading whitespace doubled.
     *
     * @param mixed $val Value to export
     *
     * @return string
     */
    protected static function varExportPretty($val)
    {
        $php = \var_export($val, true);
        $php = \str_replace('array (', 'array(', $php);
        $php = \preg_replace('/=> \n\s+array/', '=> array', $php);
        // double whatever leading whitespace each line has
        $php = \preg_replace_callback('/^(\s*)/m', static function ($matches) {
            return \str_repeat($matches[1], 2);
        }, $php);
        // collapse var_export's concatenated representation of a null byte into a single escape
        $php = \str_replace('\'\' . "\0" . \'\'', '"\x00"', $php);
        return $php;
    }
}
Loading