Skip to content

Commit

Permalink
feat: full sentence
Browse files Browse the repository at this point in the history
  • Loading branch information
overtrue committed Oct 27, 2023
1 parent fc79c87 commit 8f00a9d
Show file tree
Hide file tree
Showing 11 changed files with 91 additions and 37 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
/tests export-ignore
/vendor export-ignore
/sources export-ignore
/benchmark export-ignore
/bin/build export-ignore
/bin/utils.php export-ignore
.gitattributes export-ignore
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ echo Pinyin::sentence('带着希望去旅行,比到达终点更美好');
echo Pinyin::sentence('带着希望去旅行,比到达终点更美好', 'none');
// dai zhe xi wang qu lv xing , bi dao da zhong dian geng mei hao

// 保留所有其他字符
echo Pinyin::sentenceFull('ル是片假名,π是希腊字母', 'none');
// 保留所有非汉字字符
echo Pinyin::fullSentence('ル是片假名,π是希腊字母', 'none');
// ル shi pian jia ming ,π shi xi la zi mu
```

Expand Down
File renamed without changes.
8 changes: 5 additions & 3 deletions tests/benchmark.php → benchmark/run.php
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
<?php

require __DIR__.'/../vendor/autoload.php';
require __DIR__ . '/../vendor/autoload.php';

use Overtrue\Pinyin\Pinyin;

use function Termwind\{render};

$totalStart = microtime(true);
$text = file_get_contents(__DIR__.'/input.txt');
$text = file_get_contents(__DIR__ . '/input.txt');

$html = [];
foreach (['name', 'phrase', 'permalink', 'polyphones', 'chars', 'nameAbbr', 'abbr', 'sentence'] as $method) {
$methods = ['sentence','fullSentence','name','passportName','phrase','permalink','polyphones','chars','abbr','nameAbbr'];

foreach ($methods as $method) {
$start = microtime(true);
$result = call_user_func(Pinyin::class.'::'.$method, $text);
$usage = round(microtime(true) - $start, 5) * 1000;
Expand Down
40 changes: 17 additions & 23 deletions bin/pinyin
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,27 @@
<?php

require __DIR__ . '/../vendor/autoload.php';
require __DIR__ . '/utils.php';

use Overtrue\Pinyin\Pinyin;

$input = $argv[1] ?? null;
$methods = explode(',', 'name,phrase,permalink,polyphones,chars,nameAbbr,abbr,sentence');
$methods = ['sentence','fullSentence','name','passportName','phrase','permalink','polyphones','chars','abbr','nameAbbr'];
$method = 'sentence';
$inputOptions = [];
$methodAsString = implode('/', $methods);

$help = <<<"HELP"
Usage:
./pinyin [chinese] [method] [options]
Options:
-j, --json 输出 JSON 格式.
-c, --compact 不格式化输出 JSON.
-m, --method=[method] 转换方式,可选:sentence/sentenceFull/permalink/abbr/nameAbbr/name/passportName/phrase/polyphones/chars.
-m, --method=[method] 转换方式,可选:{$methodAsString}.
--no-tone 不使用音调.
--tone-style=[style] 音调风格,可选值:symbol/none/number, default: none.
-h, --help 显示帮助.
HELP;

foreach ($argv as $i => $arg) {
if ($i === 0) {
continue;
}
if (in_array($arg, $methods)) {
$method = $arg;
} elseif (str_starts_with($arg, '-')) {
[$key, $value] = array_pad(array_map('trim', explode('=', $arg, 2)), 2, null);
$inputOptions[$key] = $value;
}
}

function has_option($option, $alias = null): bool
{
global $inputOptions;
Expand All @@ -56,25 +45,30 @@ function get_option($option, $default = null, $alias = null): ?string
return $inputOptions[$option] ?? $default;
}

if (empty($input) || has_option('--help', '-h')) {
$inputOptions = parse_options($argv);
$input = $inputOptions[0] ?? null;

if (empty($input) || has_option('help', 'h')) {
echo $help;
exit(0);
}

if (has_option('--method', '-m')) {
$method = get_option('--method');
}
$method = get_option('method', $method, 'm');
$toneStyle = has_option('no-tone') ? 'none' : get_option('tone-style', 'none');

$toneStyle = has_option('--no-tone') ? 'none' : get_option('--tone-style', 'none');
if (! in_array($method, $methods)) {
echo "Method '{$method}' is not supported.\n";
exit(1);
}

$result = Pinyin::$method($input, $method === 'permalink' ? '-' : $toneStyle);

$toJson = has_option('--json', '-j') || in_array($method, ['polyphones']);
$toJson = has_option('json', 'j') || in_array($method, ['polyphones']);

if ($toJson) {
$options = JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT;

if (has_option('--compact', '-c')) {
if (has_option('compact', 'c')) {
$options = 0;
}

Expand Down
50 changes: 50 additions & 0 deletions bin/utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,53 @@ function parse_words(string $path, callable $fn = null): Generator
}
}
}

/**
 * Parse a raw argument vector into a map of options and positional arguments.
 *
 * Recognised forms:
 *  - `--name=value`, `-n=value`: the option is stored with the inline string value;
 *  - `--name`, `-n`: the option is stored as `true`; if the argument that
 *    immediately follows does not start with `-`, it replaces `true` as the
 *    option's value;
 *  - `-abc`: every character of the bundle is stored as its own option set to `true`;
 *  - any other argument: appended as a positional argument under the next
 *    free integer key.
 *
 * Only a bare option that directly precedes a plain argument may capture it.
 * Options carrying an inline value and bundled short flags clear the pending
 * option, so `-j --method=name text` keeps `text` positional instead of
 * attaching it to `j`.
 *
 * @param  array  $argv  Argument vector; index 0 is skipped.
 * @return array  Option names (leading dashes removed) mapped to `true` or a
 *                string value, plus positional arguments under integer keys.
 */
function parse_options(array $argv): array
{
    $inputOptions = [];
    // Bare option that may still receive the next plain argument as its value.
    $currentOption = null;
    $argCount = count($argv);

    for ($i = 1; $i < $argCount; $i++) {
        $arg = $argv[$i];

        if (str_starts_with($arg, '--')) {
            // Long option.
            $option = substr($arg, 2);
            if (str_contains($option, '=')) {
                [$option, $value] = explode('=', $option, 2);
                $inputOptions[$option] = $value;
                // The value is already known; an earlier bare option must not
                // swallow whatever comes after this one.
                $currentOption = null;
            } else {
                $inputOptions[$option] = true;
                $currentOption = $option;
            }
        } elseif (str_starts_with($arg, '-')) {
            // Short option.
            $option = substr($arg, 1);
            $length = strlen($option);
            if ($length > 1 && $option[1] !== '=') {
                // Bundle of short flags, e.g. -abc.
                for ($j = 0; $j < $length; $j++) {
                    $inputOptions[$option[$j]] = true;
                }
                // Bundled flags never take a value, and they end the claim of
                // any earlier bare option on the next argument.
                $currentOption = null;
            } else {
                // Single short option, e.g. -a or -a=value.
                if (str_contains($option, '=')) {
                    [$option, $value] = explode('=', $option, 2);
                    $inputOptions[$option] = $value;
                    $currentOption = null;
                } else {
                    $inputOptions[$option] = true;
                    $currentOption = $option;
                }
            }
        } else {
            // Plain argument: value of the pending option, otherwise positional.
            if ($currentOption !== null) {
                $inputOptions[$currentOption] = $arg;
                $currentOption = null;
            } else {
                $inputOptions[] = $arg;
            }
        }
    }

    return $inputOptions;
}
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@
"pint": "vendor/bin/pint ./src ./tests",
"fix-style": "vendor/bin/pint ./src ./tests",
"test": "vendor/bin/phpunit --colors=always",
"build": "php ./bin/build"
"build": "php ./bin/build",
"benchmark": "php ./benchmark/run.php"
},
"scripts-descriptions": {
"test": "Run all tests.",
Expand Down
8 changes: 4 additions & 4 deletions src/Converter.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class Converter

protected bool $noWords = false;

protected bool $keepOtherCharacters = false;
protected bool $cleanup = true;

protected string $yuTo = 'v';

Expand Down Expand Up @@ -79,9 +79,9 @@ public function noWords(): static
return $this;
}

public function keepOtherCharacters(): static
public function noCleanup(): static
{
$this->keepOtherCharacters = true;
$this->cleanup = false;

return $this;
}
Expand Down Expand Up @@ -174,7 +174,7 @@ public function convert(string $string, callable $beforeSplit = null): Collectio
}, $string);

// 过滤掉不保留的字符
if (! $this->keepOtherCharacters) {
if ($this->cleanup) {
$string = \preg_replace(\sprintf('~[^%s]~u', \implode($this->regexps)), '', $string);
}

Expand Down
5 changes: 3 additions & 2 deletions src/Pinyin.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* @method static Converter onlyHans()
* @method static Converter noAlpha()
* @method static Converter noNumber()
* @method static Converter noCleanup()
* @method static Converter noPunctuation()
* @method static Converter noTone()
* @method static Converter useNumberTone()
Expand Down Expand Up @@ -41,9 +42,9 @@ public static function sentence(string $string, string $toneStyle = Converter::T
return self::withToneStyle($toneStyle)->convert($string);
}

public static function sentenceFull(string $string, string $toneStyle = Converter::TONE_STYLE_SYMBOL): Collection
public static function fullSentence(string $string, string $toneStyle = Converter::TONE_STYLE_SYMBOL): Collection
{
return self::keepOtherCharacters()->withToneStyle($toneStyle)->convert($string);
return self::noCleanup()->withToneStyle($toneStyle)->convert($string);
}

public static function polyphones(string $string, string $toneStyle = Converter::TONE_STYLE_SYMBOL, bool $asList = false): Collection
Expand Down
5 changes: 5 additions & 0 deletions tests/ConverterTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ public function test_noPunctuation()
$this->assertPinyin('123 dài zhe xī 456 wàng qù lǚ xíng 789', Converter::make()->noPunctuation()->convert('123带着希456望去旅行789'));
}

/**
 * Converting with noCleanup() must produce output that still contains the
 * brackets, punctuation, katakana and Greek letter from the input, as spelled
 * out in the expected string below.
 */
public function test_noCleanup()
{
    $input = '带着(希望)去旅行,比到达终点更快乐。ル是片假名,π是希腊字母。';
    $expected = 'dài zhe ( xī wàng ) qù lǚ xíng , bǐ dào dá zhōng diǎn gèng kuài lè 。ル shì piàn jiǎ míng ,π shì xī là zì mǔ 。';

    $converted = Converter::make()->noCleanup()->convert($input);

    $this->assertPinyin($expected, $converted);
}

public function test_tone_style()
{
$this->assertPinyin('chóng qìng', Converter::make()->convert('重庆'));
Expand Down
4 changes: 2 additions & 2 deletions tests/PinyinTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,9 @@ public function test_sentence()
$this->assertPinyin('java gōng chéng shī', Pinyin::sentence('java工程师'));
}

public function test_sentenceFull()
public function test_fullSentence()
{
$this->assertPinyin('ル shì piàn jiǎ míng ,π shì xī là zì mǔ', Pinyin::sentenceFull('ル是片假名,π是希腊字母'));
$this->assertPinyin('ル shì piàn jiǎ # míng ,π shì xī là zì …… ', Pinyin::fullSentence('ル是片。假#名,π是希腊字……母'));
}

public function test_issues()
Expand Down

0 comments on commit 8f00a9d

Please sign in to comment.