Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Unicode::UTF8 mandatory #1906

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/make-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:
--verbose --no-interactive
--with-develop
--with-feature=Data::Password --with-feature=ldap
--with-feature=safe-unicode --with-feature=smime
--with-feature=smime
--with-feature=soap --with-feature=sqlite
${{ startsWith(matrix.os, 'macos') && '--with-feature=macos' || '' }}
- name: Run tests
Expand Down
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
- . ~/bashrc
- coverage-install
- coverage-setup
- cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite .
- cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite .
- autoreconf -i
- ./configure
- cd src; make; cd ..
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ before_install:

install:
- cpan-install --coverage
- cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite .
- cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite .

before_script:
- coverage-setup
Expand Down
22 changes: 11 additions & 11 deletions cpanfile
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,14 @@ requires 'Time::HiRes', '>= 1.29';
# Used to get Unix time from local time
requires 'Time::Local', '>= 1.23';

# Normalizes file names represented by Unicode.
# Note: Perl 5.8.1 bundles version 0.23.
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
requires 'Unicode::Normalize', '>= 1.03';

# Sanitizes inputs with Unicode text.
requires 'Unicode::UTF8', '>= 0.58';

# Used to create URI containing non URI-canonical characters.
# Note: '3.28' is the version included in URI-1.35.
requires 'URI::Escape', '>= 3.28';
Expand Down Expand Up @@ -190,13 +198,6 @@ recommends 'Net::DNS', '>= 0.65';
# This is required if you set the "list_check_smtp" parameter in sympa.conf, which is used to check for existing aliases before mailing list creation.
recommends 'Net::SMTP';

# Normalizes file names represented by Unicode
# Note: Perl 5.8.1 bundles version 0.23.
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
recommends 'Unicode::Normalize', '>= 1.03';

recommends 'Unicode::UTF8', '>= 0.60';

### Features
##

Expand Down Expand Up @@ -324,10 +325,9 @@ feature 'soap', 'Required if you want to run the Sympa SOAP server that provides
};

feature 'safe-unicode', 'Sanitizes inputs with Unicode text.' => sub {
# Note: Perl 5.8.1 bundles version 0.23.
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
requires 'Unicode::Normalize', '>= 1.03';
requires 'Unicode::UTF8', '>= 0.60';
# Note: These modules became mandatory requirements (as of version 6.2.73b).
#requires 'Unicode::Normalize', '>= 1.03';
#requires 'Unicode::UTF8', '>= 0.58';
};

on 'test' => sub {
Expand Down
18 changes: 2 additions & 16 deletions src/cgi/wwsympa.fcgi.in
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use IO::File qw();
use MIME::EncWords;
use MIME::Lite::HTML;
use POSIX qw();
use Unicode::UTF8;
use URI;
use Data::Dumper; # tentative

Expand All @@ -52,10 +53,6 @@ BEGIN {
and $Archive::Zip::SimpleZip::VERSION;
}

BEGIN {
eval 'use Unicode::UTF8 qw()';
}

use Sympa;
use Sympa::Archive;
use Conf;
Expand Down Expand Up @@ -1063,18 +1060,7 @@ while ($query = Sympa::WWW::FastCGI->new) {
while (my ($k, $v) = each %in) {
next if ref $v;
next if Encode::is_utf8($v);

my $valid_utf8;
if ($Unicode::UTF8::VERSION) {
$valid_utf8 = Unicode::UTF8::valid_utf8($v);
} else {
eval {
my $u = $v;
Encode::decode('UTF-8', $u, Encode::FB_CROAK());
$valid_utf8 = 1;
};
}
unless ($valid_utf8) {
unless (Unicode::UTF8::valid_utf8($v)) {
$log->syslog('err', 'Parameter in invalid UTF-8 %s="%s": Ignored',
$k, sprintf("\\x%*v02X", "\\x", $v));
delete $in{$k};
Expand Down
24 changes: 7 additions & 17 deletions src/lib/Sympa/Tools/Text.pm
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ use MIME::EncWords;
use Text::LineFold;
use Unicode::GCString;
use URI::Escape qw();
BEGIN { eval 'use Unicode::Normalize qw()'; }
BEGIN { eval 'use Unicode::UTF8 qw()'; }
use Unicode::Normalize qw();
use Unicode::UTF8;

use Sympa::Language;
use Sympa::Regexps;
Expand Down Expand Up @@ -141,15 +141,11 @@ sub canonic_text {
my $utext;
if (Encode::is_utf8($text)) {
$utext = $text;
} elsif ($Unicode::UTF8::VERSION) {
} else {
no warnings 'utf8';
$utext = Unicode::UTF8::decode_utf8($text);
} else {
$utext = Encode::decode_utf8($text);
}
if ($Unicode::Normalize::VERSION) {
$utext = Unicode::Normalize::normalize('NFC', $utext);
}
$utext = Unicode::Normalize::normalize('NFC', $utext);

# Remove DOS linefeeds (^M) that cause problems with Outlook 98, AOL,
# and EIMS:
Expand Down Expand Up @@ -313,13 +309,8 @@ sub guessed_to_utf8 {
and length $text
and $text =~ /[^\x00-\x7F]/;

my $utf8;
if ($Unicode::UTF8::VERSION) {
$utf8 = Unicode::UTF8::decode_utf8($text)
if Unicode::UTF8::valid_utf8($text);
} else {
$utf8 = eval { Encode::decode_utf8($text, Encode::FB_CROAK()) };
}
my $utf8 = Unicode::UTF8::decode_utf8($text)
if Unicode::UTF8::valid_utf8($text);
unless (defined $utf8) {
foreach my $charset (map { $_ ? @$_ : () } @legacy_charsets{@langs}) {
$utf8 =
Expand All @@ -332,8 +323,7 @@ sub guessed_to_utf8 {
}

# Apply NFC: e.g. for modified-NFD by Mac OS X.
$utf8 = Unicode::Normalize::normalize('NFC', $utf8)
if $Unicode::Normalize::VERSION;
$utf8 = Unicode::Normalize::normalize('NFC', $utf8);

return Encode::encode_utf8($utf8);
}
Expand Down
21 changes: 8 additions & 13 deletions t/Tools_Text.t
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,13 @@ is $dec, $unicode_email, 'decode_filesystem_safe, Unicode';
# ToDo: foldcase()
# ToDo: wrap_text()

SKIP: {
skip 'Unicode::Normalize and Unicode::UTF8 required.'
unless $Unicode::Normalize::VERSION and $Unicode::UTF8::VERSION;

# Noncharacters: U+D800, U+10FFFE, U+110000, U+200000
is Sympa::Tools::Text::canonic_text(
"\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n"
),
Encode::encode_utf8(
"\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n"
),
'canonic_text';
}
# Noncharacters: U+D800, U+10FFFE, U+110000, U+200000
is Sympa::Tools::Text::canonic_text(
"\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n"
),
Encode::encode_utf8(
"\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n"
),
'canonic_text';

done_testing();
Loading