diff --git a/.github/workflows/make-check.yml b/.github/workflows/make-check.yml index 9f420fe72..f4b83d409 100644 --- a/.github/workflows/make-check.yml +++ b/.github/workflows/make-check.yml @@ -60,7 +60,7 @@ jobs: --verbose --no-interactive --with-develop --with-feature=Data::Password --with-feature=ldap - --with-feature=safe-unicode --with-feature=smime + --with-feature=smime --with-feature=soap --with-feature=sqlite ${{ startsWith(matrix.os, 'macos') && '--with-feature=macos' || '' }} - name: Run tests diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index af24d8a83..4a71b5ea3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,7 +7,7 @@ - . ~/bashrc - coverage-install - coverage-setup - - cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite . + - cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite . - autoreconf -i - ./configure - cd src; make; cd .. diff --git a/.travis.yml b/.travis.yml index 494137317..c03b076ed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,7 +25,7 @@ before_install: install: - cpan-install --coverage - - cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite . + - cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite . before_script: - coverage-setup diff --git a/cpanfile b/cpanfile index a6246c39c..01959b611 100644 --- a/cpanfile +++ b/cpanfile @@ -150,6 +150,14 @@ requires 'Time::HiRes', '>= 1.29'; # Used to get Unix time from local time requires 'Time::Local', '>= 1.23'; +# Normalizes file names represented by Unicode. +# Note: Perl 5.8.1 bundles version 0.23. +# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0). +requires 'Unicode::Normalize', '>= 1.03'; + +# Sanitizes inputs with Unicode text. +requires 'Unicode::UTF8', '>= 0.58'; + # Used to create URI containing non URI-canonical characters. # Note: '3.28' is the version included in URI-1.35. requires 'URI::Escape', '>= 3.28'; @@ -190,13 +198,6 @@ recommends 'Net::DNS', '>= 0.65'; # This is required if you set "list_check_smtp" sympa.conf parameter, used to check existing aliases before mailing list creation. recommends 'Net::SMTP'; -# Normalizes file names represented by Unicode -# Note: Perl 5.8.1 bundles version 0.23. -# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0). -recommends 'Unicode::Normalize', '>= 1.03'; - -recommends 'Unicode::UTF8', '>= 0.60'; - ### Features ## @@ -324,10 +325,9 @@ feature 'soap', 'Required if you want to run the Sympa SOAP server that provides }; feature 'safe-unicode', 'Sanitizes inputs with Unicode text.' => sub { - # Note: Perl 5.8.1 bundles version 0.23. - # Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0). - requires 'Unicode::Normalize', '>= 1.03'; - requires 'Unicode::UTF8', '>= 0.60'; + # Note: These became required (>=6.2.73b). + #requires 'Unicode::Normalize', '>= 1.03'; + #requires 'Unicode::UTF8', '>= 0.58'; }; on 'test' => sub { diff --git a/src/cgi/wwsympa.fcgi.in b/src/cgi/wwsympa.fcgi.in index e99579f95..e88d9275a 100644 --- a/src/cgi/wwsympa.fcgi.in +++ b/src/cgi/wwsympa.fcgi.in @@ -39,6 +39,7 @@ use IO::File qw(); use MIME::EncWords; use MIME::Lite::HTML; use POSIX qw(); +use Unicode::UTF8; use URI; use Data::Dumper; # tentative @@ -1057,6 +1058,15 @@ while ($query = Sympa::WWW::FastCGI->new) { ## Get params in a hash %in = $query->Vars; + while (my ($k, $v) = each %in) { + next if ref $v; + next if Encode::is_utf8($v); + unless (Unicode::UTF8::valid_utf8($v)) { + $log->syslog('err', 'Parameter in invalid UTF-8 %s="%s": Ignored', + $k, sprintf("\\x%*v02X", "\\x", $v)); + delete $in{$k}; + } + } # Determin robot. $robot = $ENV{SYMPA_DOMAIN}; @@ -1868,24 +1878,28 @@ sub _split_params { } if (@params) { - $in{'action'} = $params[0]; + $in{'action'} = shift @params; my @args = @{$action_args{$in{'action'}} // $action_args{'default'}}; - my $i = 1; foreach my $p (@args) { - my $pname; - ## More than 1 param + my ($k, $v); if ($p =~ /^\@(\w+)$/) { - $pname = $1; - $in{$pname} = join '/', @params[$i .. $#params]; - $in{$pname} .= '/' if $ending_slash; - last; + $k = $1; + $v = join '/', @params; + $v .= '/' if $ending_slash; } else { - $pname = $p; - $in{$pname} = $params[$i]; + $k = $p; + $v = shift @params; + } + $in{$k} = $v; + + unless (Encode::is_utf8($v) or Unicode::UTF8::valid_utf8($v)) { + $log->syslog('err', + 'Parameter in invalid UTF-8 %s="%s": Ignored', + $k, sprintf("\\x%*v02X", "\\x", $v)); + delete $in{$k}; } - wwslog('debug', 'Incoming parameter: %s=%s', $pname, $in{$pname}); - $i++; + last if 0 == index $p, '@'; } } } diff --git a/src/lib/Sympa/Tools/Text.pm b/src/lib/Sympa/Tools/Text.pm index fc240fe32..e72b510fc 100644 --- a/src/lib/Sympa/Tools/Text.pm +++ b/src/lib/Sympa/Tools/Text.pm @@ -39,8 +39,8 @@ use MIME::EncWords; use Text::LineFold; use Unicode::GCString; use URI::Escape qw(); -BEGIN { eval 'use Unicode::Normalize qw()'; } -BEGIN { eval 'use Unicode::UTF8 qw()'; } +use Unicode::Normalize qw(); +use Unicode::UTF8; use Sympa::Language; use Sympa::Regexps; @@ -141,15 +141,11 @@ sub canonic_text { my $utext; if (Encode::is_utf8($text)) { $utext = $text; - } elsif ($Unicode::UTF8::VERSION) { + } else { no warnings 'utf8'; $utext = Unicode::UTF8::decode_utf8($text); - } else { - $utext = Encode::decode_utf8($text); - } - if ($Unicode::Normalize::VERSION) { - $utext = Unicode::Normalize::normalize('NFC', $utext); } + $utext = Unicode::Normalize::normalize('NFC', $utext); # Remove DOS linefeeds (^M) that cause problems with Outlook 98, AOL, # and EIMS: @@ -313,13 +309,8 @@ sub guessed_to_utf8 { and length $text and $text =~ /[^\x00-\x7F]/; - my $utf8; - if ($Unicode::UTF8::VERSION) { - $utf8 = Unicode::UTF8::decode_utf8($text) - if Unicode::UTF8::valid_utf8($text); - } else { - $utf8 = eval { Encode::decode_utf8($text, Encode::FB_CROAK()) }; - } + my $utf8 = Unicode::UTF8::decode_utf8($text) + if Unicode::UTF8::valid_utf8($text); unless (defined $utf8) { foreach my $charset (map { $_ ? @$_ : () } @legacy_charsets{@langs}) { $utf8 = @@ -332,8 +323,7 @@ sub guessed_to_utf8 { } # Apply NFC: e.g. for modified-NFD by Mac OS X. - $utf8 = Unicode::Normalize::normalize('NFC', $utf8) - if $Unicode::Normalize::VERSION; + $utf8 = Unicode::Normalize::normalize('NFC', $utf8); return Encode::encode_utf8($utf8); } diff --git a/t/Tools_Text.t b/t/Tools_Text.t index ff08480fd..642a65f29 100644 --- a/t/Tools_Text.t +++ b/t/Tools_Text.t @@ -46,18 +46,13 @@ is $dec, $unicode_email, 'decode_filesystem_safe, Unicode'; # ToDo: foldcase() # ToDo: wrap_text() -SKIP: { - skip 'Unicode::Normalize and Unicode::UTF8 required.' - unless $Unicode::Normalize::VERSION and $Unicode::UTF8::VERSION; - - # Noncharacters: U+D800, U+10FFE, U+110000, U+200000 - is Sympa::Tools::Text::canonic_text( - "\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n" - ), - Encode::encode_utf8( - "\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n" - ), - 'canonic_text'; -} +# Noncharacters: U+D800, U+10FFE, U+110000, U+200000 +is Sympa::Tools::Text::canonic_text( + "\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n" + ), + Encode::encode_utf8( + "\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n" + ), + 'canonic_text'; done_testing();