Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WWSympa: Invalid UTF-8 sequences in input may trigger crashing (#1884) #1885

Merged
merged 3 commits into from
Sep 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/make-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
--verbose --no-interactive
--with-develop
--with-feature=Data::Password --with-feature=ldap
--with-feature=safe-unicode --with-feature=smime
--with-feature=smime
--with-feature=soap --with-feature=sqlite
${{ startsWith(matrix.os, 'macos') && '--with-feature=macos' || '' }}
- name: Run tests
Expand Down
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
- . ~/bashrc
- coverage-install
- coverage-setup
- cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite .
- cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite .
- autoreconf -i
- ./configure
- cd src; make; cd ..
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ before_install:

install:
- cpan-install --coverage
- cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite .
- cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite .

before_script:
- coverage-setup
Expand Down
22 changes: 11 additions & 11 deletions cpanfile
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,14 @@ requires 'Time::HiRes', '>= 1.29';
# Used to get Unix time from local time
requires 'Time::Local', '>= 1.23';

# Normalizes file names represented by Unicode.
# Note: Perl 5.8.1 bundles version 0.23.
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
requires 'Unicode::Normalize', '>= 1.03';

# Sanitizes inputs with Unicode text.
requires 'Unicode::UTF8', '>= 0.58';

# Used to create URI containing non URI-canonical characters.
# Note: '3.28' is the version included in URI-1.35.
requires 'URI::Escape', '>= 3.28';
Expand Down Expand Up @@ -190,13 +198,6 @@ recommends 'Net::DNS', '>= 0.65';
# This is required if you set "list_check_smtp" sympa.conf parameter, used to check existing aliases before mailing list creation.
recommends 'Net::SMTP';

# Normalizes file names represented by Unicode
# Note: Perl 5.8.1 bundles version 0.23.
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
recommends 'Unicode::Normalize', '>= 1.03';

recommends 'Unicode::UTF8', '>= 0.60';

### Features
##

Expand Down Expand Up @@ -324,10 +325,9 @@ feature 'soap', 'Required if you want to run the Sympa SOAP server that provides
};

feature 'safe-unicode', 'Sanitizes inputs with Unicode text.' => sub {
# Note: Perl 5.8.1 bundles version 0.23.
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
requires 'Unicode::Normalize', '>= 1.03';
requires 'Unicode::UTF8', '>= 0.60';
# Note: These became required (>=6.2.73b).
#requires 'Unicode::Normalize', '>= 1.03';
#requires 'Unicode::UTF8', '>= 0.58';
};

on 'test' => sub {
Expand Down
38 changes: 26 additions & 12 deletions src/cgi/wwsympa.fcgi.in
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use IO::File qw();
use MIME::EncWords;
use MIME::Lite::HTML;
use POSIX qw();
use Unicode::UTF8;
use URI;
use Data::Dumper; # tentative

Expand Down Expand Up @@ -1057,6 +1058,15 @@ while ($query = Sympa::WWW::FastCGI->new) {

## Get params in a hash
%in = $query->Vars;
while (my ($k, $v) = each %in) {
next if ref $v;
next if Encode::is_utf8($v);
unless (Unicode::UTF8::valid_utf8($v)) {
$log->syslog('err', 'Parameter in invalid UTF-8 %s="%s": Ignored',
$k, sprintf("\\x%*v02X", "\\x", $v));
delete $in{$k};
}
}

# Determin robot.
$robot = $ENV{SYMPA_DOMAIN};
Expand Down Expand Up @@ -1868,24 +1878,28 @@ sub _split_params {
}

if (@params) {
$in{'action'} = $params[0];
$in{'action'} = shift @params;
my @args = @{$action_args{$in{'action'}} // $action_args{'default'}};

my $i = 1;
foreach my $p (@args) {
my $pname;
## More than 1 param
my ($k, $v);
if ($p =~ /^\@(\w+)$/) {
$pname = $1;
$in{$pname} = join '/', @params[$i .. $#params];
$in{$pname} .= '/' if $ending_slash;
last;
$k = $1;
$v = join '/', @params;
$v .= '/' if $ending_slash;
} else {
$pname = $p;
$in{$pname} = $params[$i];
$k = $p;
$v = shift @params;
}
$in{$k} = $v;

unless (Encode::is_utf8($v) or Unicode::UTF8::valid_utf8($v)) {
$log->syslog('err',
'Parameter in invalid UTF-8 %s="%s": Ignored',
$k, sprintf("\\x%*v02X", "\\x", $v));
delete $in{$k};
}
wwslog('debug', 'Incoming parameter: %s=%s', $pname, $in{$pname});
$i++;
last if 0 == index $p, '@';
}
}
}
Expand Down
24 changes: 7 additions & 17 deletions src/lib/Sympa/Tools/Text.pm
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ use MIME::EncWords;
use Text::LineFold;
use Unicode::GCString;
use URI::Escape qw();
BEGIN { eval 'use Unicode::Normalize qw()'; }
BEGIN { eval 'use Unicode::UTF8 qw()'; }
use Unicode::Normalize qw();
use Unicode::UTF8;

use Sympa::Language;
use Sympa::Regexps;
Expand Down Expand Up @@ -141,15 +141,11 @@ sub canonic_text {
my $utext;
if (Encode::is_utf8($text)) {
$utext = $text;
} elsif ($Unicode::UTF8::VERSION) {
} else {
no warnings 'utf8';
$utext = Unicode::UTF8::decode_utf8($text);
} else {
$utext = Encode::decode_utf8($text);
}
if ($Unicode::Normalize::VERSION) {
$utext = Unicode::Normalize::normalize('NFC', $utext);
}
$utext = Unicode::Normalize::normalize('NFC', $utext);

# Remove DOS linefeeds (^M) that cause problems with Outlook 98, AOL,
# and EIMS:
Expand Down Expand Up @@ -313,13 +309,8 @@ sub guessed_to_utf8 {
and length $text
and $text =~ /[^\x00-\x7F]/;

my $utf8;
if ($Unicode::UTF8::VERSION) {
$utf8 = Unicode::UTF8::decode_utf8($text)
if Unicode::UTF8::valid_utf8($text);
} else {
$utf8 = eval { Encode::decode_utf8($text, Encode::FB_CROAK()) };
}
my $utf8 = Unicode::UTF8::decode_utf8($text)
if Unicode::UTF8::valid_utf8($text);
unless (defined $utf8) {
foreach my $charset (map { $_ ? @$_ : () } @legacy_charsets{@langs}) {
$utf8 =
Expand All @@ -332,8 +323,7 @@ sub guessed_to_utf8 {
}

# Apply NFC: e.g. for modified-NFD by Mac OS X.
$utf8 = Unicode::Normalize::normalize('NFC', $utf8)
if $Unicode::Normalize::VERSION;
$utf8 = Unicode::Normalize::normalize('NFC', $utf8);

return Encode::encode_utf8($utf8);
}
Expand Down
21 changes: 8 additions & 13 deletions t/Tools_Text.t
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,13 @@ is $dec, $unicode_email, 'decode_filesystem_safe, Unicode';
# ToDo: foldcase()
# ToDo: wrap_text()

SKIP: {
skip 'Unicode::Normalize and Unicode::UTF8 required.'
unless $Unicode::Normalize::VERSION and $Unicode::UTF8::VERSION;

# Noncharacters: U+D800, U+10FFE, U+110000, U+200000
is Sympa::Tools::Text::canonic_text(
"\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n"
),
Encode::encode_utf8(
"\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n"
),
'canonic_text';
}
# Noncharacters: U+D800, U+10FFE, U+110000, U+200000
is Sympa::Tools::Text::canonic_text(
"\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n"
),
Encode::encode_utf8(
"\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n"
),
'canonic_text';

done_testing();
Loading