From e4a4bd88bb70cc57a2700ebff19c5be143f9d1f5 Mon Sep 17 00:00:00 2001 From: Marius Kittler Date: Wed, 15 Feb 2023 12:54:08 +0100 Subject: [PATCH 1/2] Avoid caveats of `Archive::Extract` using `bsdtar` instead We observed extracted assets that are zero-sized on o3. The related jobs were nevertheless executed. Maybe the error handling of the asset extraction via `Archive::Extract` is broken (by an internal problem of `Archive::Extract` and its underlying modules). At least it does not seem to be an issue on our side (the download itself has proper error handling and we check for errors reported by `Archive::Extract`). By simply using `bsdtar` (or `bsdcat`, also in the `bsdtar` package) instead of relying on Perl modules of questionable qualify we can avoid the problem. Note that `bsdcat` will not complain if a file does not appear to be compressed. It will then just output the file contents as-is. This was previously an error. I think this is a change in behavior we can live with. As a plus, with `bsdtar` and `bsdcat` we no longer rely on the extension to determine the file format. So e.g. an xz compressed file is uncompressed as expected, even without or a wrong extension. We also gain support for many more formats like zstd. This change will likely also make the extraction faster because `Archive::Extract` only used a pure Perl implementation in the current configuration. Related ticket/note: https://progress.opensuse.org/issues/123175#note-27 --- container/ci/Dockerfile | 2 +- cpanfile | 1 - dependencies.yaml | 3 +- dist/rpm/openQA.spec | 6 +-- external/os-autoinst-common/tools/update-deps | 1 - lib/OpenQA/Downloader.pm | 45 ++++++++++------ .../apparmor.d/usr.share.openqa.script.openqa | 2 + .../apparmor.d/usr.share.openqa.script.worker | 2 + t/25-downloader.t | 52 ++++++++++++------- t/lib/OpenQA/Test/Utils.pm | 12 ++++- tools/ci/ci-packages.txt | 1 + 11 files changed, 85 insertions(+), 42 deletions(-) diff --git a/container/ci/Dockerfile b/container/ci/Dockerfile index 0b548105e4b..e531d27a9a2 100644 --- a/container/ci/Dockerfile +++ b/container/ci/Dockerfile @@ -32,6 +32,7 @@ RUN zypper in -y -C \ postgresql-devel \ qemu \ qemu-tools \ + bsdtar \ tar \ optipng \ python3-base \ @@ -49,7 +50,6 @@ RUN zypper in -y -C \ python3-yamllint \ sudo \ 'perl(App::cpanminus)' \ - 'perl(Archive::Extract)' \ 'perl(BSD::Resource)' \ 'perl(CSS::Minifier::XS)' \ 'perl(Carp::Always)' \ diff --git a/cpanfile b/cpanfile index 28ed0e8183d..967af1cfcd3 100644 --- a/cpanfile +++ b/cpanfile @@ -4,7 +4,6 @@ # from dependencies.yaml ################################################## -requires 'Archive::Extract', '> 0.7'; requires 'BSD::Resource'; requires 'CSS::Minifier::XS', '>= 0.01'; requires 'Capture::Tiny'; diff --git a/dependencies.yaml b/dependencies.yaml index e6022b50787..a511f429f29 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -28,7 +28,6 @@ build_requires: common_requires: perl: '>= 5.20.0' - perl(Archive::Extract): '> 0.7' perl(Carp::Always): '>= 0.14.02' perl(Config::IniFiles): perl(Cpanel::JSON::XS): '>= 4.09' @@ -86,6 +85,7 @@ main_requires: '%assetpack_requires': git-core: hostname: # for script/configure-web-proxy + bsdtar: # for extraction feature of OpenQA::Downloader perl(aliased): perl(base): perl(constant): @@ -159,6 +159,7 @@ worker_requires: openQA-client: optipng: sqlite3: '>= 3.24.0' # for INSERT INTO … ON CONFLICT … DO UPDATE SET … required by cache service + bsdtar: # for extraction feature of OpenQA::Downloader perl(Capture::Tiny): perl(Mojo::IOLoop::ReadWriteProcess): '>= 0.26' perl(Minion::Backend::SQLite): '>= 5.0.7' diff --git a/dist/rpm/openQA.spec b/dist/rpm/openQA.spec index 7271df3db0f..ae7daae5d42 100644 --- a/dist/rpm/openQA.spec +++ b/dist/rpm/openQA.spec @@ -49,14 +49,14 @@ # The following line is generated from dependencies.yaml %define assetpack_requires perl(CSS::Minifier::XS) >= 0.01 perl(JavaScript::Minifier::XS) >= 0.11 perl(Mojolicious::Plugin::AssetPack) >= 1.36 # The following line is generated from dependencies.yaml -%define common_requires perl >= 5.20.0 perl(Archive::Extract) > 0.7 perl(Carp::Always) >= 0.14.02 perl(Config::IniFiles) perl(Config::Tiny) perl(Cpanel::JSON::XS) >= 4.09 perl(Cwd) perl(Data::Dump) perl(Data::Dumper) perl(Digest::MD5) perl(Filesys::Df) perl(Getopt::Long) perl(Minion) >= 10.25 perl(Mojolicious) >= 9.30 perl(Regexp::Common) perl(Storable) perl(Time::Moment) perl(Try::Tiny) +%define common_requires perl >= 5.20.0 perl(Carp::Always) >= 0.14.02 perl(Config::IniFiles) perl(Config::Tiny) perl(Cpanel::JSON::XS) >= 4.09 perl(Cwd) perl(Data::Dump) perl(Data::Dumper) perl(Digest::MD5) perl(Filesys::Df) perl(Getopt::Long) perl(Minion) >= 10.25 perl(Mojolicious) >= 9.30 perl(Regexp::Common) perl(Storable) perl(Time::Moment) perl(Try::Tiny) # runtime requirements for the main package that are not required by other sub-packages # The following line is generated from dependencies.yaml -%define main_requires %assetpack_requires git-core hostname perl(BSD::Resource) perl(Carp) perl(CommonMark) perl(Config::Tiny) perl(DBD::Pg) >= 3.7.4 perl(DBI) >= 1.632 perl(DBIx::Class) >= 0.082801 perl(DBIx::Class::DeploymentHandler) perl(DBIx::Class::DynamicDefault) perl(DBIx::Class::OptimisticLocking) perl(DBIx::Class::ResultClass::HashRefInflator) perl(DBIx::Class::Schema::Config) perl(DBIx::Class::Storage::Statistics) perl(Date::Format) perl(DateTime) perl(DateTime::Duration) perl(DateTime::Format::Pg) perl(Exporter) perl(Fcntl) perl(File::Basename) perl(File::Copy) perl(File::Copy::Recursive) perl(File::Path) perl(File::Spec) perl(FindBin) perl(Getopt::Long::Descriptive) perl(IO::Handle) perl(IPC::Run) perl(JSON::Validator) perl(LWP::UserAgent) perl(Module::Load::Conditional) perl(Module::Pluggable) perl(Mojo::Base) perl(Mojo::ByteStream) perl(Mojo::IOLoop) perl(Mojo::JSON) perl(Mojo::Pg) perl(Mojo::RabbitMQ::Client) >= 0.2 perl(Mojo::URL) perl(Mojo::Util) perl(Mojolicious::Commands) perl(Mojolicious::Plugin) perl(Mojolicious::Static) perl(Net::OpenID::Consumer) perl(POSIX) perl(Pod::POM) perl(SQL::Translator) perl(Scalar::Util) perl(Sort::Versions) perl(Text::Diff) perl(Time::HiRes) perl(Time::ParseDate) perl(Time::Piece) perl(Time::Seconds) perl(URI::Escape) perl(YAML::PP) >= 0.026 perl(YAML::XS) perl(aliased) perl(base) perl(constant) perl(diagnostics) perl(strict) perl(warnings) +%define main_requires %assetpack_requires bsdtar git-core hostname perl(BSD::Resource) perl(Carp) perl(CommonMark) perl(Config::Tiny) perl(DBD::Pg) >= 3.7.4 perl(DBI) >= 1.632 perl(DBIx::Class) >= 0.082801 perl(DBIx::Class::DeploymentHandler) perl(DBIx::Class::DynamicDefault) perl(DBIx::Class::OptimisticLocking) perl(DBIx::Class::ResultClass::HashRefInflator) perl(DBIx::Class::Schema::Config) perl(DBIx::Class::Storage::Statistics) perl(Date::Format) perl(DateTime) perl(DateTime::Duration) perl(DateTime::Format::Pg) perl(Exporter) perl(Fcntl) perl(File::Basename) perl(File::Copy) perl(File::Copy::Recursive) perl(File::Path) perl(File::Spec) perl(FindBin) perl(Getopt::Long::Descriptive) perl(IO::Handle) perl(IPC::Run) perl(JSON::Validator) perl(LWP::UserAgent) perl(Module::Load::Conditional) perl(Module::Pluggable) perl(Mojo::Base) perl(Mojo::ByteStream) perl(Mojo::IOLoop) perl(Mojo::JSON) perl(Mojo::Pg) perl(Mojo::RabbitMQ::Client) >= 0.2 perl(Mojo::URL) perl(Mojo::Util) perl(Mojolicious::Commands) perl(Mojolicious::Plugin) perl(Mojolicious::Static) perl(Net::OpenID::Consumer) perl(POSIX) perl(Pod::POM) perl(SQL::Translator) perl(Scalar::Util) perl(Sort::Versions) perl(Text::Diff) perl(Time::HiRes) perl(Time::ParseDate) perl(Time::Piece) perl(Time::Seconds) perl(URI::Escape) perl(YAML::PP) >= 0.026 perl(YAML::XS) perl(aliased) perl(base) perl(constant) perl(diagnostics) perl(strict) perl(warnings) # The following line is generated from dependencies.yaml %define client_requires curl git-core jq perl(Getopt::Long::Descriptive) perl(IO::Socket::SSL) >= 2.009 perl(IPC::Run) perl(JSON::Validator) perl(LWP::Protocol::https) perl(LWP::UserAgent) perl(Test::More) perl(YAML::PP) >= 0.020 perl(YAML::XS) # The following line is generated from dependencies.yaml -%define worker_requires openQA-client optipng os-autoinst < 5 perl(Capture::Tiny) perl(File::Map) perl(Minion::Backend::SQLite) >= 5.0.7 perl(Mojo::IOLoop::ReadWriteProcess) >= 0.26 perl(Mojo::SQLite) psmisc sqlite3 >= 3.24.0 +%define worker_requires bsdtar openQA-client optipng os-autoinst < 5 perl(Capture::Tiny) perl(File::Map) perl(Minion::Backend::SQLite) >= 5.0.7 perl(Mojo::IOLoop::ReadWriteProcess) >= 0.26 perl(Mojo::SQLite) psmisc sqlite3 >= 3.24.0 # The following line is generated from dependencies.yaml %define build_requires %assetpack_requires rubygem(sass) diff --git a/external/os-autoinst-common/tools/update-deps b/external/os-autoinst-common/tools/update-deps index fb70a58fc01..53caceb4a73 100755 --- a/external/os-autoinst-common/tools/update-deps +++ b/external/os-autoinst-common/tools/update-deps @@ -137,7 +137,6 @@ sub get_modules { } sub _requires_line { - # requires 'Archive::Extract', '> 0.7'; my ($hash, $module) = @_; my $version = $hash->{$module}; my $line = "requires '$module'"; diff --git a/lib/OpenQA/Downloader.pm b/lib/OpenQA/Downloader.pm index 0e51a583e84..27a1628ab2c 100644 --- a/lib/OpenQA/Downloader.pm +++ b/lib/OpenQA/Downloader.pm @@ -48,6 +48,28 @@ sub download ($self, $url, $target, $options = {}) { return $err ? $err : "No error message recorded"; } +sub _extract_asset ($self, $to_extract, $target) { + my $cmd; + if ($to_extract =~ qr/\.tar(\..*)?/) { + # invoke bsdtar to extract (compressed) tar archives + eval { $target->make_path } or return $@; + $cmd = "bsdtar -x --directory '$target' -f '$to_extract' 2>&1"; + } + else { + # invoke bsdcat to extract compressed raw files + $cmd = "bsdcat '$to_extract' 2>&1 1>'$target'"; + } + + my $stderr = `$cmd`; + my ($res, $err) = ($?, $!); + my ($signal, $return_code) = ($res & 127, $res >> 8); + chomp $stderr and $stderr = ": $stderr" if $stderr; + return "Failed to invoke \"$cmd\": $err" if $res == -1; # uncoverable statement + return "Command \"$cmd\" died with signal $signal$stderr" if $signal; # uncoverable statement + return "Command \"$cmd\" exited with non-zero return code $return_code$stderr" if $return_code != 0; + return undef; +} + sub _get ($self, $url, $target, $options) { my $ua = $self->ua; my $log = $self->log; @@ -87,27 +109,18 @@ sub _get ($self, $url, $target, $options) { my $ret; my $err; if ($size == $headers->content_length) { - if ($options->{extract}) { - my $e = load_class 'Archive::Extract'; - die "Failed to load module 'Archive::Extract': $e" if ref $e; my $tempfile = path($ENV{MOJO_TMPDIR}, Mojo::URL->new($url)->path->parts->[-1])->to_string; $log->info(qq{Extracting "$tempfile" to "$target"}); $asset->move_to($tempfile); - - # Extract the temp archive file to the requested asset location - try { - die "Could not determine archive type\n" - unless my $ae = Archive::Extract->new(archive => $tempfile); - die $ae->error unless $ae->extract(to => $target); - } - catch { - $log->error(qq{Extracting "$tempfile" failed: $_}); - $err = $_; - $ret = $code; - }; - + $target = path($target); + $err = $self->_extract_asset($tempfile, $target); unlink $tempfile; + if ($err) { + $ret = $code; + $log->error(qq{Extracting "$tempfile" failed: $err}); + eval { $target->remove_tree } or $log->error("Unable to remove leftovers after failed extraction: $@"); + } } else { $asset->move_to($target) } diff --git a/profiles/apparmor.d/usr.share.openqa.script.openqa b/profiles/apparmor.d/usr.share.openqa.script.openqa index 03d627880d1..5530d594153 100644 --- a/profiles/apparmor.d/usr.share.openqa.script.openqa +++ b/profiles/apparmor.d/usr.share.openqa.script.openqa @@ -17,6 +17,8 @@ /etc/ssl/openssl.cnf r, /proc/meminfo r, /tmp/** rwk, + /usr/bin/bsdcat rix, + /usr/bin/bsdtar rix, /usr/bin/dash ix, /usr/bin/perl ix, /usr/bin/pwd ix, diff --git a/profiles/apparmor.d/usr.share.openqa.script.worker b/profiles/apparmor.d/usr.share.openqa.script.worker index 545446ced06..907acd156fc 100644 --- a/profiles/apparmor.d/usr.share.openqa.script.worker +++ b/profiles/apparmor.d/usr.share.openqa.script.worker @@ -18,6 +18,8 @@ network netbeui raw, /{usr/,}bin/{b,d}ash rix, + /usr/bin/bsdcat rix, + /usr/bin/bsdtar rix, /{usr/,}bin/pkill rix, /boot/*.fd rk, /dev/ r, diff --git a/t/25-downloader.t b/t/25-downloader.t index ebea62a8632..4209eeeb704 100644 --- a/t/25-downloader.t +++ b/t/25-downloader.t @@ -10,7 +10,6 @@ use FindBin; use lib "$FindBin::Bin/lib", "$FindBin::Bin/../external/os-autoinst-common/lib"; use OpenQA::Downloader; -use Archive::Extract; use IO::Socket::INET; use Mojo::Server::Daemon; use Mojo::IOLoop::Server; @@ -25,9 +24,6 @@ use Mojo::File qw(tempdir); my $port = Mojo::IOLoop::Server->generate_port; my $host = "127.0.0.1:$port"; -# avoid cluttering log with warnings from the Archive::Extract module -$Archive::Extract::WARN = 0; - # Capture logs my $log = Mojo::Log->new; $log->unsubscribe('message'); @@ -152,32 +148,52 @@ subtest 'Size differs' => sub { $cache_log = ''; }; -subtest 'Decompressing archive failed' => sub { - $to = $tempdir->child('test.gz'); +subtest 'Non-compressed files kept as-is (no complaint about unknown archive format)' => sub { + $to = $tempdir->child('test.txt'); my $from = "http://$host/test"; # don't check the error message as it is not interesting # (it's a generic error message that the archive is invalid) - ok defined($downloader->download($from, $to, {extract => 1})), 'Failed'; - - ok !-e $to, 'File not downloaded'; - - like $cache_log, qr/Downloading "test.gz" from "$from"/, 'Download attempt'; - like $cache_log, qr/Extracting ".*test" to ".*test.gz"/, 'Extracting download'; - like $cache_log, qr/Extracting ".*test" failed: Could not determine archive type/, 'Extracting failed'; + is $downloader->download($from, $to, {extract => 1}), undef, 'Success'; + ok -e $to, 'File downloaded'; + like $cache_log, qr/Downloading "test.txt" from "$from"/, 'Download attempt'; + like $cache_log, qr/Extracting ".*test" to ".*test.txt"/, 'Extracting download'; + is $to->slurp, 'This file was not compressed!', 'Contents extracted even though file was not compressed'; $cache_log = ''; }; -subtest 'Decompressing archive' => sub { +subtest 'Decompressing file' => sub { $to = $tempdir->child('test'); my $from = "http://$host/test.gz"; is $downloader->download($from, $to, {extract => 1}), undef, 'Success'; - - ok -e $to, 'File downloaded'; + ok -e $to, 'File downloaded and decompressed'; is $to->slurp, 'This file was compressed!', 'File was decompressed'; - like $cache_log, qr/Downloading "test" from "$from"/, 'Download attempt'; like $cache_log, qr/Extracting ".*test.gz" to ".*test"/, 'Extracting download'; - unlike $cache_log, qr/Extracting ".*test.gz" failed:/, 'Extracting did not fail'; + unlike $cache_log, qr/Extracting ".*test.*" failed:/, 'Extracting did not fail'; + $cache_log = ''; +}; + +subtest 'Decompressing archive' => sub { + $to = $tempdir->child('test'); + my $from = "http://$host/test.tar.xz"; + is $downloader->download($from, $to, {extract => 1}), undef, 'Success'; + ok -d $to, 'File downloaded, uncompressed and extracted'; + is $to->child('test-file')->slurp, "Archived file!\n", 'File was extracted'; + like $cache_log, qr/Downloading "test" from "$from"/, 'Download attempt'; + like $cache_log, qr/Extracting ".*test\.tar\.xz" to ".*test"/, 'Extracting download'; + unlike $cache_log, qr/Extracting ".*test.*" failed:/, 'Extracting did not fail'; + $cache_log = ''; +}; + +subtest 'Error when decompressing archive' => sub { + $to = $tempdir->child('fake-archive'); + my $from = "http://$host/fake-archive.tar.xz"; + like $downloader->download($from, $to, {extract => 1}), qr/Unrecognized archive format/, 'Failed'; + ok !-e $to, 'Target not created'; + like $cache_log, qr/Downloading "fake-archive" from "$from"/, 'Download attempt'; + like $cache_log, qr/Extracting ".*fake-archive\.tar\.xz" to ".*fake-archive"/, 'Extracting download'; + like $cache_log, qr/Extracting ".*fake-archive\.tar\.xz" failed:.*Unrecognized archive format.*/, + 'Extracting failed'; $cache_log = ''; }; diff --git a/t/lib/OpenQA/Test/Utils.pm b/t/lib/OpenQA/Test/Utils.pm index de0e934ae6c..284e77281e7 100644 --- a/t/lib/OpenQA/Test/Utils.pm +++ b/t/lib/OpenQA/Test/Utils.pm @@ -25,7 +25,7 @@ use Mojo::Util 'dumper'; use Cwd qw(abs_path getcwd); use IPC::Run qw(start); use Mojolicious; -use Mojo::Util 'gzip'; +use Mojo::Util qw(b64_decode gzip); use Test::Output 'combined_like'; use Mojo::IOLoop; use Mojo::IOLoop::ReadWriteProcess 'process'; @@ -126,6 +126,16 @@ sub fake_asset_server { my $archive = gzip 'This file was compressed!'; $c->render(data => $archive); }); + $r->get( + '/test.tar.xz' => sub ($c) { + # an xz compressed tar file containing the single file "test-file" with the contents "Archived file!\n" + my $data = '/Td6WFoAAATm1rRGAgAhARYAAAB0L+Wj4Af/AHFdADoZSs4dfe+Arz18uNRLppL12Hz5MDHYcLt5 + /PvM5kdTuJD3iSCXTYLW9zb9MXA9LqgK7raUtSCXM7VLT8xqwGE4kxz2xTNfbS/DRFiYMsutZ7Xq + iWj1mj0AgYJejwqLTPCcO1J/4gLpWvPaWPIhX58AAAAAAJpvxi0MvTYnAAGNAYAQAAAg72StscRn + +wIAAAAABFla'; + $c->render(data => b64_decode $data); + }); + $r->get('/fake-archive.tar.xz' => sub ($c) { $c->render(data => 'This is not an actual archive!') }); $r->get( '/test' => sub { my $c = shift; diff --git a/tools/ci/ci-packages.txt b/tools/ci/ci-packages.txt index b115d2663a6..b37b09aabdb 100644 --- a/tools/ci/ci-packages.txt +++ b/tools/ci/ci-packages.txt @@ -5,6 +5,7 @@ alsa-utils-1.2.6 aspell-0.60.8 aspell-en-2020.12.07 aspell-spell-0.60.8 +bsdtar cmark-0.30.2 desktop-data-openSUSE-15.2.20200107 dialog-1.3 From 7ed949e88be4c46b221603b3529aab66ff89d5de Mon Sep 17 00:00:00 2001 From: Marius Kittler Date: Thu, 16 Feb 2023 10:29:10 +0100 Subject: [PATCH 2/2] Track coverage of test server spawned by `t/25-downloader.t` --- t/25-downloader.t | 1 + 1 file changed, 1 insertion(+) diff --git a/t/25-downloader.t b/t/25-downloader.t index 4209eeeb704..bf4b7f9b265 100644 --- a/t/25-downloader.t +++ b/t/25-downloader.t @@ -41,6 +41,7 @@ END { session->clean } my $server_instance = process sub { # uncoverable statement Mojo::Server::Daemon->new(app => fake_asset_server, listen => ["http://$host"], silent => 1)->run; + Devel::Cover::report() if Devel::Cover->can('report'); _exit(0); # uncoverable statement to ensure proper exit code of complete test at cleanup }, max_kill_attempts => 0,