From e962779960de423e993f0a94727133ddb3dfc49e Mon Sep 17 00:00:00 2001 From: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> Date: Fri, 22 Nov 2024 09:01:14 -0300 Subject: [PATCH] #772: Check installation of R packages (#479) related to exasol/script-languages-release#772 --- .../install_scripts/install_via_r_remotes.pl | 91 +++++++++++++++--- .../install_scripts/install_via_r_versions.pl | 96 ------------------- .../install_scripts/run_r_versions_tests.sh | 36 ------- .../tests/install_scripts/run_tests.sh | 3 - .../standard-flavor/all/import_r_modules.py | 68 +++++++++++++ 5 files changed, 145 insertions(+), 149 deletions(-) delete mode 100755 ext/scripts/install_scripts/install_via_r_versions.pl delete mode 100644 ext/scripts/tests/install_scripts/run_r_versions_tests.sh create mode 100644 test_container/tests/test/standard-flavor/all/import_r_modules.py diff --git a/ext/scripts/install_scripts/install_via_r_remotes.pl b/ext/scripts/install_scripts/install_via_r_remotes.pl index 342fe3d9c..c4182d00f 100755 --- a/ext/scripts/install_scripts/install_via_r_remotes.pl +++ b/ext/scripts/install_scripts/install_via_r_remotes.pl @@ -7,14 +7,15 @@ =head1 SYNOPSIS install_via_r_versions.pl [OPTIONS] Options: - --help Brief help message - --dry-run Doesn't execute the command, only prints it to STDOUT - --file Input file with each line represents a input. - A line can have multiple elements separated by --element-separator. - Lines everything after a # is interpreted as comment - --with-versions Uses versions specified in the input file in the second element of each line - --allow-no-versions If --with-versions is active, allow packages to have no version specified - --rscript-binary Rscript binary to use for installation + --help Brief help message + --dry-run Doesn't execute the command, only prints it to STDOUT + --file Input file with each line represents a input. + A line can have multiple elements separated by --element-separator. 
+ Everything after a # in a line is interpreted as a comment + --with-versions Uses versions specified in the input file in the second element of each line + --allow-no-version If --with-versions is active, allow packages to have no version specified + --no-version-validation If --with-versions is active, skip checking that the installed version matches the requested version. + --rscript-binary Rscript binary to use for installation =cut @@ -33,6 +34,7 @@ =head1 SYNOPSIS my $rscript_binary = ''; my $with_versions = 0; my $allow_no_version = 0; +my $no_version_validation = 0; GetOptions ( "help" => \$help, @@ -40,6 +42,7 @@ =head1 SYNOPSIS "file=s" => \$file, "with-versions" => \$with_versions, "allow-no-version" => \$allow_no_version, + "no-version-validation" => \$no_version_validation, "rscript-binary=s" => \$rscript_binary, ) or package_mgmt_utils::print_usage_and_abort(__FILE__,"Error in command line arguments",2); package_mgmt_utils::print_usage_and_abort(__FILE__,"",0) if $help; @@ -54,11 +57,65 @@ =head1 SYNOPSIS } -my $combining_template = "library(remotes)\n<<<<0>>>>"; +my $combining_template_install = ' +library(remotes) +install_or_fail <- function(package_name, version){ + + withCallingHandlers({install_version(package_name, version, repos="https://cloud.r-project.org", Ncpus=4, upgrade="never") + library(package_name, character.only = TRUE)}, + error = function(e){ + print(e) + stop(paste("installation failed for:",package_name ))}, + warning = function(w){ + catch <- + grepl("download of package .* failed", w$message) || + grepl("(dependenc|package).*(is|are) not available", w$message) || + grepl("installation of package.*had non-zero exit status", w$message) || + grepl("installation of one or more packages failed", w$message) + if(catch){ print(w$message) + stop(paste("installation failed for:",package_name ))}} + ) + + } + +<<<<0>>>> +'; + +my $combining_template_validation = ' + +installed_packages <- installed.packages() +installed_package_names <- installed_packages[, 
"Package"] + +validate_or_fail <- function(package_name, version){ + # Check if the package is in the list of installed packages + is_installed <- package_name %in% installed_package_names + + # Check the result + if (!is_installed) { + stop(paste("Package not installed:", package_name)) + } + + if (!is.null(version)) { + desc <- packageDescription(package_name) + if (version != desc$Version) { + stop(paste("Version of installed package does not match:", package_name)) + } + } +} + +<<<<0>>>> +'; + + my @separators = ("\n"); -my @templates = ('install_version("<<<<0>>>>",NULL,repos="https://cloud.r-project.org", Ncpus=4)'); +my @install_templates = ('install_or_fail("<<<<0>>>>",NULL)'); if($with_versions){ - @templates = ('install_version("<<<<0>>>>","<<<<1>>>>",repos="https://cloud.r-project.org", Ncpus=4)'); + @install_templates = ('install_or_fail("<<<<0>>>>","<<<<1>>>>")'); +} + +my @validation_templates = ('validate_or_fail("<<<<0>>>>", NULL)'); +if($with_versions && !$no_version_validation){ + @validation_templates = ('validate_or_fail("<<<<0>>>>","<<<<1>>>>")'); } sub identity { @@ -73,14 +130,20 @@ sub replace_missing_version{ return $line; } -my @rendered_line_transformation_functions = (\&identity); +my @rendered_line_transformation_functions_install = (\&identity); +my @rendered_line_transformation_functions_validation = (\&identity); if($with_versions and $allow_no_version){ - @rendered_line_transformation_functions = (\&replace_missing_version); + @rendered_line_transformation_functions_install = (\&replace_missing_version); + @rendered_line_transformation_functions_validation = (\&replace_missing_version); } my $script = package_mgmt_utils::generate_joined_and_transformed_string_from_file( - $file,$element_separator,$combining_template,\@templates,\@separators,\@rendered_line_transformation_functions); + $file,$element_separator,$combining_template_install,\@install_templates,\@separators,\@rendered_line_transformation_functions_install) . 
+ package_mgmt_utils::generate_joined_and_transformed_string_from_file( + $file,$element_separator,$combining_template_validation,\@validation_templates,\@separators,\@rendered_line_transformation_functions_validation); + + if($with_versions and not $allow_no_version){ if (index($script, "<<<<1>>>>") != -1) { diff --git a/ext/scripts/install_scripts/install_via_r_versions.pl b/ext/scripts/install_scripts/install_via_r_versions.pl deleted file mode 100755 index e79d6c710..000000000 --- a/ext/scripts/install_scripts/install_via_r_versions.pl +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/perl -w - -=pod - -=head1 SYNOPSIS - Installs R packages with the remotes package https://github.com/r-lib/remotes - - install_via_r_versions.pl [OPTIONS] - Options: - --help Brief help message - --dry-run Doesn't execute the command, only prints it to STDOUT - --file Input file with each line represents a input. - A line can have multiple elements separated by --element-separator. - Lines everything after a # is interpreted as comment - --with-versions Uses versions specified in the input file in the second element of each line - --allow-no-versions If --with-versions is active, allow packages to have no version specified - --rscript-binary Rscript binary to use for installation - -=cut - -use strict; -use File::Basename; -use lib dirname (__FILE__); -use package_mgmt_utils; -use Getopt::Long; -use Pod::Usage; -#use IPC::System::Simple; - -my $help = 0; -my $dry_run = 0; -my $file = ''; -my $element_separator = "\\|"; -my $rscript_binary = ''; -my $with_versions = 0; -my $allow_no_version = 0; - -GetOptions ( - "help" => \$help, - "dry-run" => \$dry_run, - "file=s" => \$file, - "with-versions" => \$with_versions, - "allow-no-version" => \$allow_no_version, - "rscript-binary=s" => \$rscript_binary, - ) or package_mgmt_utils::print_usage_and_abort(__FILE__,"Error in command line arguments",2); -package_mgmt_utils::print_usage_and_abort(__FILE__,"",0) if $help; - - -if($file eq ''){ - 
package_mgmt_utils::print_usage_and_abort(__FILE__,"Error in command line arguments: --file was not specified",1); -} - -if($rscript_binary eq ''){ - package_mgmt_utils::print_usage_and_abort(__FILE__,"Error in command line arguments: --rscript-binary was not specified",1); -} - - -my $combining_template = "$rscript_binary -e 'library(versions);install.versions(c(<<<<0>>>>))'"; -if($with_versions){ - $combining_template = "$rscript_binary -e 'library(versions);install.versions(c(<<<<0>>>>),c(<<<<1>>>>))'"; -} - -my @templates = ('"<<<<0>>>>"','"<<<<1>>>>"'); -my @separators = (",",","); - -sub identity { - my ($line) = @_; - return $line -} - - -sub replace_missing_version{ - my ($line) = @_; - $line =~ s/"<<<<1>>>>"/NULL/g; - return $line; -} - -my @rendered_line_transformation_functions = (\&identity,\&identity); -if($with_versions and $allow_no_version){ - @rendered_line_transformation_functions = (\&identity,\&replace_missing_version); -} - -my $cmd = - package_mgmt_utils::generate_joined_and_transformed_string_from_file( - $file,$element_separator,$combining_template,\@templates,\@separators,\@rendered_line_transformation_functions); - - -if($with_versions and not $allow_no_version){ - if (index($cmd, "<<<<1>>>>") != -1) { - die "Command '$cmd' contains packages with unspecified versions, please check the package file '$file' or specifiy --allow-no-version"; - } -} - -if($cmd ne ""){ - package_mgmt_utils::execute("$rscript_binary -e 'install.packages(\"versions\",repos=\"http://cran.uk.r-project.org\")'",$dry_run); - package_mgmt_utils::execute($cmd,$dry_run); -} diff --git a/ext/scripts/tests/install_scripts/run_r_versions_tests.sh b/ext/scripts/tests/install_scripts/run_r_versions_tests.sh deleted file mode 100644 index f1fc13634..000000000 --- a/ext/scripts/tests/install_scripts/run_r_versions_tests.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -set -e -set -u -set -o pipefail - -PATH_TO_INSTALL_SCRIPTS="../../install_scripts" -SCRIPT_DIR="$(dirname 
"$(readlink -f "${BASH_SOURCE[0]}")")" -PATH_TO_INSTALL_SCRIPTS="$SCRIPT_DIR/../../install_scripts" -DRY_RUN_OPTION=--dry-run -if [ "${1-}" == "--no-dry-run" ] -then - DRY_RUN_OPTION= -fi - -echo ./install_via_r_versions.pl with empty -"$PATH_TO_INSTALL_SCRIPTS/install_via_r_versions.pl" --file test_files/empty_test_file --rscript-binary Rscript "$DRY_RUN_OPTION" -echo - -echo ./install_via_r_versions.pl without versions -"$PATH_TO_INSTALL_SCRIPTS/install_via_r_versions.pl" --file test_files/r/versions/without_versions --rscript-binary Rscript "$DRY_RUN_OPTION" -echo - -echo ./install_via_r_versions.pl with versions, without allow-no-version -"$PATH_TO_INSTALL_SCRIPTS/install_via_r_versions.pl" --file test_files/r/versions/with_versions/all_versions_specified --with-versions --rscript-binary Rscript "$DRY_RUN_OPTION" -echo - -echo ./install_via_r_versions.pl with versions, with allow-no-version, all versions specified -"$PATH_TO_INSTALL_SCRIPTS/install_via_r_versions.pl" --file test_files/r/versions/with_versions/all_versions_specified --with-versions --allow-no-version --rscript-binary Rscript "$DRY_RUN_OPTION" -echo - -echo ./install_via_r_versions.pl with versions, with allow-no-version, some versions missing -"$PATH_TO_INSTALL_SCRIPTS/install_via_r_versions.pl" --file test_files/r/versions/with_versions/some_missing_versions --with-versions --allow-no-version --rscript-binary Rscript "$DRY_RUN_OPTION" -echo - -echo "All r versions tests passed" diff --git a/ext/scripts/tests/install_scripts/run_tests.sh b/ext/scripts/tests/install_scripts/run_tests.sh index 648576e5a..85a8a581e 100644 --- a/ext/scripts/tests/install_scripts/run_tests.sh +++ b/ext/scripts/tests/install_scripts/run_tests.sh @@ -17,8 +17,5 @@ bash run_apt_tests.sh "$@" echo Run Pip Tests bash run_pip_tests.sh "$@" -# echo Run R versions Tests -# bash run_r_versions_tests.sh "$@" - echo Run R remotes Tests bash run_r_remotes_tests.sh "$@" diff --git 
a/test_container/tests/test/standard-flavor/all/import_r_modules.py b/test_container/tests/test/standard-flavor/all/import_r_modules.py new file mode 100644 index 000000000..62f31727e --- /dev/null +++ b/test_container/tests/test/standard-flavor/all/import_r_modules.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +from typing import List, Tuple + +from exasol_python_test_framework import udf +from exasol_python_test_framework.udf.udf_debug import UdfDebugger + +class ImportAllModulesTest(udf.TestCase): + + def setUp(self): + self.query('create schema import_all_r_modules', ignore_errors=True) + + def get_all_root_modules(self) -> List[Tuple[str, str]]: + self.query(udf.fixindent(''' + CREATE OR REPLACE r SCALAR SCRIPT import_all_r_modules.get_all_root_modules() + EMITS (module_name VARCHAR(200000), version VARCHAR(200)) AS + run <- function(ctx) { + library(data.table) + file_pattern <- "cran_packages" + directory <- "/build_info/packages" + files <- list.files(path = directory, pattern = file_pattern, full.names = TRUE, recursive = TRUE) + for (input_file in files) { + package_list <- tryCatch(read.table(file = input_file, header=FALSE, sep = "|", comment.char = "#"), error=function(e) NULL) + if (!is.null(package_list)) { + package_names <- package_list[,1] + versions <- package_list[,2] + ctx$emit(package_names, versions) + } + } + } + / + ''')) + rows = self.query('''SELECT import_all_r_modules.get_all_root_modules() FROM dual''') + print("Number of modules:",len(rows)) + root_modules = [(row[0], row[1]) for row in rows] + print(f"Found {len(root_modules)} root modules.") + return root_modules + + def create_check_installed_package_udf(self): + self.query(udf.fixindent(''' + CREATE OR REPLACE r SCALAR SCRIPT + import_all_r_modules.check_installed_package(package_name VARCHAR(200000), version VARCHAR(200)) + RETURNS DECIMAL(11,0) AS + run <- function(ctx) { + library(ctx$package_name, character.only = TRUE) + desc <- packageDescription(ctx$package_name) + if 
(ctx$version != desc$Version) { + stop(paste("Version of installed package does not match:", ctx$package_name)) + return(1) + } + 0 + } + / + ''')) + + def test_import_all_modules(self): + root_modules = self.get_all_root_modules() + assert len(root_modules) > 0 + self.create_check_installed_package_udf() + for root_module in root_modules: + # with UdfDebugger(test_case=self): + self.query(f'''SELECT import_all_r_modules.check_installed_package('{root_module[0]}', '{root_module[1]}') FROM dual''') + + def tearDown(self): + self.query("drop schema import_all_r_modules cascade", ignore_errors=True) + + +if __name__ == '__main__': + udf.main()