From ab1e94be139b849571b54ecff7b7295f93759287 Mon Sep 17 00:00:00 2001 From: Bas Zalmstra Date: Mon, 2 Oct 2023 16:33:08 +0200 Subject: [PATCH] feat: env markers (#37) Adds support for detecting and evaluating environment markers. Environment markers can be extracted from a python executable by running a simple python script. Fixes #34 as the proper python markers are now parsed and evaluated: ``` >rip "scipy>=1.4.0" 2023-10-02T13:34:12.758759Z INFO rip: extracted the following environment markers from the system python interpreter: Pep508EnvMakers { os_name: "nt", sys_platform: "win32", platform_machine: "AMD64", platform_python_implementation: "CPython", platform_release: "10", platform_system: "Windows", platform_version: "10.0.22621", python_version: "3.8", python_full_version: "3.8.17", implementation_name: "cpython", implementation_version: "3.8.17", } 2023-10-02T13:34:12.759610Z INFO rattler_installs_packages::resolve: collecting scipy 2023-10-02T13:34:12.759831Z INFO rattler_installs_packages::http: executing request url=https://pypi.org/simple/scipy/ cache_mode=Default 2023-10-02T13:34:12.830482Z WARN rattler_installs_packages::resolve: Not considering scipy 1.11.0rc2, 1.11.0rc1, 1.10.0rc2, 1.10.0rc1, 1.9.0rc3, 1.9.0rc2, 1.9.0rc1, 1.8.0rc4, 1.8.0rc3, 1.8.0rc2, 1.8.0rc1, 0.8.0 because there are no wheel artifacts available 2023-10-02T13:34:12.830699Z WARN rattler_installs_packages::resolve: Not considering scipy 1.11.3, 1.11.2, 1.11.1, 1.11.0 because none of the artifacts are compatible with Python 3.8.17 2023-10-02T13:34:12.831023Z INFO rattler_installs_packages::resolve: obtaining dependency information from scipy=1.10.1 2023-10-02T13:34:12.831685Z INFO rattler_installs_packages::resolve: collecting numpy 2023-10-02T13:34:12.831821Z INFO rattler_installs_packages::http: executing request url=https://pypi.org/simple/numpy/ cache_mode=Default 2023-10-02T13:34:12.874279Z WARN rattler_installs_packages::resolve: Not considering numpy 1.26.0rc1, 1.26.0b1, 1.25.0rc1, 1.24.0rc2, 1.24.0rc1, 1.23.0rc3, 1.23.0rc2, 1.23.0rc1, 1.10.0.post2, 1.5.0, 1.4.1, 1.3.0 because there are no wheel artifacts available 2023-10-02T13:34:12.874502Z WARN rattler_installs_packages::resolve: Not considering numpy 1.26.0, 1.25.2, 1.25.1, 1.25.0 because none of the artifacts are compatible with Python 3.8.17 2023-10-02T13:34:12.874934Z INFO rattler_installs_packages::resolve: collecting scipy[test] 2023-10-02T13:34:12.876837Z INFO rattler_installs_packages::resolve: collecting scipy[dev] 2023-10-02T13:34:12.878637Z INFO rattler_installs_packages::resolve: collecting scipy[doc] 2023-10-02T13:34:12.880546Z INFO rattler_installs_packages::resolve: obtaining dependency information from numpy=1.24.4 Resolved environment: - scipy >= 1.4.0 Name Version numpy 1.24.4 scipy 1.10.1 ``` @notatallshaw You might be interested in this! :) Fix #2 --- Cargo.lock | 26 ++++ crates/rattler_installs_packages/Cargo.toml | 3 +- .../src/env_markers.rs | 123 ++++++++++++++++++ crates/rattler_installs_packages/src/html.rs | 2 +- crates/rattler_installs_packages/src/lib.rs | 2 + .../rattler_installs_packages/src/pep508.py | 40 ++++++ .../src/requirement.rs | 6 + .../rattler_installs_packages/src/resolve.rs | 118 ++++++++++++----- crates/rip_bin/src/main.rs | 14 +- 9 files changed, 296 insertions(+), 38 deletions(-) create mode 100644 crates/rattler_installs_packages/src/env_markers.rs create mode 100644 crates/rattler_installs_packages/src/pep508.py diff --git a/Cargo.lock b/Cargo.lock index 74037f09..cd04d03b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -947,6 +947,15 @@ dependencies = [ "digest", ] +[[package]] +name = "html-escape" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476" +dependencies = [ + "utf8-width", +] + [[package]] name = "http" version = "0.2.9" @@ -1764,6 +1773,7 @@ dependencies = [ "elsa", "fs4", "futures", + "html-escape", "http", "http-cache-semantics", "indexmap 2.0.1", @@ -2223,6 +2233,15 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "signal-hook-registry" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +dependencies = [ + "libc", +] + [[package]] name = "similar" version = "2.2.1" @@ -2483,6 +2502,7 @@ dependencies = [ "mio", "num_cpus", "pin-project-lite", + "signal-hook-registry", "socket2 0.5.4", "tokio-macros", "windows-sys 0.48.0", @@ -2689,6 +2709,12 @@ dependencies = [ "serde", ] +[[package]] +name = "utf8-width" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" + [[package]] name = "utf8parse" version = "0.2.1" diff --git a/crates/rattler_installs_packages/Cargo.toml b/crates/rattler_installs_packages/Cargo.toml index 8849500d..38904f4c 100644 --- a/crates/rattler_installs_packages/Cargo.toml +++ b/crates/rattler_installs_packages/Cargo.toml @@ -24,6 +24,7 @@ data-encoding = "2.4.0" elsa = "1.9.0" fs4 = "0.6.6" futures = "0.3.28" +html-escape = "0.2.13" http = "0.2.9" http-cache-semantics = { version = "1.0.1", default-features = false, features = ["with_serde", "reqwest"] } indexmap = "2.0.1" @@ -45,7 +46,7 @@ smallvec = { version = "1.11.1", features = ["const_generics", "const_new"] } tempfile = "3.8.0" thiserror = "1.0.49" tl = "0.7.7" -tokio = { version = "1.32.0" } +tokio = { version = "1.32.0", features = ["process"] } tokio-util = { version = "0.7.9", features = ["compat"] } tracing = { version = "0.1.37", default-features = false, features = ["attributes"] } url = { version = "2.4.1", features = ["serde"] } diff --git a/crates/rattler_installs_packages/src/env_markers.rs b/crates/rattler_installs_packages/src/env_markers.rs new file mode 100644 index 00000000..3fe3b4c5 --- /dev/null +++ b/crates/rattler_installs_packages/src/env_markers.rs @@ -0,0 +1,123 @@ +use crate::marker::Env; +use serde::{Deserialize, Serialize}; +use std::io; +use std::io::ErrorKind; +use std::path::Path; +use std::process::ExitStatus; +use thiserror::Error; + +/// Describes the environment markers that can be used in dependency specifications to enable or +/// disable certain dependencies based on runtime environment. +/// +/// Exactly the markers defined in this struct must be present during version resolution. Unknown +/// variables should raise an error. +/// +/// Note that the "extra" variable is not defined in this struct because it depends on the wheel +/// that is being inspected. +/// +/// The behavior and the names of the markers are described in PEP 508. +#[derive(Default, Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] +#[allow(missing_docs)] +pub struct Pep508EnvMakers { + pub os_name: String, + pub sys_platform: String, + pub platform_machine: String, + pub platform_python_implementation: String, + pub platform_release: String, + pub platform_system: String, + pub platform_version: String, + pub python_version: String, + pub python_full_version: String, + pub implementation_name: String, + pub implementation_version: String, +} + +#[derive(Debug, Error)] +pub enum FromPythonError { + #[error("could not find python executable")] + CouldNotFindPythonExecutable, + + #[error(transparent)] + FailedToExecute(#[from] io::Error), + + #[error(transparent)] + FailedToParse(#[from] serde_json::Error), + + #[error("execution failed with exit code {0}")] + FailedToRun(ExitStatus), +} + +impl Pep508EnvMakers { + /// Try to determine the environment markers by executing python. + pub async fn from_env() -> Result { + Self::from_python(Path::new("python")).await + } + + /// Try to determine the environment markers from an existing python executable. The executable + /// is used to run a simple python program to extract the information. + pub async fn from_python(python: &Path) -> Result { + let pep508_bytes = include_str!("pep508.py"); + + // Execute the python executable + let output = match tokio::process::Command::new(python) + .arg("-c") + .arg(pep508_bytes) + .output() + .await + { + Err(e) if e.kind() == ErrorKind::NotFound => { + return Err(FromPythonError::CouldNotFindPythonExecutable) + } + Err(e) => return Err(FromPythonError::FailedToExecute(e)), + Ok(output) => output, + }; + + // Ensure that we have a valid success code + if !output.status.success() { + return Err(FromPythonError::FailedToRun(output.status)); + } + + // Convert the JSON + let stdout = String::from_utf8_lossy(&output.stdout); + Ok(serde_json::from_str(stdout.trim())?) + } +} + +impl Env for Pep508EnvMakers { + fn get_marker_var(&self, var: &str) -> Option<&str> { + match var { + "os_name" => Some(&self.os_name), + "sys_platform" => Some(&self.sys_platform), + "platform_machine" => Some(&self.platform_machine), + "platform_python_implementation" => Some(&self.platform_python_implementation), + "platform_release" => Some(&self.platform_release), + "platform_system" => Some(&self.platform_system), + "platform_version" => Some(&self.platform_version), + "python_version" => Some(&self.python_version), + "python_full_version" => Some(&self.python_full_version), + "implementation_name" => Some(&self.implementation_name), + "implementation_version" => Some(&self.implementation_version), + _ => None, + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[tokio::test] + pub async fn test_from_env() { + match Pep508EnvMakers::from_env().await { + Err(FromPythonError::CouldNotFindPythonExecutable) => { + // This is fine, the test machine does not include a python binary. + } + Err(e) => panic!("{e}"), + Ok(env) => { + println!( + "Found the following environment markers on the current system:\n\n{env:#?}" + ) + } + } + } +} diff --git a/crates/rattler_installs_packages/src/html.rs b/crates/rattler_installs_packages/src/html.rs index adfd5924..bc6d0966 100644 --- a/crates/rattler_installs_packages/src/html.rs +++ b/crates/rattler_installs_packages/src/html.rs @@ -36,7 +36,7 @@ fn into_artifact_info(base: &Url, tag: &HTMLTag) -> Option { let requires_python = attributes .get("data-requires-python") .flatten() - .map(|a| a.as_utf8_str().to_string()); + .map(|a| html_escape::decode_html_entities(&a.as_utf8_str()).into_owned()); let metadata_attr = attributes .get("data-dist-info-metadata") diff --git a/crates/rattler_installs_packages/src/lib.rs b/crates/rattler_installs_packages/src/lib.rs index cfe39ba3..4819713f 100644 --- a/crates/rattler_installs_packages/src/lib.rs +++ b/crates/rattler_installs_packages/src/lib.rs @@ -21,6 +21,7 @@ mod seek_slice; mod specifier; mod utils; +mod env_markers; #[cfg(feature = "resolvo")] mod resolve; @@ -35,6 +36,7 @@ pub use artifact_name::{ ArtifactName, BuildTag, InnerAsArtifactName, ParseArtifactNameError, SDistFormat, SDistName, WheelName, }; +pub use env_markers::Pep508EnvMakers; pub use extra::Extra; pub use package_name::{NormalizedPackageName, PackageName, ParsePackageNameError}; pub use pep440::Version; diff --git a/crates/rattler_installs_packages/src/pep508.py b/crates/rattler_installs_packages/src/pep508.py new file mode 100644 index 00000000..cf9c9830 --- /dev/null +++ b/crates/rattler_installs_packages/src/pep508.py @@ -0,0 +1,40 @@ +# A program that outputs PEP 508 environment markers in a JSON format. Most of the +# implementation has been taken from the example in the PEP. +# +# See: https://peps.python.org/pep-0508/ + +import os +import sys +import platform +import json + + +def format_full_version(info): + version = '{0.major}.{0.minor}.{0.micro}'.format(info) + kind = info.releaselevel + if kind != 'final': + version += kind[0] + str(info.serial) + return version + + +if hasattr(sys, 'implementation'): + implementation_version = format_full_version(sys.implementation.version) + implementation_name = sys.implementation.name +else: + implementation_version = '0' + implementation_name = '' +bindings = { + 'implementation_name': implementation_name, + 'implementation_version': implementation_version, + 'os_name': os.name, + 'platform_machine': platform.machine(), + 'platform_python_implementation': platform.python_implementation(), + 'platform_release': platform.release(), + 'platform_system': platform.system(), + 'platform_version': platform.version(), + 'python_full_version': platform.python_version(), + 'python_version': '.'.join(platform.python_version_tuple()[:2]), + 'sys_platform': sys.platform, +} + +json.dump(bindings, sys.stdout) diff --git a/crates/rattler_installs_packages/src/requirement.rs b/crates/rattler_installs_packages/src/requirement.rs index a776a0eb..b2859f6e 100644 --- a/crates/rattler_installs_packages/src/requirement.rs +++ b/crates/rattler_installs_packages/src/requirement.rs @@ -101,6 +101,12 @@ pub mod marker { fn get_marker_var(&self, var: &str) -> Option<&str>; } + impl Env for &E { + fn get_marker_var(&self, var: &str) -> Option<&str> { + (*self).get_marker_var(var) + } + } + impl + Eq + Hash> Env for HashMap { fn get_marker_var(&self, var: &str) -> Option<&str> { self.get(var).map(|s| s.borrow()) diff --git a/crates/rattler_installs_packages/src/resolve.rs b/crates/rattler_installs_packages/src/resolve.rs index 5f1537a9..6a456024 100644 --- a/crates/rattler_installs_packages/src/resolve.rs +++ b/crates/rattler_installs_packages/src/resolve.rs @@ -6,6 +6,8 @@ //! with [`resolvo`]. //! //! See the `rip_bin` crate for an example of how to use the [`resolve`] function in the: [RIP Repo](https://github.com/prefix-dev/rip) +use crate::env_markers::Pep508EnvMakers; +use crate::marker::Env; use crate::{ CompareOp, Extra, NormalizedPackageName, PackageDb, PackageName, Requirement, Specifier, Specifiers, UserRequirement, Version, Wheel, @@ -101,23 +103,35 @@ impl Display for PypiPackageName { } /// This is a [`DependencyProvider`] for PyPI packages -struct PypiDependencyProvider<'db> { +struct PypiDependencyProvider<'db, E> { pool: Pool, package_db: &'db PackageDb, + env_markers: E, + python_version: Version, } -impl<'db> PypiDependencyProvider<'db> { +impl<'db, E: Env> PypiDependencyProvider<'db, E> { /// Creates a new PypiDependencyProvider /// for use with the [`resolvo`] crate - pub fn new(package_db: &'db PackageDb) -> Self { - Self { + pub fn new(package_db: &'db PackageDb, env_markers: E) -> miette::Result { + let version = env_markers + .get_marker_var("python_full_version") + .ok_or(miette::miette!( + "missing 'python_full_version' environment marker variable" + ))? + .parse() + .map_err(|e| miette::miette!("failed to parse 'python_full_version': {e}"))?; + + Ok(Self { pool: Pool::new(), package_db, - } + env_markers, + python_version: version, + }) } } -impl DependencyProvider for PypiDependencyProvider<'_> { +impl DependencyProvider for PypiDependencyProvider<'_, E> { fn pool(&self) -> &Pool { &self.pool } @@ -161,9 +175,10 @@ impl DependencyProvider for PypiDependencyProvi }; let mut candidates = Candidates::default(); let mut no_wheels = Vec::new(); + let mut incompatible_python = Vec::new(); for (version, artifacts) in artifacts.iter() { // Filter only artifacts we can work with - let available_artifacts = artifacts + let mut artifacts = artifacts .iter() // We are only interested in wheels .filter(|a| a.is::()) @@ -174,12 +189,37 @@ impl DependencyProvider for PypiDependencyProvi .collect::>(); // Check if there are wheel artifacts for this version - if available_artifacts.is_empty() { + if artifacts.is_empty() { // If there are no wheel artifacts, we're just gonna skip it no_wheels.push(version); continue; } + // Filter artifacts that are incompatible with the python version + // BasZ: Does it matter that this changes the order of the artifacts? + let mut idx = 0; + while idx < artifacts.len() { + let artifact = &artifacts[idx]; + if let Some(requires_python) = artifact.requires_python.as_ref() { + let python_specifier: Specifiers = requires_python + .parse() + .expect("invalid requires_python specifier"); + if !python_specifier + .satisfied_by(&self.python_version) + .expect("failed to determine satisfiability of requires_python specifier") + { + artifacts.remove(idx); + continue; + } + } + idx += 1; + } + + if artifacts.is_empty() { + incompatible_python.push(version); + continue; + } + // Filter yanked artifacts let non_yanked_artifacts = artifacts .iter() @@ -189,6 +229,7 @@ impl DependencyProvider for PypiDependencyProvi if non_yanked_artifacts.is_empty() { continue; } + let solvable_id = self .pool .intern_solvable(name, PypiVersion(version.clone())); @@ -204,6 +245,15 @@ impl DependencyProvider for PypiDependencyProvi ); } + if !incompatible_python.is_empty() && package_name.extra().is_none() { + tracing::warn!( + "Not considering {} {} because none of the artifacts are compatible with Python {}", + package_name, + incompatible_python.iter().format(", "), + &self.python_version + ); + } + Some(candidates) } @@ -217,26 +267,10 @@ impl DependencyProvider for PypiDependencyProvi solvable.inner() ); - // TODO: https://peps.python.org/pep-0508/#environment-markers - let env = HashMap::from_iter([ - // TODO: We should add some proper values here. - // See: https://peps.python.org/pep-0508/#environment-markers - ("os_name", ""), - ("sys_platform", ""), - ("platform_machine", ""), - ("platform_python_implementation", ""), - ("platform_release", ""), - ("platform_system", ""), - ("platform_version", ""), - ("python_version", "3.9"), - ("python_full_version", ""), - ("implementation_name", ""), - ("implementation_version", ""), - ( - "extra", - package_name.extra().map(|e| e.as_str()).unwrap_or(""), - ), - ]); + let env = ExtraEnv { + env: &self.env_markers, + extra: package_name.extra().map(|e| e.as_str()).unwrap_or(""), + }; let mut dependencies = Dependencies::default(); // Add a dependency to the base dependency when we have an extra @@ -353,14 +387,31 @@ impl DependencyProvider for PypiDependencyProvi } } +/// Combines an extra env marker with another object that implements [`Env`]. +struct ExtraEnv<'e, E> { + env: &'e E, + extra: &'e str, +} + +impl<'e, E: Env> Env for ExtraEnv<'e, E> { + fn get_marker_var(&self, var: &str) -> Option<&str> { + if var == "extra" { + Some(self.extra) + } else { + self.env.get_marker_var(var) + } + } +} + /// Resolves an environment that contains the given requirements and all dependencies of those /// requirements. pub async fn resolve( package_db: &PackageDb, requirements: impl IntoIterator, -) -> Result)>, String> { + env_markers: &Pep508EnvMakers, +) -> miette::Result)>> { // Construct a provider - let provider = PypiDependencyProvider::new(package_db); + let provider = PypiDependencyProvider::new(package_db, env_markers)?; let pool = provider.pool(); let requirements = requirements.into_iter(); @@ -419,8 +470,9 @@ pub async fn resolve( } Ok(result) } - Err(e) => Err(e - .display_user_friendly(&solver, &DefaultSolvableDisplay) - .to_string()), + Err(e) => Err(miette::miette!( + "{}", + e.display_user_friendly(&solver, &DefaultSolvableDisplay) + )), } } diff --git a/crates/rip_bin/src/main.rs b/crates/rip_bin/src/main.rs index 27f608a9..bdb8c30a 100644 --- a/crates/rip_bin/src/main.rs +++ b/crates/rip_bin/src/main.rs @@ -9,7 +9,7 @@ use tracing_subscriber::filter::Directive; use tracing_subscriber::{fmt, layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; use url::Url; -use rattler_installs_packages::{normalize_index_url, resolve, UserRequirement}; +use rattler_installs_packages::{normalize_index_url, resolve, Pep508EnvMakers, UserRequirement}; #[derive(Parser)] #[command(author, version, about, long_about = None)] @@ -52,8 +52,15 @@ async fn actual_main() -> miette::Result<()> { ) .into_diagnostic()?; + // Determine the environment markers for the current machine + let env_markers = Pep508EnvMakers::from_env().await.into_diagnostic()?; + tracing::info!( + "extracted the following environment markers from the system python interpreter:\n{:#?}", + env_markers + ); + // Solve the environment - let blueprint = match resolve(&package_db, &args.specs).await { + let blueprint = match resolve(&package_db, &args.specs, &env_markers).await { Ok(blueprint) => blueprint, Err(err) => miette::bail!("Could not solve for the requested requirements:\n{err}"), }; @@ -99,7 +106,8 @@ async fn main() { /// Constructs a default [`EnvFilter`] that is used when the user did not specify a custom RUST_LOG. pub fn get_default_env_filter(verbose: bool) -> EnvFilter { - let mut result = EnvFilter::new("rattler_installs_packages=info"); + let mut result = EnvFilter::new("rip=info") + .add_directive(Directive::from_str("rattler_installs_packages=info").unwrap()); if verbose { result = result.add_directive(Directive::from_str("resolvo=info").unwrap());