diff --git a/src/compiler/c.rs b/src/compiler/c.rs index 180b22ad3..c4f1cfc85 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -179,6 +179,7 @@ pub trait CCompilerImpl: Clone + fmt::Debug + Send + Sync + 'static { &self, arguments: &[OsString], cwd: &Path, + env_vars: &[(OsString, OsString)], ) -> CompilerArguments; /// Run the C preprocessor with the specified set of arguments. #[allow(clippy::too_many_arguments)] @@ -308,7 +309,7 @@ impl Compiler for CCompiler { cwd: &Path, env_vars: &[(OsString, OsString)], ) -> CompilerArguments + 'static>> { - match self.compiler.parse_arguments(arguments, cwd) { + match self.compiler.parse_arguments(arguments, cwd, env_vars) { CompilerArguments::Ok(mut args) => { // Handle SCCACHE_EXTRAFILES for (k, v) in env_vars.iter() { diff --git a/src/compiler/cicc.rs b/src/compiler/cicc.rs index e6200c1a6..47c8a693a 100644 --- a/src/compiler/cicc.rs +++ b/src/compiler/cicc.rs @@ -56,6 +56,7 @@ impl CCompilerImpl for Cicc { &self, arguments: &[OsString], cwd: &Path, + _env_vars: &[(OsString, OsString)], ) -> CompilerArguments { parse_arguments(arguments, cwd, Language::Ptx, &ARGS[..]) } @@ -116,11 +117,11 @@ where let mut take_next = false; let mut outputs = HashMap::new(); let mut extra_dist_files = vec![]; + let mut gen_module_id_file = false; + let mut module_id_file_name = Option::::None; let mut common_args = vec![]; let mut unhashed_args = vec![]; - let mut gen_module_id_file = false; - let mut module_id_file_name = Option::::None; for arg in ArgsIter::new(args.iter().cloned(), arg_info) { match arg { @@ -142,12 +143,12 @@ where ); continue; } - Some(UnhashedGenModuleIdFileFlag) => { + Some(GenModuleIdFileFlag) => { take_next = false; gen_module_id_file = true; &mut unhashed_args } - Some(UnhashedModuleIdFileName(o)) => { + Some(ModuleIdFileName(o)) => { take_next = false; module_id_file_name = Some(cwd.join(o)); &mut unhashed_args @@ -279,6 +280,21 @@ pub fn generate_compile_commands( out_file.into(), ]); + if log_enabled!(log::Level::Trace) { + trace!( + "[{}]: {} command: {:?}", + out_file.file_name().unwrap().to_string_lossy(), + executable.file_name().unwrap().to_string_lossy(), + [ + &[format!("cd {} &&", cwd.to_string_lossy()).to_string()], + &[executable.to_str().unwrap_or_default().to_string()][..], + &dist::osstrings_to_strings(&arguments).unwrap_or_default()[..] + ] + .concat() + .join(" ") + ); + } + let command = SingleCompileCommand { executable: executable.to_owned(), arguments, @@ -310,12 +326,12 @@ pub fn generate_compile_commands( } ArgData! { pub + GenModuleIdFileFlag, + ModuleIdFileName(PathBuf), Output(PathBuf), - UnhashedOutput(PathBuf), - UnhashedFlag, - UnhashedGenModuleIdFileFlag, - UnhashedModuleIdFileName(PathBuf), PassThrough(OsString), + UnhashedFlag, + UnhashedOutput(PathBuf), } use self::ArgData::*; @@ -323,9 +339,9 @@ use self::ArgData::*; counted_array!(pub static ARGS: [ArgInfo; _] = [ take_arg!("--gen_c_file_name", PathBuf, Separated, UnhashedOutput), take_arg!("--gen_device_file_name", PathBuf, Separated, UnhashedOutput), - flag!("--gen_module_id_file", UnhashedGenModuleIdFileFlag), + flag!("--gen_module_id_file", GenModuleIdFileFlag), take_arg!("--include_file_name", OsString, Separated, PassThrough), - take_arg!("--module_id_file_name", PathBuf, Separated, UnhashedModuleIdFileName), + take_arg!("--module_id_file_name", PathBuf, Separated, ModuleIdFileName), take_arg!("--stub_file_name", PathBuf, Separated, UnhashedOutput), take_arg!("-o", PathBuf, Separated, Output), ]); diff --git a/src/compiler/clang.rs b/src/compiler/clang.rs index 442151207..312f1eeb1 100644 --- a/src/compiler/clang.rs +++ b/src/compiler/clang.rs @@ -94,6 +94,7 @@ impl CCompilerImpl for Clang { &self, arguments: &[OsString], cwd: &Path, + _env_vars: &[(OsString, OsString)], ) -> CompilerArguments { gcc::parse_arguments( arguments, @@ -206,12 +207,15 @@ counted_array!(pub static ARGS: [ArgInfo; _] = [ take_arg!("-MF", PathBuf, CanBeSeparated, DepArgumentPath), take_arg!("-MQ", OsString, CanBeSeparated, DepTarget), take_arg!("-MT", OsString, CanBeSeparated, DepTarget), + flag!("-Wno-unknown-cuda-version", PassThroughFlag), + flag!("-Wno-unused-parameter", PassThroughFlag), take_arg!("-Xclang", OsString, Separated, XClang), take_arg!("-add-plugin", OsString, Separated, PassThrough), take_arg!("-debug-info-kind", OsString, Concatenated('='), PassThrough), take_arg!("-dependency-file", PathBuf, Separated, DepArgumentPath), flag!("-emit-pch", PassThroughFlag), flag!("-fcolor-diagnostics", DiagnosticsColorFlag), + flag!("-fcuda-allow-variadic-functions", PassThroughFlag), flag!("-fcxx-modules", TooHardFlag), take_arg!("-fdebug-compilation-dir", OsString, Separated, PassThrough), take_arg!("-fembed-offload-object", PathBuf, Concatenated('='), ExtraHashFile), @@ -279,7 +283,7 @@ mod test { is_appleclang: false, version: None, } - .parse_arguments(&arguments, &std::env::current_dir().unwrap()) + .parse_arguments(&arguments, &std::env::current_dir().unwrap(), &[]) } macro_rules! parses { diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 35434571c..152f9e972 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -581,7 +581,10 @@ where out_pretty, fmt_duration_as_secs(&duration_compilation) ); - return Ok((CompileResult::CompileFailed, compiler_result)); + return Ok(( + CompileResult::CompileFailed(dist_type, duration_compilation), + compiler_result, + )); } if miss_type == MissType::ForcedNoCache { // Do not cache @@ -590,7 +593,10 @@ where out_pretty, fmt_duration_as_secs(&duration_compilation) ); - return Ok((CompileResult::NotCached, compiler_result)); + return Ok(( + CompileResult::NotCached(dist_type, duration_compilation), + compiler_result, + )); } if cacheable != Cacheable::Yes { // Not cacheable @@ -599,7 +605,10 @@ where out_pretty, fmt_duration_as_secs(&duration_compilation) ); - return Ok((CompileResult::NotCacheable, compiler_result)); + return Ok(( + CompileResult::NotCacheable(dist_type, duration_compilation), + compiler_result, + )); } debug!( "[{}]: Compiled in {}, storing in cache", @@ -1043,11 +1052,11 @@ pub enum CompileResult { Pin> + Send>>, ), /// Not in cache and do not cache the results of the compilation. - NotCached, + NotCached(DistType, Duration), /// Not in cache, but the compilation result was determined to be not cacheable. - NotCacheable, + NotCacheable(DistType, Duration), /// Not in cache, but compilation failed. - CompileFailed, + CompileFailed(DistType, Duration), } /// The state of `--color` options passed to a compiler. @@ -1068,9 +1077,15 @@ impl fmt::Debug for CompileResult { CompileResult::CacheMiss(ref m, ref dt, ref d, _) => { write!(f, "CompileResult::CacheMiss({:?}, {:?}, {:?}, _)", d, m, dt) } - CompileResult::NotCached => write!(f, "CompileResult::NotCached"), - CompileResult::NotCacheable => write!(f, "CompileResult::NotCacheable"), - CompileResult::CompileFailed => write!(f, "CompileResult::CompileFailed"), + CompileResult::NotCached(ref dt, ref d) => { + write!(f, "CompileResult::NotCached({:?}, {:?}_", dt, d) + } + CompileResult::NotCacheable(ref dt, ref d) => { + write!(f, "CompileResult::NotCacheable({:?}, {:?}_", dt, d) + } + CompileResult::CompileFailed(ref dt, ref d) => { + write!(f, "CompileResult::CompileFailed({:?}, {:?})", dt, d) + } } } } @@ -1084,9 +1099,11 @@ impl PartialEq for CompileResult { (CompileResult::CacheMiss(m, dt, _, _), CompileResult::CacheMiss(n, dt2, _, _)) => { m == n && dt == dt2 } - (&CompileResult::NotCached, &CompileResult::NotCached) => true, - (&CompileResult::NotCacheable, &CompileResult::NotCacheable) => true, - (&CompileResult::CompileFailed, &CompileResult::CompileFailed) => true, + (CompileResult::NotCached(dt, _), CompileResult::NotCached(dt2, _)) => dt == dt2, + (CompileResult::NotCacheable(dt, _), CompileResult::NotCacheable(dt2, _)) => dt == dt2, + (CompileResult::CompileFailed(dt, _), CompileResult::CompileFailed(dt2, _)) => { + dt == dt2 + } _ => false, } } diff --git a/src/compiler/diab.rs b/src/compiler/diab.rs index 0a7f32a92..60291b87f 100644 --- a/src/compiler/diab.rs +++ b/src/compiler/diab.rs @@ -56,6 +56,7 @@ impl CCompilerImpl for Diab { &self, arguments: &[OsString], cwd: &Path, + _env_vars: &[(OsString, OsString)], ) -> CompilerArguments { parse_arguments(arguments, cwd, &ARGS[..]) } diff --git a/src/compiler/gcc.rs b/src/compiler/gcc.rs index b854a4fbe..4f85d16d2 100644 --- a/src/compiler/gcc.rs +++ b/src/compiler/gcc.rs @@ -27,7 +27,7 @@ use fs_err as fs; use log::Level::Trace; use std::collections::HashMap; use std::env; -use std::ffi::OsString; +use std::ffi::{OsStr, OsString}; use std::io::Read; use std::path::{Path, PathBuf}; use std::process; @@ -56,6 +56,7 @@ impl CCompilerImpl for Gcc { &self, arguments: &[OsString], cwd: &Path, + _env_vars: &[(OsString, OsString)], ) -> CompilerArguments { parse_arguments(arguments, cwd, &ARGS[..], self.gplusplus, self.kind()) } @@ -856,8 +857,6 @@ where let _ = rewrite_includes_only; } - trace!("compile"); - let out_file = match parsed_args.outputs.get("obj") { Some(obj) => &obj.path, None => return Err(anyhow!("Missing object file output")), @@ -884,6 +883,16 @@ where arguments.push("--".into()); } arguments.push(parsed_args.input.clone().into()); + + trace!( + "compile: {} {}", + executable.to_string_lossy(), + arguments.join(OsStr::new(" ")).to_string_lossy() + ); + + let has_verbose_flag = arguments.contains(&OsString::from("-v")) + || arguments.contains(&OsString::from("--verbose")); + let command = SingleCompileCommand { executable: executable.to_owned(), arguments, @@ -894,56 +903,60 @@ where #[cfg(not(feature = "dist-client"))] let dist_command = None; #[cfg(feature = "dist-client")] - let dist_command = (|| { - // https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/Overall-Options.html - let mut language: Option = - language_to_arg(parsed_args.language).map(|lang| lang.into()); - if !rewrite_includes_only { - match parsed_args.language { - Language::C => language = Some("cpp-output".into()), - Language::GenericHeader | Language::CHeader | Language::CxxHeader => {} - _ => language.as_mut()?.push_str("-cpp-output"), + let dist_command = if has_verbose_flag { + None + } else { + (|| { + // https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/Overall-Options.html + let mut language: Option = + language_to_arg(parsed_args.language).map(|lang| lang.into()); + if !rewrite_includes_only { + match parsed_args.language { + Language::C => language = Some("cpp-output".into()), + Language::GenericHeader | Language::CHeader | Language::CxxHeader => {} + _ => language.as_mut()?.push_str("-cpp-output"), + } } - } - let mut arguments: Vec = vec![]; - // Language needs to be before input - if let Some(lang) = &language { - arguments.extend(vec!["-x".into(), lang.into()]) - } - arguments.extend(vec![ - parsed_args.compilation_flag.clone().into_string().ok()?, - path_transformer.as_dist(&parsed_args.input)?, - "-o".into(), - path_transformer.as_dist(out_file)?, - ]); - if let CCompilerKind::Gcc = kind { - // From https://gcc.gnu.org/onlinedocs/gcc/Preprocessor-Options.html: - // - // -fdirectives-only - // - // [...] - // - // With -fpreprocessed, predefinition of command line and most - // builtin macros is disabled. Macros such as __LINE__, which - // are contextually dependent, are handled normally. This - // enables compilation of files previously preprocessed with -E - // -fdirectives-only. - // - // Which is exactly what we do :-) - if rewrite_includes_only && !parsed_args.suppress_rewrite_includes_only { - arguments.push("-fdirectives-only".into()); + let mut arguments: Vec = vec![]; + // Language needs to be before input + if let Some(lang) = &language { + arguments.extend(vec!["-x".into(), lang.into()]) } - arguments.push("-fpreprocessed".into()); - } - arguments.extend(dist::osstrings_to_strings(&parsed_args.common_args)?); - Some(dist::CompileCommand { - executable: path_transformer.as_dist(executable)?, - arguments, - env_vars: dist::osstring_tuples_to_strings(env_vars)?, - cwd: path_transformer.as_dist_abs(cwd)?, - }) - })(); + arguments.extend(vec![ + parsed_args.compilation_flag.clone().into_string().ok()?, + path_transformer.as_dist(&parsed_args.input)?, + "-o".into(), + path_transformer.as_dist(out_file)?, + ]); + if let CCompilerKind::Gcc = kind { + // From https://gcc.gnu.org/onlinedocs/gcc/Preprocessor-Options.html: + // + // -fdirectives-only + // + // [...] + // + // With -fpreprocessed, predefinition of command line and most + // builtin macros is disabled. Macros such as __LINE__, which + // are contextually dependent, are handled normally. This + // enables compilation of files previously preprocessed with -E + // -fdirectives-only. + // + // Which is exactly what we do :-) + if rewrite_includes_only && !parsed_args.suppress_rewrite_includes_only { + arguments.push("-fdirectives-only".into()); + } + arguments.push("-fpreprocessed".into()); + } + arguments.extend(dist::osstrings_to_strings(&parsed_args.common_args)?); + Some(dist::CompileCommand { + executable: path_transformer.as_dist(executable)?, + arguments, + env_vars: dist::osstring_tuples_to_strings(env_vars)?, + cwd: path_transformer.as_dist_abs(cwd)?, + }) + })() + }; Ok((command, dist_command, Cacheable::Yes)) } diff --git a/src/compiler/msvc.rs b/src/compiler/msvc.rs index 2be9746ea..19eae82a1 100644 --- a/src/compiler/msvc.rs +++ b/src/compiler/msvc.rs @@ -59,6 +59,7 @@ impl CCompilerImpl for Msvc { &self, arguments: &[OsString], cwd: &Path, + _env_vars: &[(OsString, OsString)], ) -> CompilerArguments { parse_arguments(arguments, cwd, self.is_clang) } diff --git a/src/compiler/nvcc.rs b/src/compiler/nvcc.rs index c0d3bc11e..9087dca3d 100644 --- a/src/compiler/nvcc.rs +++ b/src/compiler/nvcc.rs @@ -86,11 +86,16 @@ impl CCompilerImpl for Nvcc { &self, arguments: &[OsString], cwd: &Path, + env_vars: &[(OsString, OsString)], ) -> CompilerArguments { let mut arguments = arguments.to_vec(); - if let Ok(flags) = std::env::var("NVCC_PREPEND_FLAGS") { - arguments = shlex::split(&flags) + if let Some(flags) = env_vars + .iter() + .find(|(k, _)| k == "NVCC_PREPEND_FLAGS") + .and_then(|(_, p)| p.to_str()) + { + arguments = shlex::split(flags) .unwrap_or_default() .iter() .map(|s| s.clone().into_arg_os_string()) @@ -98,9 +103,13 @@ impl CCompilerImpl for Nvcc { .collect::>(); } - if let Ok(flags) = std::env::var("NVCC_APPEND_FLAGS") { + if let Some(flags) = env_vars + .iter() + .find(|(k, _)| k == "NVCC_APPEND_FLAGS") + .and_then(|(_, p)| p.to_str()) + { arguments.extend( - shlex::split(&flags) + shlex::split(flags) .unwrap_or_default() .iter() .map(|s| s.clone().into_arg_os_string()), @@ -205,7 +214,20 @@ impl CCompilerImpl for Nvcc { .collect::>(), ); if log_enabled!(Trace) { - trace!("dependencies command: {:?}", dependency_cmd); + let output_file_name = &parsed_args + .outputs + .get("obj") + .context("Missing object file output") + .unwrap() + .path + .file_name() + .unwrap(); + + trace!( + "[{}]: dependencies command: {:?}", + output_file_name.to_string_lossy(), + dependency_cmd + ); } dependency_cmd }; @@ -223,7 +245,20 @@ impl CCompilerImpl for Nvcc { NvccHostCompiler::Gcc => "-Xcompiler=-P", }); if log_enabled!(Trace) { - trace!("preprocessor command: {:?}", preprocess_cmd); + let output_file_name = &parsed_args + .outputs + .get("obj") + .context("Missing object file output") + .unwrap() + .path + .file_name() + .unwrap(); + + trace!( + "[{}]: preprocessor command: {:?}", + output_file_name.to_string_lossy(), + preprocess_cmd + ); } preprocess_cmd }; @@ -311,10 +346,15 @@ pub fn generate_compile_commands( // Remove all occurrences of `-t=` or `--threads` because it's incompatible with --dryrun // Prefer the last occurrence of `-t=` or `--threads` to match nvcc behavior loop { - if let Some(idx) = unhashed_args.iter().position(|x| x.starts_with("-t=")) { + if let Some(idx) = unhashed_args.iter().position(|x| x.starts_with("-t")) { let arg = unhashed_args.get(idx); if let Some(arg) = arg.and_then(|arg| arg.to_str()) { - if let Ok(arg) = arg[3..arg.len()].parse::() { + let range = if arg.contains('=') { + 3..arg.len() + } else { + 2..arg.len() + }; + if let Ok(arg) = arg[range].parse::() { num_parallel = arg; } } @@ -417,6 +457,8 @@ pub fn generate_compile_commands( env_vars, cwd: cwd.to_owned(), host_compiler: host_compiler.clone(), + // Only here so we can include it in logs + output_file_name: output.file_name().unwrap().to_owned(), }; Ok((command, None, Cacheable::Yes)) @@ -432,6 +474,7 @@ pub struct NvccCompileCommand { pub env_vars: Vec<(OsString, OsString)>, pub cwd: PathBuf, pub host_compiler: NvccHostCompiler, + pub output_file_name: OsString, } #[async_trait] @@ -466,6 +509,7 @@ impl CompileCommandImpl for NvccCompileCommand { env_vars, cwd, host_compiler, + output_file_name, } = self; let nvcc_subcommand_groups = group_nvcc_subcommands_by_compilation_stage( @@ -477,6 +521,7 @@ impl CompileCommandImpl for NvccCompileCommand { keep_dir.clone(), env_vars, host_compiler, + output_file_name, ) .await?; @@ -528,11 +573,10 @@ impl CompileCommandImpl for NvccCompileCommand { nvcc_subcommand_groups[final_assembly_range].chunks(1), ] { for command_groups in command_group_chunks { - let results = - futures::future::join_all(command_groups.iter().map(|commands| { - run_nvcc_subcommands_group(service, creator, cwd, commands) - })) - .await; + let results = futures::future::join_all(command_groups.iter().map(|commands| { + run_nvcc_subcommands_group(service, creator, cwd, commands, output_file_name) + })) + .await; for result in results { output = aggregate_output(output, result.unwrap_or_else(error_to_output)); @@ -573,6 +617,7 @@ async fn group_nvcc_subcommands_by_compilation_stage( keep_dir: Option, env_vars: &[(OsString, OsString)], host_compiler: &NvccHostCompiler, + output_file_name: &OsStr, ) -> Result>> where T: CommandCreatorSync, @@ -619,6 +664,7 @@ where arguments, is_nvcc_exe, host_compiler, + output_file_name, ), // Get the host compile command lines with paths relative to `cwd` and absolute paths to `tmp` select_nvcc_subcommands( @@ -630,6 +676,7 @@ where &[arguments, &["--keep-dir".into(), tmp.into()][..]].concat(), |exe| !is_nvcc_exe(exe), host_compiler, + output_file_name, ), ) .await?; @@ -655,61 +702,114 @@ where // but can optionally be run in parallel to other groups if the user requested via // `nvcc --threads`. - let mut no_more_groups = false; - let mut command_groups: Vec> = vec![]; - let preprocessor_flag = match host_compiler { NvccHostCompiler::Msvc => "-P", _ => "-E", } .to_owned(); - for (_, dir, exe, args) in all_commands { - if log_enabled!(log::Level::Trace) { - trace!( - "transformed nvcc command: {:?}", - [ - &[format!("cd {} &&", dir.to_string_lossy()).to_string()], - &[exe.to_str().unwrap_or_default().to_string()][..], - &args[..] - ] - .concat() - .join(" ") - ); - } + let gen_module_id_file_flag = "--gen_module_id_file".to_owned(); + let mut cuda_front_end_group = Vec::::new(); + let mut final_assembly_group = Vec::::new(); + let mut device_compile_groups = HashMap::>::new(); - let (env_vars, cacheable) = match exe.file_stem().and_then(|s| s.to_str()) { + for (_, dir, exe, args) in all_commands { + let mut args = args.clone(); + + if let (env_vars, cacheable, Some(group)) = match exe.file_stem().and_then(|s| s.to_str()) { + // fatbinary and nvlink are not cacheable + Some("fatbinary") | Some("nvlink") => ( + env_vars.clone(), + Cacheable::No, + Some(&mut final_assembly_group), + ), // cicc and ptxas are cacheable - Some("cicc") | Some("ptxas") => (env_vars.clone(), Cacheable::Yes), - // cudafe++, nvlink, and fatbinary are not cacheable - Some("cudafe++") | Some("nvlink") => (env_vars.clone(), Cacheable::No), - Some("fatbinary") => { - // The fatbinary command represents the start of the last group - if !no_more_groups { - command_groups.push(vec![]); + Some("cicc") => { + let group = device_compile_groups.get_mut(&args[args.len() - 3]); + (env_vars.clone(), Cacheable::Yes, group) + } + Some("ptxas") => { + let group = device_compile_groups.values_mut().find(|cmds| { + if let Some(cicc) = cmds.last() { + if let Some(cicc_out) = cicc.args.last() { + return cicc_out == &args[args.len() - 3]; + } + } + false + }); + (env_vars.clone(), Cacheable::Yes, group) + } + // cudafe++ is not cacheable + Some("cudafe++") => { + // Fix for CTK < 12.0: + // Add `--gen_module_id_file` if the cudafe++ args include `--module_id_file_name` + if !args.contains(&gen_module_id_file_flag) { + if let Some(idx) = args.iter().position(|x| x == "--module_id_file_name") { + // Insert `--gen_module_id_file` just before `--module_id_file_name` to match nvcc behavior + args.splice(idx..idx, [gen_module_id_file_flag.clone()]); + } } - no_more_groups = true; - (env_vars.clone(), Cacheable::No) + ( + env_vars.clone(), + Cacheable::No, + Some(&mut cuda_front_end_group), + ) } _ => { - // All generated host compiler commands include `-D__CUDA_ARCH_LIST__=`. - // If this definition isn't present, this command is either a new binary + // All generated host compiler commands include one of these defines. + // If one of these isn't present, this command is either a new binary // in the CTK that we don't know about, or a line like `rm x_dlink.reg.c` // that nvcc generates in certain cases. - if !args - .iter() - .any(|arg| arg.starts_with("-D__CUDA_ARCH_LIST__")) - { + if !args.iter().any(|arg| { + arg.starts_with("-D__CUDACC__") + || arg.starts_with("-D__NVCC__") + || arg.starts_with("-D__CUDA_ARCH__") + || arg.starts_with("-D__CUDA_ARCH_LIST__") + }) { continue; } if args.contains(&preprocessor_flag) { - // Each preprocessor step represents the start of a new command - // group, unless it comes after a call to fatbinary. - if !no_more_groups { - command_groups.push(vec![]); + // Each preprocessor step represents the start of a new command group + if let Some(out_file) = if cfg!(target_os = "windows") { + args.iter() + .find(|x| x.starts_with("-Fi")) + .and_then(|x| x.strip_prefix("-Fi")) + } else { + args.iter() + .position(|x| x == "-o") + .and_then(|i| args.get(i + 1).map(|o| o.as_str())) + } + .map(PathBuf::from) + .and_then(|out_path| { + out_path + .file_name() + .and_then(|out_name| out_name.to_str()) + .map(|out_name| out_name.to_owned()) + }) + .and_then(|out_name| { + // If the output file ends with... + // * .cpp1.ii - cicc/ptxas input + // * .cpp4.ii - cudafe++ input + if out_name.ends_with(".cpp1.ii") { + Some(out_name.to_owned()) + } else { + None + } + }) { + let new_device_compile_group = vec![]; + device_compile_groups.insert(out_file.clone(), new_device_compile_group); + ( + env_vars.clone(), + Cacheable::No, + device_compile_groups.get_mut(&out_file), + ) + } else { + ( + env_vars.clone(), + Cacheable::No, + Some(&mut cuda_front_end_group), + ) } - // Do not run preprocessor calls through sccache - (env_vars.clone(), Cacheable::No) } else { // Returns Cacheable::Yes to indicate we _do_ want to run this host // compiler call through sccache (because it may be distributed), @@ -730,31 +830,41 @@ where .cloned() .collect::>(), Cacheable::Yes, + Some(&mut final_assembly_group), ) } } - }; + } { + if log_enabled!(log::Level::Trace) { + trace!( + "[{}]: transformed nvcc command: {:?}", + output_file_name.to_string_lossy(), + [ + &[format!("cd {} &&", dir.to_string_lossy()).to_string()], + &[exe.to_str().unwrap_or_default().to_string()][..], + &args[..] + ] + .concat() + .join(" ") + ); + } - // Initialize the first group in case the first command isn't a call to the host preprocessor, - // i.e. `nvcc -o test.o -c test.c` - if command_groups.is_empty() { - command_groups.push(vec![]); + group.push(NvccGeneratedSubcommand { + exe: exe.clone(), + args: args.clone(), + cwd: dir.into(), + env_vars, + cacheable, + }); } - - match command_groups.last_mut() { - None => {} - Some(group) => { - group.push(NvccGeneratedSubcommand { - exe: exe.clone(), - args: args.clone(), - cwd: dir.into(), - env_vars, - cacheable, - }); - } - }; } + let mut command_groups = vec![]; + + command_groups.push(cuda_front_end_group); + command_groups.extend(device_compile_groups.into_values()); + command_groups.push(final_assembly_group); + Ok(command_groups) } @@ -768,6 +878,7 @@ async fn select_nvcc_subcommands( arguments: &[OsString], select_subcommand: F, host_compiler: &NvccHostCompiler, + output_file_name: &OsStr, ) -> Result)>> where F: Fn(&str) -> bool, @@ -775,7 +886,8 @@ where { if log_enabled!(log::Level::Trace) { trace!( - "nvcc dryrun command: {:?}", + "[{}]: nvcc dryrun command: {:?}", + output_file_name.to_string_lossy(), [ &[executable.to_str().unwrap_or_default().to_string()][..], &dist::osstrings_to_strings(arguments).unwrap_or_default()[..], @@ -1007,19 +1119,22 @@ fn remap_generated_filenames( .entry(arg) .or_insert_with_key(|arg| { // Initialize or update the number of files with a given extension: - // compute_70.cudafe1.stub.c -> 0.cudafe1.stub.c - // compute_60.cudafe1.stub.c -> 1.cudafe1.stub.c + // compute_70.cudafe1.stub.c -> x_0.cudafe1.stub.c + // compute_60.cudafe1.stub.c -> x_1.cudafe1.stub.c // etc. let count = ext_counts .entry(extension.into()) .and_modify(|c| *c += 1) .or_insert(0) .to_string(); - // Return `/tmp/dir/{count}.{ext}` as the new name, i.e. `/tmp/dir/0.cudafe1.stub.c` + // Return `/tmp/dir/x_{count}.{ext}` as the new name, i.e. `/tmp/dir/x_0.cudafe1.stub.c` PathBuf::from(arg) .parent() .unwrap_or(Path::new("")) - .join(count + extension) + // Don't use the count as the first character of the file name, because the file name + // may be used as an identifier (via the __FILE__ macro) and identifiers with leading + // digits are not valid in C/C++, i.e. `x_0.cudafe1.cpp` instead of `0.cudafe1.cpp`. + .join("x_".to_owned() + &count + extension) .to_string_lossy() .to_string() }) @@ -1029,8 +1144,22 @@ fn remap_generated_filenames( // If the argument isn't a file name with one of our extensions, // it may _reference_ files we've renamed. Go through and replace // all old names with their new stable names. + // + // Sort by string length descending so we don't accidentally replace + // `zzz.cudafe1.cpp` with the new name for `zzz.cudafe1.c`. + // + // For example, if we have these renames: + // + // compute_70.cudafe1.cpp -> x_0.cudafe1.cpp + // compute_70.cudafe1.c -> x_2.cudafe1.c + // + // `compute_70.cudafe1.cpp` should be replaced with `x_0.cudafe1.cpp`, not `x_2.cudafe1.c` + // let mut arg = arg.clone(); - for (old, new) in old_to_new.iter() { + for (old, new) in old_to_new + .iter() + .sorted_by(|a, b| b.0.len().cmp(&a.0.len())) + { arg = arg.replace(old, new); } arg @@ -1051,6 +1180,7 @@ async fn run_nvcc_subcommands_group( creator: &T, cwd: &Path, commands: &[NvccGeneratedSubcommand], + output_file_name: &OsStr, ) -> Result where T: CommandCreatorSync, @@ -1072,7 +1202,8 @@ where if log_enabled!(log::Level::Trace) { trace!( - "run_commands_sequential cwd={:?}, cmd={:?}", + "[{}]: run_commands_sequential cwd={:?}, cmd={:?}", + output_file_name.to_string_lossy(), cwd, [ vec![exe.clone().into_os_string().into_string().unwrap()], @@ -1218,6 +1349,7 @@ counted_array!(pub static ARGS: [ArgInfo; _] = [ take_arg!("--compiler-bindir", OsString, CanBeSeparated('='), PassThrough), take_arg!("--compiler-options", OsString, CanBeSeparated('='), PreprocessorArgument), flag!("--cubin", DoCompilation), + take_arg!("--default-stream", OsString, CanBeSeparated('='), PassThrough), flag!("--device-c", DoCompilation), flag!("--device-w", DoCompilation), flag!("--expt-extended-lambda", PreprocessorArgumentFlag), @@ -1257,6 +1389,7 @@ counted_array!(pub static ARGS: [ArgInfo; _] = [ take_arg!("-code", OsString, CanBeSeparated('='), PassThrough), flag!("-cubin", DoCompilation), flag!("-dc", DoCompilation), + take_arg!("-default-stream", OsString, CanBeSeparated('='), PassThrough), flag!("-dw", DoCompilation), flag!("-expt-extended-lambda", PreprocessorArgumentFlag), flag!("-expt-relaxed-constexpr", PreprocessorArgumentFlag), @@ -1272,7 +1405,8 @@ counted_array!(pub static ARGS: [ArgInfo; _] = [ flag!("-ptx", DoCompilation), take_arg!("-rdc", OsString, CanBeSeparated('='), PreprocessorArgument), flag!("-save-temps", UnhashedFlag), - take_arg!("-t", OsString, CanBeSeparated('='), Unhashed), + take_arg!("-t", OsString, CanBeSeparated, Unhashed), + take_arg!("-t=", OsString, Concatenated, Unhashed), take_arg!("-x", OsString, CanBeSeparated('='), Language), ]); @@ -1293,7 +1427,7 @@ mod test { host_compiler_version: None, version: None, } - .parse_arguments(&arguments, ".".as_ref()) + .parse_arguments(&arguments, ".".as_ref(), &[]) } fn parse_arguments_msvc(arguments: Vec) -> CompilerArguments { let arguments = arguments.iter().map(OsString::from).collect::>(); @@ -1302,7 +1436,7 @@ mod test { host_compiler_version: None, version: None, } - .parse_arguments(&arguments, ".".as_ref()) + .parse_arguments(&arguments, ".".as_ref(), &[]) } fn parse_arguments_nvc(arguments: Vec) -> CompilerArguments { let arguments = arguments.iter().map(OsString::from).collect::>(); @@ -1311,7 +1445,7 @@ mod test { host_compiler_version: None, version: None, } - .parse_arguments(&arguments, ".".as_ref()) + .parse_arguments(&arguments, ".".as_ref(), &[]) } macro_rules! parses { @@ -1455,9 +1589,10 @@ mod test { #[test] fn test_parse_threads_argument_simple_cu() { let a = parses!( - "-t=1", + "-t1", + "-t=2", "-t", - "2", + "3", "--threads=1", "--threads=2", "-c", @@ -1479,7 +1614,7 @@ mod test { ); assert!(a.preprocessor_args.is_empty()); assert_eq!( - ovec!["-t=1", "-t=2", "--threads", "1", "--threads", "2"], + ovec!["-t1", "-t=2", "-t3", "--threads", "1", "--threads", "2"], a.unhashed_args ); } diff --git a/src/compiler/nvhpc.rs b/src/compiler/nvhpc.rs index 5c1788fe9..507e5d1a3 100644 --- a/src/compiler/nvhpc.rs +++ b/src/compiler/nvhpc.rs @@ -59,6 +59,7 @@ impl CCompilerImpl for Nvhpc { &self, arguments: &[OsString], cwd: &Path, + _env_vars: &[(OsString, OsString)], ) -> CompilerArguments { gcc::parse_arguments( arguments, @@ -238,7 +239,7 @@ mod test { nvcplusplus: false, version: None, } - .parse_arguments(&arguments, ".".as_ref()) + .parse_arguments(&arguments, ".".as_ref(), &[]) } macro_rules! parses { diff --git a/src/compiler/ptxas.rs b/src/compiler/ptxas.rs index 28a135fea..1f46f3c10 100644 --- a/src/compiler/ptxas.rs +++ b/src/compiler/ptxas.rs @@ -57,6 +57,7 @@ impl CCompilerImpl for Ptxas { &self, arguments: &[OsString], cwd: &Path, + _env_vars: &[(OsString, OsString)], ) -> CompilerArguments { cicc::parse_arguments(arguments, cwd, Language::Cubin, &ARGS[..]) } diff --git a/src/compiler/tasking_vx.rs b/src/compiler/tasking_vx.rs index f4f5f970d..ce07857be 100644 --- a/src/compiler/tasking_vx.rs +++ b/src/compiler/tasking_vx.rs @@ -59,6 +59,7 @@ impl CCompilerImpl for TaskingVX { &self, arguments: &[OsString], cwd: &Path, + _env_vars: &[(OsString, OsString)], ) -> CompilerArguments { parse_arguments(arguments, cwd, &ARGS[..]) } diff --git a/src/server.rs b/src/server.rs index 06e53346b..c61e52499 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1358,6 +1358,9 @@ where match result { Ok((compiled, out)) => { + + let mut dist_type = DistType::NoDist; + match compiled { CompileResult::Error => { debug!("compile result: cache error"); @@ -1370,18 +1373,10 @@ where stats.cache_hits.increment(&kind, &lang); stats.cache_read_hit_duration += duration; } - CompileResult::CacheMiss(miss_type, dist_type, duration, future) => { - debug!("compile result: cache miss"); - - match dist_type { - DistType::NoDist => {} - DistType::Ok(id) => { - let server = id.addr().to_string(); - let server_count = stats.dist_compiles.entry(server).or_insert(0); - *server_count += 1; - } - DistType::Error => stats.dist_errors += 1, - } + CompileResult::CacheMiss(miss_type, dt, duration, future) => { + debug!("[{}]: compile result: cache miss", out_pretty); + dist_type = dt; + match miss_type { MissType::Normal => {} MissType::ForcedNoCache => {} @@ -1395,26 +1390,44 @@ where stats.cache_errors.increment(&kind, &lang); } } + stats.compilations += 1; stats.cache_misses.increment(&kind, &lang); stats.compiler_write_duration += duration; debug!("stats after compile result: {stats:?}"); cache_write = Some(future); } - CompileResult::NotCached => { + CompileResult::NotCached(dt, duration) => { debug!("[{}]: compile result: not cached", out_pretty); + dist_type = dt; + stats.compilations += 1; + stats.compiler_write_duration += duration; } - CompileResult::NotCacheable => { - debug!("compile result: not cacheable"); - - stats.cache_misses.increment(&kind, &lang); + CompileResult::NotCacheable(dt, duration) => { + debug!("[{}]: compile result: not cacheable", out_pretty); + dist_type = dt; + stats.compilations += 1; + stats.compiler_write_duration += duration; stats.non_cacheable_compilations += 1; } - CompileResult::CompileFailed => { - debug!("compile result: compile failed"); - + CompileResult::CompileFailed(dt, duration) => { + debug!("[{}]: compile result: compile failed", out_pretty); + dist_type = dt; + stats.compilations += 1; + stats.compiler_write_duration += duration; stats.compile_fails += 1; } }; + + match dist_type { + DistType::NoDist => {} + DistType::Ok(id) => { + let server = id.addr().to_string(); + let server_count = stats.dist_compiles.entry(server).or_insert(0); + *server_count += 1; + } + DistType::Error => stats.dist_errors += 1, + } + // Make sure the write guard has been dropped ASAP. drop(stats); @@ -1579,7 +1592,9 @@ pub struct ServerStats { pub cache_write_duration: Duration, /// The total time spent reading cache hits. pub cache_read_hit_duration: Duration, - /// The total time spent reading cache misses. + /// The number of compilations performed. + pub compilations: u64, + /// The total time spent compiling. pub compiler_write_duration: Duration, /// The count of compilation failures. pub compile_fails: u64, @@ -1632,6 +1647,7 @@ impl Default for ServerStats { cache_writes: u64::default(), cache_write_duration: Duration::new(0, 0), cache_read_hit_duration: Duration::new(0, 0), + compilations: u64::default(), compiler_write_duration: Duration::new(0, 0), compile_fails: u64::default(), not_cached: HashMap::new(), @@ -1720,13 +1736,15 @@ impl ServerStats { set_stat!(stats_vec, self.cache_read_errors, "Cache read errors"); set_stat!(stats_vec, self.forced_recaches, "Forced recaches"); set_stat!(stats_vec, self.cache_write_errors, "Cache write errors"); - set_stat!(stats_vec, self.compile_fails, "Compilation failures"); if advanced { set_compiler_stat!(stats_vec, self.cache_errors, "Cache errors"); } else { set_lang_stat!(stats_vec, self.cache_errors, "Cache errors"); } + set_stat!(stats_vec, self.compilations, "Compilations"); + set_stat!(stats_vec, self.compile_fails, "Compilation failures"); + set_stat!( stats_vec, self.non_cacheable_compilations, @@ -1756,7 +1774,7 @@ impl ServerStats { set_duration_stat!( stats_vec, self.compiler_write_duration, - self.cache_misses.all(), + self.compilations, "Average compiler" ); set_duration_stat!(