Skip to content

Commit

Permalink
Auto merge of rust-lang#123244 - Mark-Simulacrum:share-inline-never-g…
Browse files Browse the repository at this point in the history
…enerics, r=<try>

Enable -Zshare-generics for inline(never) functions

This avoids inlining cross-crate generic items when possible that are
already marked inline(never), implying that the author is not intending
for the function to be inlined by callers. As such, having a local copy
may make it easier for LLVM to optimize but mostly just adds to binary
bloat and codegen time. In practice our benchmarks indicate this is
indeed a win for larger compilations, where the extra cost in dynamic
linking to these symbols is diminished compared to the advantages in
fewer copies that need optimizing in each binary.

It might also make sense it expand this with other heuristics (e.g.,
`#[cold]`) in the future, but this seems like a good starting point.

FWIW, I expect that doing cleanup in where we make the decision
what should/shouldn't be shared is also a good idea. Way too
much code needed to be tweaked to check this. But I'm hoping
to leave that for a follow-up PR rather than blocking this on it.
  • Loading branch information
bors committed Nov 24, 2024
2 parents 15b663e + 8f190fb commit 65b6baa
Show file tree
Hide file tree
Showing 17 changed files with 98 additions and 26 deletions.
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4137,6 +4137,7 @@ name = "rustc_monomorphize"
version = "0.0.0"
dependencies = [
"rustc_abi",
"rustc_attr",
"rustc_data_structures",
"rustc_errors",
"rustc_fluent_macro",
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_codegen_llvm/src/callee.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,10 @@ pub(crate) fn get_fn<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, instance: Instance<'t

let is_hidden = if is_generic {
// This is a monomorphization of a generic function.
if !cx.tcx.sess.opts.share_generics() {
if !(cx.tcx.sess.opts.share_generics()
|| tcx.codegen_fn_attrs(instance_def_id).inline
== rustc_attr::InlineAttr::Never)
{
// When not sharing generics, all instances are in the same
// crate and have hidden visibility.
true
Expand Down
12 changes: 11 additions & 1 deletion compiler/rustc_codegen_ssa/src/back/symbol_export.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ fn exported_symbols_provider_local(
}));
}

if tcx.sess.opts.share_generics() && tcx.local_crate_exports_generics() {
if tcx.local_crate_exports_generics() {
use rustc_middle::mir::mono::{Linkage, MonoItem, Visibility};
use rustc_middle::ty::InstanceKind;

Expand Down Expand Up @@ -310,6 +310,16 @@ fn exported_symbols_provider_local(
continue;
}

if !tcx.sess.opts.share_generics() {
if tcx.codegen_fn_attrs(mono_item.def_id()).inline == rustc_attr::InlineAttr::Never
{
// this is OK, we explicitly allow sharing inline(never) across crates even
// without share-generics.
} else {
continue;
}
}

match *mono_item {
MonoItem::Fn(Instance { def: InstanceKind::Item(def), args }) => {
if args.non_erasable_generics().next().is_some() {
Expand Down
7 changes: 7 additions & 0 deletions compiler/rustc_middle/src/mir/mono.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,13 @@ impl<'tcx> MonoItem<'tcx> {
return InstantiationMode::GloballyShared { may_conflict: false };
}

if let InlineAttr::Never = tcx.codegen_fn_attrs(instance.def_id()).inline
&& self.is_generic_fn()
{
// Upgrade inline(never) to a globally shared instance.
return InstantiationMode::GloballyShared { may_conflict: true };
}

// At this point we don't have explicit linkage and we're an
// inlined function. If we're inlining into all CGUs then we'll
// be creating a local copy per CGU.
Expand Down
2 changes: 0 additions & 2 deletions compiler/rustc_middle/src/ty/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1950,8 +1950,6 @@ impl<'tcx> TyCtxt<'tcx> {

#[inline]
pub fn local_crate_exports_generics(self) -> bool {
debug_assert!(self.sess.opts.share_generics());

self.crate_types().iter().any(|crate_type| {
match crate_type {
CrateType::Executable
Expand Down
18 changes: 11 additions & 7 deletions compiler/rustc_middle/src/ty/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,19 +190,23 @@ impl<'tcx> Instance<'tcx> {
/// This method already takes into account the global `-Zshare-generics`
/// setting, always returning `None` if `share-generics` is off.
pub fn upstream_monomorphization(&self, tcx: TyCtxt<'tcx>) -> Option<CrateNum> {
// If we are not in share generics mode, we don't link to upstream
// monomorphizations but always instantiate our own internal versions
// instead.
if !tcx.sess.opts.share_generics() {
return None;
}

// If this is an item that is defined in the local crate, no upstream
// crate can know about it/provide a monomorphization.
if self.def_id().is_local() {
return None;
}

// If we are not in share generics mode, we don't link to upstream
// monomorphizations but always instantiate our own internal versions
// instead.
if !tcx.sess.opts.share_generics()
// However, if the def_id is marked inline(never), then it's fine to just reuse the
// upstream monomorphization.
&& tcx.codegen_fn_attrs(self.def_id()).inline != rustc_attr::InlineAttr::Never
{
return None;
}

// If this a non-generic instance, it cannot be a shared monomorphization.
self.args.non_erasable_generics().next()?;

Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_monomorphize/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ edition = "2021"
[dependencies]
# tidy-alphabetical-start
rustc_abi = { path = "../rustc_abi" }
rustc_attr = { path = "../rustc_attr" }
rustc_data_structures = { path = "../rustc_data_structures" }
rustc_errors = { path = "../rustc_errors" }
rustc_fluent_macro = { path = "../rustc_fluent_macro" }
Expand Down
32 changes: 24 additions & 8 deletions compiler/rustc_monomorphize/src/partitioning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,8 @@ where
// available to downstream crates. This depends on whether we are in
// share-generics mode and whether the current crate can even have
// downstream crates.
let export_generics =
cx.tcx.sess.opts.share_generics() && cx.tcx.local_crate_exports_generics();
let can_export_generics = cx.tcx.local_crate_exports_generics();
let always_export_generics = can_export_generics && cx.tcx.sess.opts.share_generics();

let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
let cgu_name_cache = &mut UnordMap::default();
Expand Down Expand Up @@ -249,7 +249,8 @@ where
cx.tcx,
&mono_item,
&mut can_be_internalized,
export_generics,
can_export_generics,
always_export_generics,
);
if visibility == Visibility::Hidden && can_be_internalized {
internalization_candidates.insert(mono_item);
Expand Down Expand Up @@ -739,12 +740,19 @@ fn mono_item_linkage_and_visibility<'tcx>(
tcx: TyCtxt<'tcx>,
mono_item: &MonoItem<'tcx>,
can_be_internalized: &mut bool,
export_generics: bool,
can_export_generics: bool,
always_export_generics: bool,
) -> (Linkage, Visibility) {
if let Some(explicit_linkage) = mono_item.explicit_linkage(tcx) {
return (explicit_linkage, Visibility::Default);
}
let vis = mono_item_visibility(tcx, mono_item, can_be_internalized, export_generics);
let vis = mono_item_visibility(
tcx,
mono_item,
can_be_internalized,
can_export_generics,
always_export_generics,
);
(Linkage::External, vis)
}

Expand All @@ -767,7 +775,8 @@ fn mono_item_visibility<'tcx>(
tcx: TyCtxt<'tcx>,
mono_item: &MonoItem<'tcx>,
can_be_internalized: &mut bool,
export_generics: bool,
can_export_generics: bool,
always_export_generics: bool,
) -> Visibility {
let instance = match mono_item {
// This is pretty complicated; see below.
Expand Down Expand Up @@ -826,7 +835,11 @@ fn mono_item_visibility<'tcx>(

// Upstream `DefId` instances get different handling than local ones.
let Some(def_id) = def_id.as_local() else {
return if export_generics && is_generic {
return if is_generic
&& (always_export_generics
|| (can_export_generics
&& tcx.codegen_fn_attrs(def_id).inline == rustc_attr::InlineAttr::Never))
{
// If it is an upstream monomorphization and we export generics, we must make
// it available to downstream crates.
*can_be_internalized = false;
Expand All @@ -837,7 +850,10 @@ fn mono_item_visibility<'tcx>(
};

if is_generic {
if export_generics {
if always_export_generics
|| (can_export_generics
&& tcx.codegen_fn_attrs(def_id).inline == rustc_attr::InlineAttr::Never)
{
if tcx.is_unreachable_local_definition(def_id) {
// This instance cannot be used from another crate.
Visibility::Hidden
Expand Down
4 changes: 3 additions & 1 deletion library/alloc/src/raw_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,9 @@ impl<A: Allocator> RawVecInner<A> {
}
}

#[inline(never)]
// not marked inline(never) since we want optimizers to be able to observe the specifics of this
// function, see tests/codegen/vec-reserve-extend.rs.
#[cold]
fn finish_grow<A>(
new_layout: Layout,
current_memory: Option<(NonNull<u8>, Layout)>,
Expand Down
16 changes: 16 additions & 0 deletions library/std/src/panicking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,22 @@ use crate::sys::backtrace;
use crate::sys::stdio::panic_output;
use crate::{fmt, intrinsics, process, thread};

// This forces codegen of the function called by panic!() inside the std crate, rather than in
// downstream crates. Primarily this is useful for rustc's codegen tests, which rely on noticing
// complete removal of panic from generated IR. Since begin_panic is inline(never), it's only
// codegen'd once per crate-graph so this pushes that to std rather than our codegen test crates.
//
// (See https://github.com/rust-lang/rust/pull/123244 for more info on why).
//
// If this is causing problems we can also modify those codegen tests to use a crate type like
// cdylib which doesn't export "Rust" symbols to downstream linkage units.
#[unstable(feature = "libstd_sys_internals", reason = "used by the panic! macro", issue = "none")]
#[doc(hidden)]
#[allow(dead_code)]
#[used]
pub static EMPTY_PANIC: fn(&'static str) -> ! =
begin_panic::<&'static str> as fn(&'static str) -> !;

// Binary interface to the panic runtime that the standard library depends on.
//
// The standard library is tagged with `#![needs_panic_runtime]` (introduced in
Expand Down
11 changes: 11 additions & 0 deletions tests/codegen-units/partitioning/auxiliary/cgu_generic_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,21 @@ pub fn foo<T>(x: T) -> (T, u32, i8) {
#[inline(never)]
fn bar<T>(x: T) -> (T, Struct) {
let _ = not_exported_and_not_generic(0);
exported_and_generic::<u32>(0);
(x, Struct(1))
}

pub static F: fn(u32) -> u32 = exported_and_generic::<u32>;

// These should not contribute to the codegen items of other crates.

// This is generic, but it's only instantiated with a u32 argument and that instantiation is present
// in the local crate (see F above).
#[inline(never)]
pub fn exported_and_generic<T>(x: T) -> T {
x
}

#[inline(never)]
pub fn exported_but_not_generic(x: i32) -> i64 {
x as i64
Expand Down
2 changes: 1 addition & 1 deletion tests/codegen/avr/avr-func-addrspace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ pub extern "C" fn test() {

// A call through the Fn trait must use address space 1.
//
// CHECK: call{{.+}}addrspace(1) void @call_through_fn_trait()
// CHECK: call{{.+}}addrspace(1) void @call_through_fn_trait({{.*}})
call_through_fn_trait(&mut update_bar_value);

// A call through a global variable must use address space 1.
Expand Down
5 changes: 4 additions & 1 deletion tests/codegen/issues/issue-13018.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

// A drop([...].clone()) sequence on an Rc should be a no-op
// In particular, no call to __rust_dealloc should be emitted
#![crate_type = "lib"]
//
// We use a cdylib since it's a leaf unit for Rust purposes, so doesn't codegen -Zshare-generics
// code.
#![crate_type = "cdylib"]
use std::rc::Rc;

pub fn foo(t: &Rc<Vec<usize>>) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
thread 'main' panicked at $DIR/issue-47429-short-backtraces.rs:23:5:
explicit panic
stack backtrace:
0: std::panicking::begin_panic
0: std::panicking::begin_panic::<&str>
1: issue_47429_short_backtraces::main
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
2 changes: 1 addition & 1 deletion tests/ui/panics/runtime-switch.legacy.run.stderr
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
thread 'main' panicked at $DIR/runtime-switch.rs:26:5:
explicit panic
stack backtrace:
0: std::panicking::begin_panic
0: std::panicking::begin_panic::<&str>
1: runtime_switch::main
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
thread 'main' panicked at $DIR/short-ice-remove-middle-frames-2.rs:56:5:
debug!!!
stack backtrace:
0: std::panicking::begin_panic
0: std::panicking::begin_panic::<&str>
1: short_ice_remove_middle_frames_2::eight
2: short_ice_remove_middle_frames_2::seven::{{closure}}
[... omitted 3 frames ...]
Expand Down
2 changes: 1 addition & 1 deletion tests/ui/panics/short-ice-remove-middle-frames.run.stderr
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
thread 'main' panicked at $DIR/short-ice-remove-middle-frames.rs:52:5:
debug!!!
stack backtrace:
0: std::panicking::begin_panic
0: std::panicking::begin_panic::<&str>
1: short_ice_remove_middle_frames::seven
2: short_ice_remove_middle_frames::sixth
3: short_ice_remove_middle_frames::fifth::{{closure}}
Expand Down

0 comments on commit 65b6baa

Please sign in to comment.