From 003a3f8cd3ea4594e2c2bc89c05562a202868ce0 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 20 Oct 2022 18:59:07 +1100 Subject: [PATCH] Use `br` instead of `switch` in more cases. `codegen_switchint_terminator` already uses `br` instead of `switch` when there is one normal target plus the `otherwise` target. But there's another common case with two normal targets and an `otherwise` target that points to an empty unreachable BB. This comes up a lot when switching on the tags of enums that use niches. The pattern looks like this: ``` bb1: ; preds = %bb6 %3 = load i8, ptr %_2, align 1, !range !9, !noundef !4 %4 = sub i8 %3, 2 %5 = icmp eq i8 %4, 0 %_6 = select i1 %5, i64 0, i64 1 switch i64 %_6, label %bb3 [ i64 0, label %bb4 i64 1, label %bb2 ] bb3: ; preds = %bb1 unreachable ``` This commit adds code to convert the `switch` to a `br`: ``` bb1: ; preds = %bb6 %3 = load i8, ptr %_2, align 1, !range !9, !noundef !4 %4 = sub i8 %3, 2 %5 = icmp eq i8 %4, 0 %_6 = select i1 %5, i64 0, i64 1 %6 = icmp eq i64 %_6, 0 br i1 %6, label %bb4, label %bb2 bb3: ; No predecessors! unreachable ``` This has a surprisingly large effect on compile times, with reductions of 5% on debug builds of some crates. The reduction is all due to LLVM taking less time. Maybe LLVM is just much better at handling `br` than `switch`. The resulting code is still suboptimal. - The `icmp`, `select`, `icmp` sequence is silly, converting an `i1` to an `i64` and back to an `i1`. But with the current code structure it's hard to avoid, and LLVM will easily clean it up, in opt builds at least. - `bb3` is usually now truly dead code (though not always, so it can't be removed universally). --- compiler/rustc_codegen_ssa/src/mir/block.rs | 30 ++++++++++- compiler/rustc_middle/src/mir/mod.rs | 5 ++ src/test/codegen/match-optimized.rs | 60 +++++++++++++++++++++ src/test/codegen/match-unoptimized.rs | 23 ++++++++ src/test/codegen/match.rs | 29 ---------- 5 files changed, 116 insertions(+), 31 deletions(-) create mode 100644 src/test/codegen/match-optimized.rs create mode 100644 src/test/codegen/match-unoptimized.rs delete mode 100644 src/test/codegen/match.rs diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs index 29b7c9b0a88..0802067cde6 100644 --- a/compiler/rustc_codegen_ssa/src/mir/block.rs +++ b/compiler/rustc_codegen_ssa/src/mir/block.rs @@ -17,6 +17,7 @@ use rustc_middle::mir::{self, AssertKind, SwitchTargets}; use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf}; use rustc_middle::ty::print::{with_no_trimmed_paths, with_no_visible_paths}; use rustc_middle::ty::{self, Instance, Ty, TypeVisitable}; +use rustc_session::config::OptLevel; use rustc_span::source_map::Span; use rustc_span::{sym, Symbol}; use rustc_symbol_mangling::typeid::typeid_for_fnabi; @@ -286,12 +287,13 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { assert_eq!(discr.layout.ty, switch_ty); let mut target_iter = targets.iter(); if target_iter.len() == 1 { - // If there are two targets (one conditional, one fallback), emit br instead of switch + // If there are two targets (one conditional, one fallback), emit `br` instead of + // `switch`. let (test_value, target) = target_iter.next().unwrap(); let lltrue = helper.llbb_with_cleanup(self, target); let llfalse = helper.llbb_with_cleanup(self, targets.otherwise()); if switch_ty == bx.tcx().types.bool { - // Don't generate trivial icmps when switching on bool + // Don't generate trivial icmps when switching on bool. match test_value { 0 => bx.cond_br(discr.immediate(), llfalse, lltrue), 1 => bx.cond_br(discr.immediate(), lltrue, llfalse), @@ -303,6 +305,30 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { let cmp = bx.icmp(IntPredicate::IntEQ, discr.immediate(), llval); bx.cond_br(cmp, lltrue, llfalse); } + } else if self.cx.sess().opts.optimize == OptLevel::No + && target_iter.len() == 2 + && self.mir[targets.otherwise()].is_empty_unreachable() + { + // In unoptimized builds, if there are two normal targets and the `otherwise` target is + // an unreachable BB, emit `br` instead of `switch`. This leaves behind the unreachable + // BB, which will usually (but not always) be dead code. + // + // Why only in unoptimized builds? + // - In unoptimized builds LLVM uses FastISel which does not support switches, so it + // must fall back to the to the slower SelectionDAG isel. Therefore, using `br` gives + // significant compile time speedups for unoptimized builds. + // - In optimized builds the above doesn't hold, and using `br` sometimes results in + // worse generated code because LLVM can no longer tell that the value being switched + // on can only have two values, e.g. 0 and 1. + // + let (test_value1, target1) = target_iter.next().unwrap(); + let (_test_value2, target2) = target_iter.next().unwrap(); + let ll1 = helper.llbb_with_cleanup(self, target1); + let ll2 = helper.llbb_with_cleanup(self, target2); + let switch_llty = bx.immediate_backend_type(bx.layout_of(switch_ty)); + let llval = bx.const_uint_big(switch_llty, test_value1); + let cmp = bx.icmp(IntPredicate::IntEQ, discr.immediate(), llval); + bx.cond_br(cmp, ll1, ll2); } else { bx.switch( discr.immediate(), diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index 79db35a764a..068daaadbda 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -1186,6 +1186,11 @@ impl<'tcx> BasicBlockData<'tcx> { pub fn visitable(&self, index: usize) -> &dyn MirVisitable<'tcx> { if index < self.statements.len() { &self.statements[index] } else { &self.terminator } } + + /// Does the block have no statements and an unreachable terminator? + pub fn is_empty_unreachable(&self) -> bool { + self.statements.is_empty() && matches!(self.terminator().kind, TerminatorKind::Unreachable) + } } impl AssertKind { diff --git a/src/test/codegen/match-optimized.rs b/src/test/codegen/match-optimized.rs new file mode 100644 index 00000000000..36402cc7353 --- /dev/null +++ b/src/test/codegen/match-optimized.rs @@ -0,0 +1,60 @@ +// compile-flags: -C no-prepopulate-passes -O + +#![crate_type = "lib"] + +pub enum E { + A, + B, + C, +} + +// CHECK-LABEL: @exhaustive_match +#[no_mangle] +pub fn exhaustive_match(e: E) -> u8 { +// CHECK: switch{{.*}}, label %[[OTHERWISE:[a-zA-Z0-9_]+]] [ +// CHECK-NEXT: i[[TY:[0-9]+]] [[DISCR:[0-9]+]], label %[[A:[a-zA-Z0-9_]+]] +// CHECK-NEXT: i[[TY:[0-9]+]] [[DISCR:[0-9]+]], label %[[B:[a-zA-Z0-9_]+]] +// CHECK-NEXT: i[[TY:[0-9]+]] [[DISCR:[0-9]+]], label %[[C:[a-zA-Z0-9_]+]] +// CHECK-NEXT: ] +// CHECK: [[OTHERWISE]]: +// CHECK-NEXT: unreachable +// +// CHECK: [[A]]: +// CHECK-NEXT: store i8 0, {{i8\*|ptr}} %1, align 1 +// CHECK-NEXT: br label %[[EXIT:[a-zA-Z0-9_]+]] +// CHECK: [[B]]: +// CHECK-NEXT: store i8 1, {{i8\*|ptr}} %1, align 1 +// CHECK-NEXT: br label %[[EXIT]] +// CHECK: [[C]]: +// CHECK-NEXT: store i8 2, {{i8\*|ptr}} %1, align 1 +// CHECK-NEXT: br label %[[EXIT]] + match e { + E::A => 0, + E::B => 1, + E::C => 2, + } +} + +#[repr(u16)] +pub enum E2 { + A = 13, + B = 42, +} + +// For optimized code we produce a switch with an unreachable target as the `otherwise` so LLVM +// knows the possible values. Compare with `src/test/codegen/match-unoptimized.rs`. + +// CHECK-LABEL: @exhaustive_match_2 +#[no_mangle] +pub fn exhaustive_match_2(e: E2) -> u8 { + // CHECK: switch i16 %{{.+}}, label %[[UNREACH:.+]] [ + // CHECK-NEXT: i16 13, + // CHECK-NEXT: i16 42, + // CHECK-NEXT: ] + // CHECK: [[UNREACH]]: + // CHECK-NEXT: unreachable + match e { + E2::A => 0, + E2::B => 1, + } +} diff --git a/src/test/codegen/match-unoptimized.rs b/src/test/codegen/match-unoptimized.rs new file mode 100644 index 00000000000..be40b29e3d3 --- /dev/null +++ b/src/test/codegen/match-unoptimized.rs @@ -0,0 +1,23 @@ +// compile-flags: -C no-prepopulate-passes -Copt-level=0 + +#![crate_type = "lib"] + +#[repr(u16)] +pub enum E2 { + A = 13, + B = 42, +} + +// For unoptimized code we produce a `br` instead of a `switch`. Compare with +// `src/test/codegen/match-optimized.rs` + +// CHECK-LABEL: @exhaustive_match_2 +#[no_mangle] +pub fn exhaustive_match_2(e: E2) -> u8 { + // CHECK: %[[CMP:.+]] = icmp eq i16 %{{.+}}, 13 + // CHECK-NEXT: br i1 %[[CMP:.+]], + match e { + E2::A => 0, + E2::B => 1, + } +} diff --git a/src/test/codegen/match.rs b/src/test/codegen/match.rs deleted file mode 100644 index b203641fddb..00000000000 --- a/src/test/codegen/match.rs +++ /dev/null @@ -1,29 +0,0 @@ -// compile-flags: -C no-prepopulate-passes - -#![crate_type = "lib"] - -pub enum E { - A, - B, -} - -// CHECK-LABEL: @exhaustive_match -#[no_mangle] -pub fn exhaustive_match(e: E) -> u8 { -// CHECK: switch{{.*}}, label %[[OTHERWISE:[a-zA-Z0-9_]+]] [ -// CHECK-NEXT: i[[TY:[0-9]+]] [[DISCR:[0-9]+]], label %[[A:[a-zA-Z0-9_]+]] -// CHECK-NEXT: i[[TY:[0-9]+]] [[DISCR:[0-9]+]], label %[[B:[a-zA-Z0-9_]+]] -// CHECK-NEXT: ] -// CHECK: [[OTHERWISE]]: -// CHECK-NEXT: unreachable -// CHECK: [[A]]: -// CHECK-NEXT: store i8 0, {{i8\*|ptr}} %1, align 1 -// CHECK-NEXT: br label %[[EXIT:[a-zA-Z0-9_]+]] -// CHECK: [[B]]: -// CHECK-NEXT: store i8 1, {{i8\*|ptr}} %1, align 1 -// CHECK-NEXT: br label %[[EXIT:[a-zA-Z0-9_]+]] - match e { - E::A => 0, - E::B => 1, - } -}