Skip to content
This repository was archived by the owner on Jun 26, 2020. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cranelift-codegen/meta/src/isa/x86/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ pub fn define(
let trueif = shared.by_name("trueif");
let trunc = shared.by_name("trunc");
let uextend = shared.by_name("uextend");
let uextend_nop = shared.by_name("uextend_nop");
let uload16 = shared.by_name("uload16");
let uload16_complex = shared.by_name("uload16_complex");
let uload32 = shared.by_name("uload32");
Expand Down Expand Up @@ -1406,6 +1407,9 @@ pub fn define(
);
e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(vec![0x89]));

e.enc32_rec(uextend_nop.bind(I64).bind(I32), rec_null, 0);
e.enc64_rec(uextend_nop.bind(I64).bind(I32), rec_null, 0);

// Floating point

// Floating-point constants equal to 0.0 can be encoded using either `xorps` or `xorpd`, for
Expand Down
13 changes: 13 additions & 0 deletions cranelift-codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2804,6 +2804,19 @@ pub fn define(
.constraints(vec![WiderOrEq(IntTo.clone(), Int.clone())]),
);

ig.push(
Inst::new(
"uextend_nop",
r#"
Convert `x` to a larger integer type by zero-extending. Actually generates no code; to be
used whenever it can be proven that the uextend will not modify the source's upper bits.
"#,
)
.operands_in(vec![x])
.operands_out(vec![a])
.constraints(vec![WiderOrEq(IntTo.clone(), Int.clone())]),
);

ig.push(
Inst::new(
"sextend",
Expand Down
10 changes: 10 additions & 0 deletions cranelift-codegen/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use crate::legalize_function;
use crate::licm::do_licm;
use crate::loop_analysis::LoopAnalysis;
use crate::nan_canonicalization::do_nan_canonicalization;
use crate::peephole::do_peephole;
use crate::postopt::do_postopt;
use crate::regalloc;
use crate::result::CodegenResult;
Expand Down Expand Up @@ -147,6 +148,7 @@ impl Context {
self.prologue_epilogue(isa)?;
if isa.flags().opt_level() == OptLevel::Best {
self.shrink_instructions(isa)?;
self.peephole(isa)?;
}
self.relax_branches(isa)
}
Expand Down Expand Up @@ -326,6 +328,14 @@ impl Context {
Ok(())
}

/// Apply the target-specific peephole optimization pass to the function held by this context.
///
/// After the pass, `verify_if` and `verify_locations_if` are called in that order and the first
/// error either of them reports is returned.
pub fn peephole(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
    do_peephole(&mut self.func, isa);
    self.verify_if(isa)?;
    self.verify_locations_if(isa)
}

/// Run the branch relaxation pass and return information about the function's code and
/// read-only data.
pub fn relax_branches(&mut self, isa: &dyn TargetIsa) -> CodegenResult<CodeInfo> {
Expand Down
10 changes: 10 additions & 0 deletions cranelift-codegen/src/isa/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,16 @@ pub trait TargetIsa: fmt::Display + Sync {
false
}

/// Does the target implement platform-specific peephole optimizations?
///
/// The default answer is `false`. `do_peephole` consults this method and only calls
/// `run_peephole` when it returns `true`.
fn has_peephole_optimizations(&self) -> bool {
false
}

/// Run the peephole optimizations for this ISA on `_func`.
///
/// The default implementation has an empty body and leaves the function untouched.
fn run_peephole(&self, _func: &mut ir::Function) {
// Does nothing in the general case.
}

/// Get a data structure describing the registers in this ISA.
fn register_info(&self) -> RegInfo;

Expand Down
9 changes: 9 additions & 0 deletions cranelift-codegen/src/isa/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
mod abi;
mod binemit;
mod enc_tables;
mod peephole;
mod registers;
pub mod settings;

Expand Down Expand Up @@ -136,6 +137,14 @@ impl TargetIsa for Isa {
let _tt = timing::prologue_epilogue();
abi::prologue_epilogue(func, self)
}

// x86 ships a peephole pass (the `peephole` module), so report it as available.
fn has_peephole_optimizations(&self) -> bool {
true
}

// Forward to the x86-specific peephole pass.
fn run_peephole(&self, func: &mut ir::Function) {
peephole::run(self, func);
}
}

impl fmt::Display for Isa {
Expand Down
64 changes: 64 additions & 0 deletions cranelift-codegen/src/isa/x86/peephole.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
use crate::cursor::{Cursor, EncCursor};
use crate::ir::types::{I32, I64};
use crate::ir::{Function, Inst, InstBuilder, InstructionData, Opcode, ValueDef};
use crate::isa::TargetIsa;

/// Replace a redundant `uextend.i64.i32` with a `uextend_nop`, which generates no code.
///
/// On x86-64, an instruction that writes to a 32-bit register also zeroes the upper 32 bits of the
/// corresponding 64-bit register. When the producer of the value has already done so, the
/// `movl $reg, $reg` generated for a `uextend.i64.i32` has a single effect, clearing bits that are
/// already clear, and the `uextend` can be removed.
///
/// The rewrite is only attempted when all of the following hold:
///
/// - `inst` is a `uextend` whose controlling type variable is `I64`;
/// - its argument is the result of an instruction whose controlling type variable is `I32`;
/// - the argument and the result have been assigned the same location.
///
/// NOTE(review): performing this optimization for any producer instruction/encoding is risky. The
/// intent is to restrict it to a list of known-good instructions/encodings, which is not
/// implemented yet (see the TODO in the body).
fn optimize_uextend(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetIsa) {
    use log::warn;
    if let InstructionData::Unary {
        opcode: Opcode::Uextend,
        arg,
    } = pos.func.dfg[inst]
    {
        // Only extensions producing an I64 are candidates.
        let to_ty = pos.func.dfg.ctrl_typevar(inst);
        if to_ty != I64 {
            return;
        }

        // Values that are not instruction results (`value_def` yields something other than
        // `ValueDef::Result`) are left alone.
        if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(arg) {
            if pos.func.dfg.ctrl_typevar(def_inst) != I32 {
                return;
            }
            debug_assert_eq!(pos.func.dfg.value_type(arg), I32);

            // If the input and output locations differ, it can't be a no-op.
            let result = pos.func.dfg.inst_results(inst)[0];
            if pos.func.locations[result] != pos.func.locations[arg] {
                return;
            }

            // TODO actually limit this optimization to a set of known instruction / encodings.
            let encoding = &pos.func.encodings[def_inst];
            warn!("producer: {} {}", def_inst, encoding);

            pos.func.dfg.replace(inst).uextend_nop(to_ty, arg);

            // The re-encoding must happen outside of `debug_assert!`: the macro does not evaluate
            // its argument in release builds, which would leave the new `uextend_nop` with the
            // encoding of the `uextend` it replaced.
            let reencoded = pos.func.update_encoding(inst, isa).is_ok();
            debug_assert!(reencoded, "uextend_nop must have a legal encoding");
        }
    }
}

pub fn run(isa: &TargetIsa, func: &mut Function) {
// At the moment, only the uextend optimization is implemented, which makes sense only on x86
// 64 bits.
if isa.pointer_bits() != 64 {
return;
}

let mut pos = EncCursor::new(func, isa);
while let Some(_ebb) = pos.next_ebb() {
while let Some(inst) = pos.next_inst() {
optimize_uextend(&mut pos, inst, isa);
}
}
}
1 change: 1 addition & 0 deletions cranelift-codegen/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ mod legalizer;
mod licm;
mod nan_canonicalization;
mod partition_slice;
mod peephole;
mod postopt;
mod predicates;
mod ref_slice;
Expand Down
15 changes: 15 additions & 0 deletions cranelift-codegen/src/peephole.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
//! This module runs platform-specific peephole optimizations.

use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::timing;

/// Entry point of the peephole phase, applied to the generated code.
///
/// All of the work is delegated to the target ISA, since the optimizations carried out here are
/// expected to be platform-specific. When the ISA reports that it has no peephole optimizations,
/// nothing happens and `timing::peephole()` is not called.
pub fn do_peephole(func: &mut Function, isa: &dyn TargetIsa) {
    if isa.has_peephole_optimizations() {
        let _tt = timing::peephole();
        isa.run_peephole(func);
    }
}
1 change: 1 addition & 0 deletions cranelift-codegen/src/timing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ define_passes! {

prologue_epilogue: "Prologue/epilogue insertion",
shrink_instructions: "Instruction encoding shrinking",
peephole: "Peephole optimizations",
relax_branches: "Branch relaxation",
binemit: "Binary machine code emission",
layout_renumber: "Layout full renumbering",
Expand Down