diff --git a/cranelift-codegen/meta/src/isa/x86/encodings.rs b/cranelift-codegen/meta/src/isa/x86/encodings.rs index 9ce33817c..a5df9298e 100644 --- a/cranelift-codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift-codegen/meta/src/isa/x86/encodings.rs @@ -270,7 +270,7 @@ impl PerCpuModeEncodings { /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand binding) has already happened fn enc_32_64_maybe_isap( &mut self, - inst: BoundInstruction, + inst: impl Clone + Into, template: Template, isap: Option, ) { @@ -280,7 +280,7 @@ impl PerCpuModeEncodings { fn enc32_maybe_isap( &mut self, - inst: BoundInstruction, + inst: impl Into, template: Template, isap: Option, ) { @@ -292,7 +292,7 @@ impl PerCpuModeEncodings { fn enc64_maybe_isap( &mut self, - inst: BoundInstruction, + inst: impl Into, template: Template, isap: Option, ) { @@ -432,6 +432,7 @@ pub fn define( let uload8_complex = shared.by_name("uload8_complex"); let ushr = shared.by_name("ushr"); let ushr_imm = shared.by_name("ushr_imm"); + let vconst = shared.by_name("vconst"); let x86_bsf = x86.by_name("x86_bsf"); let x86_bsr = x86.by_name("x86_bsr"); let x86_cvtt2si = x86.by_name("x86_cvtt2si"); @@ -578,6 +579,7 @@ pub fn define( let rec_urm = r.template("urm"); let rec_urm_noflags = r.template("urm_noflags"); let rec_urm_noflags_abcd = r.template("urm_noflags_abcd"); + let rec_vconst = r.template("vconst"); // Predicates shorthands. 
let all_ones_funcaddrs_and_not_is_pic = @@ -1785,6 +1787,18 @@ pub fn define( } } + // SIMD vconst using MOVUPS + // TODO it would be ideal if eventually this became the more efficient MOVAPS but we would have + // to guarantee that the constants are aligned when emitted and there is currently no mechanism + // for that; alternately, constants could be loaded into XMM registers using a sequence like: + // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored + // in memory) but some performance measurements are needed. + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) { + let instruction = vconst.bind_vector_from_lane(ty, sse_vector_size); + let template = rec_vconst.nonrex().opcodes(vec![0x0f, 0x10]); + e.enc_32_64_maybe_isap(instruction, template, None); // from SSE + } + // Reference type instructions // Null references implemented as iconst 0. diff --git a/cranelift-codegen/meta/src/isa/x86/recipes.rs b/cranelift-codegen/meta/src/isa/x86/recipes.rs index 50ac9fca7..d12658080 100644 --- a/cranelift-codegen/meta/src/isa/x86/recipes.rs +++ b/cranelift-codegen/meta/src/isa/x86/recipes.rs @@ -399,6 +399,7 @@ pub fn define<'shared>( let f_unary_ieee32 = formats.by_name("UnaryIeee32"); let f_unary_ieee64 = formats.by_name("UnaryIeee64"); let f_unary_imm = formats.by_name("UnaryImm"); + let f_unary_imm128 = formats.by_name("UnaryImm128"); // Predicates shorthands. 
let use_sse41 = settings.predicate_by_name("use_sse41"); @@ -2382,6 +2383,19 @@ pub fn define<'shared>( ), ); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("vconst", f_unary_imm128, 5) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + const_disp4(imm, func, sink); + "#, + ), + ); + recipes.add_template_recipe( EncodingRecipeBuilder::new("jt_base", f_branch_table_base, 5) .operands_out(vec![gpr]) diff --git a/cranelift-codegen/meta/src/shared/formats.rs b/cranelift-codegen/meta/src/shared/formats.rs index e6e7c92df..394eb6d58 100644 --- a/cranelift-codegen/meta/src/shared/formats.rs +++ b/cranelift-codegen/meta/src/shared/formats.rs @@ -5,6 +5,7 @@ pub fn define(immediates: &OperandKinds, entities: &OperandKinds) -> FormatRegis // Shorthands for immediates. let uimm8 = immediates.by_name("uimm8"); let uimm32 = immediates.by_name("uimm32"); + let uimm128 = immediates.by_name("uimm128"); let imm64 = immediates.by_name("imm64"); let ieee32 = immediates.by_name("ieee32"); let ieee64 = immediates.by_name("ieee64"); @@ -30,6 +31,7 @@ pub fn define(immediates: &OperandKinds, entities: &OperandKinds) -> FormatRegis registry.insert(Builder::new("Unary").value()); registry.insert(Builder::new("UnaryImm").imm(imm64)); + registry.insert(Builder::new("UnaryImm128").imm(uimm128)); registry.insert(Builder::new("UnaryIeee32").imm(ieee32)); registry.insert(Builder::new("UnaryIeee64").imm(ieee64)); registry.insert(Builder::new("UnaryBool").imm(boolean)); diff --git a/cranelift-codegen/meta/src/shared/immediates.rs b/cranelift-codegen/meta/src/shared/immediates.rs index bee762a9e..5b8baca89 100644 --- a/cranelift-codegen/meta/src/shared/immediates.rs +++ b/cranelift-codegen/meta/src/shared/immediates.rs @@ -29,6 +29,16 @@ pub fn define() -> Vec { .build(); kinds.push(uimm32); + // An unsigned 128-bit immediate integer operand. 
+ // + // This operand is used to pass entire 128-bit vectors as immediates to + // instructions like vconst. + let uimm128 = Builder::new_imm("uimm128") + .doc("A 128-bit immediate unsigned integer.") + .rust_type("ir::Constant") + .build(); + kinds.push(uimm128); + // A 32-bit immediate signed offset. // // This is used to represent an immediate address offset in load/store diff --git a/cranelift-codegen/meta/src/shared/instructions.rs b/cranelift-codegen/meta/src/shared/instructions.rs index 3d3fda49b..c11016c64 100644 --- a/cranelift-codegen/meta/src/shared/instructions.rs +++ b/cranelift-codegen/meta/src/shared/instructions.rs @@ -30,6 +30,7 @@ pub fn define( let uimm8 = immediates.by_name("uimm8"); let uimm32 = immediates.by_name("uimm32"); let imm64 = immediates.by_name("imm64"); + let uimm128 = immediates.by_name("uimm128"); let offset32 = immediates.by_name("offset32"); let memflags = immediates.by_name("memflags"); let ieee32 = immediates.by_name("ieee32"); @@ -1088,6 +1089,22 @@ pub fn define( .operands_out(vec![a]), ); + let N = &operand_doc("N", uimm128, "The 16 immediate bytes of a 128-bit vector"); + let a = &operand_doc("a", TxN, "A constant vector value"); + + ig.push( + Inst::new( + "vconst", + r#" + SIMD vector constant. + + Construct a vector with the given immediate bytes.
+ "#, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + let a = &operand_doc("a", Ref, "A constant reference null value"); ig.push( diff --git a/cranelift-codegen/src/binemit/memorysink.rs b/cranelift-codegen/src/binemit/memorysink.rs index 49c519f6b..ffb8b44b4 100644 --- a/cranelift-codegen/src/binemit/memorysink.rs +++ b/cranelift-codegen/src/binemit/memorysink.rs @@ -16,7 +16,7 @@ use super::{Addend, CodeInfo, CodeOffset, CodeSink, Reloc}; use crate::binemit::stackmap::Stackmap; use crate::ir::entities::Value; -use crate::ir::{ExternalName, Function, JumpTable, SourceLoc, TrapCode}; +use crate::ir::{ConstantOffset, ExternalName, Function, JumpTable, SourceLoc, TrapCode}; use crate::isa::TargetIsa; use core::ptr::write_unaligned; @@ -78,6 +78,9 @@ pub trait RelocSink { /// Add a relocation referencing an external symbol at the current offset. fn reloc_external(&mut self, _: CodeOffset, _: Reloc, _: &ExternalName, _: Addend); + /// Add a relocation referencing a constant. + fn reloc_constant(&mut self, _: CodeOffset, _: Reloc, _: ConstantOffset); + /// Add a relocation referencing a jump table. 
fn reloc_jt(&mut self, _: CodeOffset, _: Reloc, _: JumpTable); } @@ -132,6 +135,11 @@ impl<'a> CodeSink for MemoryCodeSink<'a> { self.relocs.reloc_external(ofs, rel, name, addend); } + fn reloc_constant(&mut self, rel: Reloc, constant_offset: ConstantOffset) { + let ofs = self.offset(); + self.relocs.reloc_constant(ofs, rel, constant_offset); + } + fn reloc_jt(&mut self, rel: Reloc, jt: JumpTable) { let ofs = self.offset(); self.relocs.reloc_jt(ofs, rel, jt); @@ -169,6 +177,7 @@ pub struct NullRelocSink {} impl RelocSink for NullRelocSink { fn reloc_ebb(&mut self, _: u32, _: Reloc, _: u32) {} fn reloc_external(&mut self, _: u32, _: Reloc, _: &ExternalName, _: i64) {} + fn reloc_constant(&mut self, _: CodeOffset, _: Reloc, _: ConstantOffset) {} fn reloc_jt(&mut self, _: u32, _: Reloc, _: JumpTable) {} } diff --git a/cranelift-codegen/src/binemit/mod.rs b/cranelift-codegen/src/binemit/mod.rs index ce9321ec1..e5d0cbbec 100644 --- a/cranelift-codegen/src/binemit/mod.rs +++ b/cranelift-codegen/src/binemit/mod.rs @@ -16,7 +16,7 @@ pub use self::relaxation::relax_branches; pub use self::shrink::shrink_instructions; pub use self::stackmap::Stackmap; use crate::ir::entities::Value; -use crate::ir::{ExternalName, Function, Inst, JumpTable, SourceLoc, TrapCode}; +use crate::ir::{ConstantOffset, ExternalName, Function, Inst, JumpTable, SourceLoc, TrapCode}; use crate::isa::TargetIsa; pub use crate::regalloc::RegDiversions; use core::fmt; @@ -133,6 +133,9 @@ pub trait CodeSink { /// Add a relocation referencing an external symbol plus the addend at the current offset. fn reloc_external(&mut self, _: Reloc, _: &ExternalName, _: Addend); + /// Add a relocation referencing a constant. + fn reloc_constant(&mut self, _: Reloc, _: ConstantOffset); + /// Add a relocation referencing a jump table. fn reloc_jt(&mut self, _: Reloc, _: JumpTable); @@ -192,7 +195,13 @@ where } sink.begin_rodata(); - // TODO: No read-only data (constant pools) at this time.
+ + // output constants + for (_, constant_data) in func.dfg.constants.iter() { + for byte in constant_data.iter() { + sink.put1(*byte) + } + } sink.end_codegen(); } diff --git a/cranelift-codegen/src/binemit/relaxation.rs b/cranelift-codegen/src/binemit/relaxation.rs index 262ef986d..1fdf51ea0 100644 --- a/cranelift-codegen/src/binemit/relaxation.rs +++ b/cranelift-codegen/src/binemit/relaxation.rs @@ -37,6 +37,7 @@ use crate::iterators::IteratorExtras; use crate::regalloc::RegDiversions; use crate::timing; use crate::CodegenResult; +use core::convert::TryFrom; use log::debug; #[cfg(feature = "basic-blocks")] @@ -135,7 +136,11 @@ pub fn relax_branches( let jumptables_size = offset - jumptables; let rodata = offset; - // TODO: Once we have constant pools we'll do some processing here to update offset. + for constant in func.dfg.constants.entries_mut() { + constant.set_offset(offset); + offset += + u32::try_from(constant.len()).expect("Constants must have a length that fits in a u32") + } let rodata_size = offset - rodata; diff --git a/cranelift-codegen/src/ir/constant.rs b/cranelift-codegen/src/ir/constant.rs new file mode 100644 index 000000000..c44dcd337 --- /dev/null +++ b/cranelift-codegen/src/ir/constant.rs @@ -0,0 +1,218 @@ +//! Constants +//! +//! The constant pool defined here allows cranelift to avoid emitting the same constant multiple +//! times. As constants are inserted in the pool, a handle is returned; the handle is a cranelift +//! Entity. Inserting the same data multiple times will always return the same handle. Future work +//! could include: ensuring alignment of constants within the pool, bucketing constants by size. + +use crate::ir::Constant; +use cranelift_entity::EntityRef; +use std::collections::{BTreeMap, HashMap}; +use std::vec::Vec; + +/// This type describes the actual constant data. +pub type ConstantData = Vec; + +/// This type describes an offset in bytes within a constant pool. 
+pub type ConstantOffset = u32; + +/// Inner type for storing data and offset together in the constant pool. The offset is optional +/// because it must be set relative to the function code size (i.e. constants are emitted after the +/// function body); because the function is not yet compiled when constants are inserted, +/// [set_offset](ir::ConstantPool::set_offset) must be called once a constant's offset from the +/// beginning of the function is known (see [relaxation.rs](binemit::relaxation)). +#[derive(Clone)] +pub struct ConstantPoolEntry { + data: ConstantData, + offset: Option, +} + +impl ConstantPoolEntry { + fn new(data: ConstantData) -> Self { + ConstantPoolEntry { data, offset: None } + } + + /// Return the size of the constant at this entry. + pub fn len(&self) -> usize { + self.data.len() + } + + /// Assign a new offset to the constant at this entry. + pub fn set_offset(&mut self, offset: ConstantOffset) { + self.offset = Some(offset) + } +} + +/// Maintains the mapping between a constant handle (i.e. [Constant](ir::entities::Constant)) and +/// its constant data (i.e. [ConstantData](ir::constant::ConstantData)). +#[derive(Clone)] +pub struct ConstantPool { + /// This mapping maintains the insertion order as long as Constants are created with sequentially increasing integers. + handles_to_values: BTreeMap, + /// This mapping is unordered (no need for lexicographic ordering) but allows us to map constant data back to handles. + values_to_handles: HashMap, +} + +impl ConstantPool { + /// Create a new constant pool instance. + pub fn new() -> Self { + ConstantPool { + handles_to_values: BTreeMap::new(), + values_to_handles: HashMap::new(), + } + } + + /// Empty the constant pool of all data. 
+ pub fn clear(&mut self) { + self.handles_to_values.clear(); + self.values_to_handles.clear(); + } + + /// Insert constant data into the pool, returning a handle for later referencing; when constant + /// data is inserted that is a duplicate of previous constant data, the existing handle will be + /// returned. + pub fn insert(&mut self, constant_value: ConstantData) -> Constant { + if self.values_to_handles.contains_key(&constant_value) { + self.values_to_handles + .get(&constant_value) + .expect("A constant handle must have a corresponding constant value; this is an implementation error in ConstantPool") + .clone() + } else { + let constant_handle = Constant::new(self.len()); + self.values_to_handles + .insert(constant_value.clone(), constant_handle.clone()); + self.handles_to_values.insert( + constant_handle.clone(), + ConstantPoolEntry::new(constant_value), + ); + constant_handle + } + } + + /// Retrieve the constant data given a handle. + pub fn get(&self, constant_handle: Constant) -> &ConstantData { + assert!(self.handles_to_values.contains_key(&constant_handle)); + &self.handles_to_values + .get(&constant_handle) + .expect("A constant handle must have a corresponding constant value; was a constant handle created outside of the pool?") + .data + } + + /// Assign an offset to a given constant, where the offset is the number of bytes from the + /// beginning of the function to the beginning of the constant data inside the pool. 
+ pub fn set_offset(&mut self, constant_handle: Constant, constant_offset: ConstantOffset) { + assert!(self.handles_to_values.contains_key(&constant_handle), "A constant handle must have already been inserted into the pool; perhaps a constant handle was created outside of the pool?"); + self.handles_to_values + .entry(constant_handle) + .and_modify(|e| e.offset = Some(constant_offset)); + } + + /// Retrieve the offset of a given constant, where the offset is the number of bytes from the + /// beginning of the function to the beginning of the constant data inside the pool. + /// + /// Panics if the handle is not present in this pool or if no offset has been assigned to it yet. + pub fn get_offset(&self, constant_handle: Constant) -> ConstantOffset { + self.handles_to_values.get(&constant_handle) + .expect("A constant handle must have a corresponding constant value; was a constant handle created outside of the pool?") + .offset + .expect("A constant offset has not yet been set; verify that `set_offset` has been called before this point") + } + + /// Iterate over the constants in insertion order. + pub fn iter(&self) -> impl Iterator<Item = (&Constant, &ConstantData)> { + self.handles_to_values.iter().map(|(h, e)| (h, &e.data)) + } + + /// Iterate over mutable entries in the constant pool in insertion order. + pub fn entries_mut(&mut self) -> impl Iterator<Item = &mut ConstantPoolEntry> { + self.handles_to_values.values_mut() + } + + /// Return the number of constants in the pool. + pub fn len(&self) -> usize { + self.handles_to_values.len() + } + + /// Return the combined size of all of the constant values in the pool.
+ pub fn byte_size(&self) -> usize { + self.values_to_handles.keys().map(|c| c.len()).sum() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty() { + let sut = ConstantPool::new(); + assert_eq!(sut.len(), 0); + } + + #[test] + fn insert() { + let mut sut = ConstantPool::new(); + sut.insert(vec![1, 2, 3]); + sut.insert(vec![4, 5, 6]); + assert_eq!(sut.len(), 2); + } + + #[test] + fn insert_duplicate() { + let mut sut = ConstantPool::new(); + let a = sut.insert(vec![1, 2, 3]); + sut.insert(vec![4, 5, 6]); + let b = sut.insert(vec![1, 2, 3]); + assert_eq!(a, b); + } + + #[test] + fn clear() { + let mut sut = ConstantPool::new(); + sut.insert(vec![1, 2, 3]); + assert_eq!(sut.len(), 1); + + sut.clear(); + assert_eq!(sut.len(), 0); + } + + #[test] + fn iteration_order() { + let mut sut = ConstantPool::new(); + sut.insert(vec![1, 2, 3]); + sut.insert(vec![4, 5, 6]); + sut.insert(vec![1, 2, 3]); + let data = sut.iter().map(|(_, v)| v).collect::>(); + assert_eq!(data, vec![&vec![1, 2, 3], &vec![4, 5, 6]]); + } + + #[test] + fn get() { + let mut sut = ConstantPool::new(); + let data = vec![1, 2, 3]; + let handle = sut.insert(data.clone()); + assert_eq!(sut.get(handle), &data); + } + + #[test] + #[should_panic] + fn get_nonexistent_constant() { + let sut = ConstantPool::new(); + let a = Constant::with_number(42).unwrap(); + sut.get(a); // panics, only use constants returned by ConstantPool + } + + #[test] + fn get_offset() { + let mut sut = ConstantPool::new(); + let a = sut.insert(vec![1]); + sut.set_offset(a, 42); + assert_eq!(sut.get_offset(a), 42) + } + + #[test] + #[should_panic] + fn get_nonexistent_offset() { + let mut sut = ConstantPool::new(); + let a = sut.insert(vec![1]); + sut.get_offset(a); // panics, set_offset should have been called + } +} diff --git a/cranelift-codegen/src/ir/dfg.rs b/cranelift-codegen/src/ir/dfg.rs index 1467ab23b..7392e893e 100644 --- a/cranelift-codegen/src/ir/dfg.rs +++ b/cranelift-codegen/src/ir/dfg.rs @@ -5,7 +5,7 
@@ use crate::ir; use crate::ir::builder::ReplaceBuilder; use crate::ir::extfunc::ExtFuncData; use crate::ir::instructions::{BranchInfo, CallInfo, InstructionData}; -use crate::ir::types; +use crate::ir::{types, ConstantPool}; use crate::ir::{ Ebb, FuncRef, Inst, SigRef, Signature, Type, Value, ValueLabelAssignments, ValueList, ValueListPool, @@ -67,6 +67,9 @@ pub struct DataFlowGraph { /// Saves Value labels. pub values_labels: Option>, + + /// Constants used within the function + pub constants: ConstantPool, } impl DataFlowGraph { @@ -81,6 +84,7 @@ impl DataFlowGraph { signatures: PrimaryMap::new(), ext_funcs: PrimaryMap::new(), values_labels: None, + constants: ConstantPool::new(), } } @@ -94,6 +98,7 @@ impl DataFlowGraph { self.signatures.clear(); self.ext_funcs.clear(); self.values_labels = None; + self.constants.clear() } /// Get the total number of instructions created in this function, whether they are currently diff --git a/cranelift-codegen/src/ir/entities.rs b/cranelift-codegen/src/ir/entities.rs index 67fd9157e..ab78e0f3a 100644 --- a/cranelift-codegen/src/ir/entities.rs +++ b/cranelift-codegen/src/ir/entities.rs @@ -104,6 +104,24 @@ impl GlobalValue { } } +/// An opaque reference to a constant +#[derive(Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] +pub struct Constant(u32); +entity_impl!(Constant, "const"); + +impl Constant { + /// Create a const reference from its number. + /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option { + if n < u32::MAX { + Some(Constant(n)) + } else { + None + } + } +} + /// An opaque reference to a jump table. #[derive(Copy, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] @@ -195,7 +213,7 @@ impl Table { } } -/// A reference to any of the entities defined in this module. +/// A reference to any of the entities defined in this module that can appear in CLIF IR. 
#[derive(Copy, Clone, PartialEq, Eq, Hash)] pub enum AnyEntity { /// The whole function. @@ -331,4 +349,10 @@ mod tests { mem::size_of::>() ); } + + #[test] + fn constant_with_number() { + assert_eq!(Constant::with_number(0).unwrap().to_string(), "const0"); + assert_eq!(Constant::with_number(1).unwrap().to_string(), "const1"); + } } diff --git a/cranelift-codegen/src/ir/function.rs b/cranelift-codegen/src/ir/function.rs index 8eea54256..1dcdeee91 100644 --- a/cranelift-codegen/src/ir/function.rs +++ b/cranelift-codegen/src/ir/function.rs @@ -113,6 +113,7 @@ impl Function { self.encodings.clear(); self.locations.clear(); self.offsets.clear(); + self.jt_offsets.clear(); self.srclocs.clear(); } diff --git a/cranelift-codegen/src/ir/immediates.rs b/cranelift-codegen/src/ir/immediates.rs index 4a8d5e1fb..8917d96d4 100644 --- a/cranelift-codegen/src/ir/immediates.rs +++ b/cranelift-codegen/src/ir/immediates.rs @@ -264,6 +264,74 @@ impl FromStr for Uimm32 { } } +/// A 128-bit unsigned integer immediate operand. +/// +/// This is used as an immediate value in SIMD instructions +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] +pub struct Uimm128(pub [u8; 16]); + +impl Display for Uimm128 { + // print a 128-bit vector in hexadecimal, e.g. 
0x0102030405060708090a0b0c0d0e0f (leading zero bytes are omitted) + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "0x")?; + let mut anything_written = false; + for &b in self.0.iter().rev() { + if b == 0 && !anything_written { + continue; + } else { + anything_written = true; + write!(f, "{:02x}", b)?; + } + } + if !anything_written { + write!(f, "00")?; + } + Ok(()) + } +} + +impl From<u64> for Uimm128 { + fn from(x: u64) -> Self { + let mut buffer: [u8; 16] = [0; 16]; // zero-fill + (0..8).for_each(|byte| buffer[byte] = (x >> (byte as u64 * 8) & 0xff) as u8); // insert each byte from the u64 into the buffer in little-endian order + Uimm128(buffer) + } +} + +impl From<&[u8]> for Uimm128 { + fn from(slice: &[u8]) -> Self { + assert_eq!(slice.len(), 16); + let mut buffer = [0; 16]; + buffer.copy_from_slice(slice); + Uimm128(buffer) + } +} + +impl FromStr for Uimm128 { + type Err = &'static str; + + // parse a 128-bit vector from a hexadecimal string, formatted as above; malformed input + // (including non-ASCII text) is reported as an error instead of panicking + fn from_str(s: &str) -> Result<Self, Self::Err> { + if s.len() <= 2 || !s.starts_with("0x") { + Err("Expected a hexadecimal string, e.g. 0x1234") + } else if s.len() % 2 != 0 { + Err("Hexadecimal string must have an even number of digits") + } else if s.len() > 34 { + Err("Hexadecimal string has too many digits to fit in a 128-bit vector") + } else if !s[2..].bytes().all(|b| b.is_ascii_hexdigit()) { + // validated up front so the two-byte slices below always fall on character + // boundaries and `from_str_radix` never sees a sign character such as `+` + Err("Unable to parse as hexadecimal") + } else { + let mut buffer = [0; 16]; // zero-fill + let start_at = s.len() / 2 - 1; + for i in (2..s.len()).step_by(2) { + let byte = u8::from_str_radix(&s[i..i + 2], 16) + .or_else(|_| Err("Unable to parse as hexadecimal"))?; + let position = start_at - (i / 2); + buffer[position] = byte; + } + Ok(Uimm128(buffer)) + } + } +} + /// 32-bit signed immediate offset. /// /// This is used to encode an immediate offset for load/store instructions.
All supported ISAs have @@ -884,6 +952,54 @@ mod tests { parse_err::("0x0_0000_0000_0000_0000", "Too many hexadecimal digits"); } + #[test] + fn format_uimm128() { + assert_eq!(Uimm128::from(0).to_string(), "0x00"); + assert_eq!(Uimm128::from(42).to_string(), "0x2a"); + assert_eq!(Uimm128::from(3735928559).to_string(), "0xdeadbeef"); + assert_eq!( + Uimm128::from(0x0102030405060708).to_string(), + "0x0102030405060708" + ); + } + + #[test] + fn parse_uimm128() { + parse_ok::("0x00", "0x00"); + parse_ok::("0x00000042", "0x42"); + parse_ok::( + "0x0102030405060708090a0b0c0d0e0f", + "0x0102030405060708090a0b0c0d0e0f", + ); + + parse_err::("", "Expected a hexadecimal string, e.g. 0x1234"); + parse_err::("0x", "Expected a hexadecimal string, e.g. 0x1234"); + parse_err::( + "0x042", + "Hexadecimal string must have an even number of digits", + ); + parse_err::( + "0x00000000000000000000000000000000000000000000000000", + "Hexadecimal string has too many digits to fit in a 128-bit vector", + ); + } + + #[test] + fn uimm128_endianness() { + assert_eq!( + "0x42".parse::().unwrap().0, + [0x42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + assert_eq!( + "0x00".parse::().unwrap().0, + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + assert_eq!( + "0x12345678".parse::().unwrap().0, + [0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ) + } + #[test] fn format_offset32() { assert_eq!(Offset32(0).to_string(), ""); diff --git a/cranelift-codegen/src/ir/mod.rs b/cranelift-codegen/src/ir/mod.rs index caa1da831..930f5d496 100644 --- a/cranelift-codegen/src/ir/mod.rs +++ b/cranelift-codegen/src/ir/mod.rs @@ -2,6 +2,7 @@ mod builder; pub mod condcodes; +pub mod constant; pub mod dfg; pub mod entities; mod extfunc; @@ -27,9 +28,10 @@ mod valueloc; use serde::{Deserialize, Serialize}; pub use crate::ir::builder::{InsertBuilder, InstBuilder, InstBuilderBase, InstInserterBase}; +pub use crate::ir::constant::{ConstantData, ConstantOffset, ConstantPool}; pub use 
crate::ir::dfg::{DataFlowGraph, ValueDef}; pub use crate::ir::entities::{ - Ebb, FuncRef, GlobalValue, Heap, Inst, JumpTable, SigRef, StackSlot, Table, Value, + Constant, Ebb, FuncRef, GlobalValue, Heap, Inst, JumpTable, SigRef, StackSlot, Table, Value, }; pub use crate::ir::extfunc::{ AbiParam, ArgumentExtension, ArgumentPurpose, ExtFuncData, Signature, diff --git a/cranelift-codegen/src/isa/x86/binemit.rs b/cranelift-codegen/src/isa/x86/binemit.rs index 8aa51c07b..afe2c2611 100644 --- a/cranelift-codegen/src/isa/x86/binemit.rs +++ b/cranelift-codegen/src/isa/x86/binemit.rs @@ -4,7 +4,7 @@ use super::enc_tables::{needs_offset, needs_sib_byte}; use super::registers::RU; use crate::binemit::{bad_encoding, CodeSink, Reloc}; use crate::ir::condcodes::{CondCode, FloatCC, IntCC}; -use crate::ir::{Ebb, Function, Inst, InstructionData, JumpTable, Opcode, TrapCode}; +use crate::ir::{Constant, Ebb, Function, Inst, InstructionData, JumpTable, Opcode, TrapCode}; use crate::isa::{RegUnit, StackBase, StackBaseMask, StackRef, TargetIsa}; use crate::regalloc::RegDiversions; @@ -341,3 +341,11 @@ fn jt_disp4(jt: JumpTable, func: &Function, sink: &mut CS sink.put4(delta); sink.reloc_jt(Reloc::X86PCRelRodata4, jt); } + +/// Emit a four-byte displacement to `constant` +fn const_disp4(constant: Constant, func: &Function, sink: &mut CS) { + let offset = func.dfg.constants.get_offset(constant); + let delta = offset.wrapping_sub(sink.offset() + 4); + sink.put4(delta); + sink.reloc_constant(Reloc::X86PCRelRodata4, offset); +} diff --git a/cranelift-codegen/src/verifier/mod.rs b/cranelift-codegen/src/verifier/mod.rs index 84acd0bd0..72fa1f7e1 100644 --- a/cranelift-codegen/src/verifier/mod.rs +++ b/cranelift-codegen/src/verifier/mod.rs @@ -679,6 +679,7 @@ impl<'a> Verifier<'a> { // Exhaustive list so we can't forget to add new formats Unary { .. } | UnaryImm { .. } + | UnaryImm128 { .. } | UnaryIeee32 { .. } | UnaryIeee64 { .. } | UnaryBool { .. 
} diff --git a/cranelift-codegen/src/write.rs b/cranelift-codegen/src/write.rs index 4fe2b67f3..4b616cab2 100644 --- a/cranelift-codegen/src/write.rs +++ b/cranelift-codegen/src/write.rs @@ -5,6 +5,7 @@ use crate::entity::SecondaryMap; use crate::ir::entities::AnyEntity; +use crate::ir::immediates::Uimm128; use crate::ir::{ DataFlowGraph, DisplayFunctionAnnotations, Ebb, Function, Inst, SigRef, Type, Value, ValueDef, ValueLoc, @@ -487,6 +488,11 @@ pub fn write_operands( match dfg[inst] { Unary { arg, .. } => write!(w, " {}", arg), UnaryImm { imm, .. } => write!(w, " {}", imm), + UnaryImm128 { imm, .. } => { + let data = dfg.constants.get(imm); + let uimm128 = Uimm128::from(&data[..]); + write!(w, " {}", uimm128) + } UnaryIeee32 { imm, .. } => write!(w, " {}", imm), UnaryIeee64 { imm, .. } => write!(w, " {}", imm), UnaryBool { imm, .. } => write!(w, " {}", imm), diff --git a/cranelift-faerie/src/backend.rs b/cranelift-faerie/src/backend.rs index a33955a21..d605757c4 100644 --- a/cranelift-faerie/src/backend.rs +++ b/cranelift-faerie/src/backend.rs @@ -430,6 +430,18 @@ impl<'a> RelocSink for FaerieRelocSink<'a> { } } } + + fn reloc_constant(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::ConstantOffset) { + match reloc { + Reloc::X86PCRelRodata4 => { + // Not necessary to record this unless we are going to split apart code and its + // jumptbl/rodata. 
+ } + _ => { + panic!("Unhandled reloc"); + } + } + } } #[allow(dead_code)] diff --git a/cranelift-filetests/src/lib.rs b/cranelift-filetests/src/lib.rs index 08f02afbf..073fc2449 100644 --- a/cranelift-filetests/src/lib.rs +++ b/cranelift-filetests/src/lib.rs @@ -48,6 +48,7 @@ mod test_postopt; mod test_preopt; mod test_print_cfg; mod test_regalloc; +mod test_rodata; mod test_run; mod test_safepoint; mod test_shrink; @@ -119,6 +120,7 @@ fn new_subtest(parsed: &TestCommand) -> subtest::SubtestResult test_binemit::subtest(parsed), "cat" => test_cat::subtest(parsed), "compile" => test_compile::subtest(parsed), + "rodata" => test_rodata::subtest(parsed), "dce" => test_dce::subtest(parsed), "domtree" => test_domtree::subtest(parsed), "legalizer" => test_legalizer::subtest(parsed), diff --git a/cranelift-filetests/src/test_binemit.rs b/cranelift-filetests/src/test_binemit.rs index f13480666..75bd86008 100644 --- a/cranelift-filetests/src/test_binemit.rs +++ b/cranelift-filetests/src/test_binemit.rs @@ -90,6 +90,10 @@ impl binemit::CodeSink for TextSink { write!(self.text, ") ").unwrap(); } + fn reloc_constant(&mut self, reloc: binemit::Reloc, constant: ir::ConstantOffset) { + write!(self.text, "{}({}) ", reloc, constant).unwrap(); + } + fn reloc_jt(&mut self, reloc: binemit::Reloc, jt: ir::JumpTable) { write!(self.text, "{}({}) ", reloc, jt).unwrap(); } @@ -313,7 +317,13 @@ impl SubTest for TestBinEmit { } sink.begin_rodata(); - // TODO: Read-only (constant pool) data. 
+ + // output constants + for (_, constant_data) in func.dfg.constants.iter() { + for byte in constant_data.iter() { + sink.put1(*byte) + } + } sink.end_codegen(); diff --git a/cranelift-filetests/src/test_compile.rs b/cranelift-filetests/src/test_compile.rs index 706aa0033..10e07440e 100644 --- a/cranelift-filetests/src/test_compile.rs +++ b/cranelift-filetests/src/test_compile.rs @@ -106,6 +106,7 @@ impl binemit::CodeSink for SizeSink { _addend: binemit::Addend, ) { } + fn reloc_constant(&mut self, _: binemit::Reloc, _: ir::ConstantOffset) {} fn reloc_jt(&mut self, _reloc: binemit::Reloc, _jt: ir::JumpTable) {} fn trap(&mut self, _code: ir::TrapCode, _srcloc: ir::SourceLoc) {} fn begin_jumptables(&mut self) {} diff --git a/cranelift-filetests/src/test_rodata.rs b/cranelift-filetests/src/test_rodata.rs new file mode 100644 index 000000000..412fa55ff --- /dev/null +++ b/cranelift-filetests/src/test_rodata.rs @@ -0,0 +1,123 @@ +//! Test command for verifying the rodata emitted after each function +//! +//! 
The `rodata` test command runs each function through the full code generator pipeline + +use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult}; +use cranelift_codegen; +use cranelift_codegen::binemit::{self, CodeInfo}; +use cranelift_codegen::ir; +use cranelift_codegen::ir::{Function, Value}; +use cranelift_codegen::isa::TargetIsa; +use cranelift_codegen::print_errors::pretty_error; +use cranelift_reader::TestCommand; +use log::info; +use std::borrow::Cow; + +struct TestRodata; + +pub fn subtest(parsed: &TestCommand) -> SubtestResult> { + assert_eq!(parsed.command, "rodata"); + if !parsed.options.is_empty() { + Err(format!("No options allowed on {}", parsed)) + } else { + Ok(Box::new(TestRodata)) + } +} + +impl SubTest for TestRodata { + fn name(&self) -> &'static str { + "rodata" + } + + fn is_mutating(&self) -> bool { + true + } + + fn needs_isa(&self) -> bool { + true + } + + fn run(&self, func: Cow, context: &Context) -> SubtestResult<()> { + let isa = context.isa.expect("rodata needs an ISA"); + let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); + + let CodeInfo { total_size, .. } = comp_ctx + .compile(isa) + .map_err(|e| pretty_error(&comp_ctx.func, context.isa, e))?; + + info!( + "Generated {} bytes of code:\n{}", + total_size, + comp_ctx.func.display(isa) + ); + + // Emit the compiled function into a sink that records only the bytes written to rodata. + let mut sink = RodataSink { + rodata: Vec::new(), + in_rodata: false, + }; + binemit::emit_function( + &comp_ctx.func, + |func, inst, div, sink, isa| isa.emit_inst(func, inst, div, sink), + &mut sink, + isa, + ); + + // Run the captured rodata through filecheck.
+ let text = format!("{:X?}", sink.rodata); + info!("Found rodata: {}", text); + run_filecheck(&text, context) + } +} + +/// Code sink that only captures emitted rodata +struct RodataSink { + in_rodata: bool, + rodata: Vec, +} + +impl binemit::CodeSink for RodataSink { + fn offset(&self) -> binemit::CodeOffset { + 0 + } + + fn put1(&mut self, byte: u8) { + if self.in_rodata { + self.rodata.push(byte); + } + } + + fn put2(&mut self, bytes: u16) { + if self.in_rodata { + self.rodata.extend_from_slice(&bytes.to_be_bytes()); + } + } + + fn put4(&mut self, bytes: u32) { + if self.in_rodata { + self.rodata.extend_from_slice(&bytes.to_be_bytes()); + } + } + + fn put8(&mut self, bytes: u64) { + if self.in_rodata { + self.rodata.extend_from_slice(&bytes.to_be_bytes()); + } + } + + fn reloc_ebb(&mut self, _reloc: binemit::Reloc, _ebb_offset: binemit::CodeOffset) {} + fn reloc_external(&mut self, _: binemit::Reloc, _: &ir::ExternalName, _: binemit::Addend) {} + fn reloc_constant(&mut self, _: binemit::Reloc, _: ir::ConstantOffset) {} + fn reloc_jt(&mut self, _reloc: binemit::Reloc, _jt: ir::JumpTable) {} + fn trap(&mut self, _code: ir::TrapCode, _srcloc: ir::SourceLoc) {} + fn begin_jumptables(&mut self) { + assert!(!self.in_rodata); + } + fn begin_rodata(&mut self) { + self.in_rodata = true; + } + fn end_codegen(&mut self) { + assert!(self.in_rodata); + } + fn add_stackmap(&mut self, _: &[Value], _: &Function, _: &dyn TargetIsa) {} +} diff --git a/cranelift-reader/src/parser.rs b/cranelift-reader/src/parser.rs index 5bcc87de0..d50315170 100644 --- a/cranelift-reader/src/parser.rs +++ b/cranelift-reader/src/parser.rs @@ -9,7 +9,7 @@ use crate::testfile::{Comment, Details, TestFile}; use cranelift_codegen::entity::EntityRef; use cranelift_codegen::ir; use cranelift_codegen::ir::entities::AnyEntity; -use cranelift_codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64}; +use cranelift_codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm128, Uimm32, 
Uimm64}; use cranelift_codegen::ir::instructions::{InstructionData, InstructionFormat, VariableArgs}; use cranelift_codegen::ir::types::INVALID; use cranelift_codegen::ir::{ @@ -546,6 +546,23 @@ impl<'a> Parser<'a> { } } + // Match and consume a Uimm128 immediate; due to size restrictions on InstructionData, Uimm128 is boxed in cranelift-codegen/meta/src/shared/immediates.rs + fn match_uimm128(&mut self, err_msg: &str) -> ParseResult { + if let Some(Token::Integer(text)) = self.token() { + self.consume(); + // Lexer just gives us raw text that looks like hex code. + // Parse it as an Uimm128 to check for overflow and other issues. + text.parse().map_err(|e| { + self.error(&format!( + "expected u128 hexadecimal immediate, failed to parse: {}", + e + )) + }) + } else { + err!(self.loc, err_msg) + } + } + // Match and consume a Uimm64 immediate. fn match_uimm64(&mut self, err_msg: &str) -> ParseResult { if let Some(Token::Integer(text)) = self.token() { @@ -2109,6 +2126,14 @@ impl<'a> Parser<'a> { opcode, imm: self.match_imm64("expected immediate integer operand")?, }, + InstructionFormat::UnaryImm128 => { + let uimm128 = self.match_uimm128("expected immediate hexadecimal operand")?; + let constant_handle = ctx.function.dfg.constants.insert(uimm128.0.to_vec()); + InstructionData::UnaryImm128 { + opcode, + imm: constant_handle, + } + } InstructionFormat::UnaryIeee32 => InstructionData::UnaryIeee32 { opcode, imm: self.match_ieee32("expected immediate 32-bit float operand")?, diff --git a/cranelift-serde/src/serde_clif_json.rs b/cranelift-serde/src/serde_clif_json.rs index 0a66cd710..90935e923 100644 --- a/cranelift-serde/src/serde_clif_json.rs +++ b/cranelift-serde/src/serde_clif_json.rs @@ -1,3 +1,4 @@ +use cranelift_codegen::ir::immediates::Uimm128; use cranelift_codegen::ir::{Ebb, Function, Inst, InstructionData, Signature}; use serde_derive::{Deserialize, Serialize}; @@ -261,6 +262,14 @@ pub fn get_inst_data(inst_index: Inst, func: &Function) -> SerInstData { 
opcode: opcode.to_string(), imm: imm.to_string(), }, + InstructionData::UnaryImm128 { opcode, imm } => { + let data = func.dfg.constants.get(imm); + let uimm128 = Uimm128::from(&data[..]); + SerInstData::UnaryImm { + opcode: opcode.to_string(), + imm: uimm128.to_string(), + } + } InstructionData::UnaryIeee32 { opcode, imm } => SerInstData::UnaryIeee32 { opcode: opcode.to_string(), imm: imm.to_string(), diff --git a/cranelift-simplejit/src/backend.rs b/cranelift-simplejit/src/backend.rs index 0a97df8f4..372f112e8 100644 --- a/cranelift-simplejit/src/backend.rs +++ b/cranelift-simplejit/src/backend.rs @@ -566,6 +566,18 @@ impl RelocSink for SimpleJITRelocSink { } } } + + fn reloc_constant(&mut self, _offset: CodeOffset, reloc: Reloc, _constant: ir::ConstantOffset) { + match reloc { + Reloc::X86PCRelRodata4 => { + // Not necessary to record this unless we are going to split apart code and its + // jumptbl/rodata. + } + _ => { + panic!("Unhandled reloc"); + } + } + } } struct SimpleJITStackmapSink { diff --git a/cranelift-wasm/src/code_translator.rs b/cranelift-wasm/src/code_translator.rs index ad8bbda43..b87844bfd 100644 --- a/cranelift-wasm/src/code_translator.rs +++ b/cranelift-wasm/src/code_translator.rs @@ -33,7 +33,7 @@ use crate::wasm_unsupported; use core::{i32, u32}; use cranelift_codegen::ir::condcodes::{FloatCC, IntCC}; use cranelift_codegen::ir::types::*; -use cranelift_codegen::ir::{self, InstBuilder, JumpTableData, MemFlags, ValueLabel}; +use cranelift_codegen::ir::{self, InstBuilder, JumpTableData, MemFlags, Value, ValueLabel}; use cranelift_codegen::packed_option::ReservedValue; use cranelift_frontend::{FunctionBuilder, Variable}; use wasmparser::{MemoryImmediate, Operator}; @@ -922,30 +922,55 @@ pub fn translate_operator( | Operator::TableSize { .. 
} => { wasm_unsupported!("proposed bulk memory operator {:?}", op); } + Operator::V128Const { value } => { + let handle = builder.func.dfg.constants.insert(value.bytes().to_vec()); + let value = builder.ins().vconst(I8X16, handle); + // the v128.const is typed in CLIF as a I8x16 but raw_bitcast to a different type before use + state.push1(value) + } + Operator::I8x16Splat + | Operator::I16x8Splat + | Operator::I32x4Splat + | Operator::I64x2Splat + | Operator::F32x4Splat + | Operator::F64x2Splat => { + let value_to_splat = state.pop1(); + let ty = type_of(op); + let splatted = builder.ins().splat(ty, value_to_splat); + state.push1(splatted) + } + Operator::I32x4ExtractLane { lane } + | Operator::I64x2ExtractLane { lane } + | Operator::F32x4ExtractLane { lane } + | Operator::F64x2ExtractLane { lane } => { + let vector = optionally_bitcast_vector(state.pop1(), type_of(op), builder); + state.push1(builder.ins().extractlane(vector, lane.clone())) + } + Operator::I8x16ReplaceLane { lane } + | Operator::I16x8ReplaceLane { lane } + | Operator::I32x4ReplaceLane { lane } + | Operator::I64x2ReplaceLane { lane } + | Operator::F32x4ReplaceLane { lane } + | Operator::F64x2ReplaceLane { lane } => { + let (vector, replacement_value) = state.pop2(); + let original_vector_type = builder.func.dfg.value_type(vector); + let vector = optionally_bitcast_vector(vector, type_of(op), builder); + let replaced_vector = builder + .ins() + .insertlane(vector, lane.clone(), replacement_value); + state.push1(optionally_bitcast_vector( + replaced_vector, + original_vector_type, + builder, + )) + } Operator::V128Load { .. } | Operator::V128Store { .. } - | Operator::V128Const { .. } - | Operator::V8x16Shuffle { .. } - | Operator::I8x16Splat | Operator::I8x16ExtractLaneS { .. } | Operator::I8x16ExtractLaneU { .. } - | Operator::I8x16ReplaceLane { .. } - | Operator::I16x8Splat | Operator::I16x8ExtractLaneS { .. } | Operator::I16x8ExtractLaneU { .. } - | Operator::I16x8ReplaceLane { .. 
} - | Operator::I32x4Splat - | Operator::I32x4ExtractLane { .. } - | Operator::I32x4ReplaceLane { .. } - | Operator::I64x2Splat - | Operator::I64x2ExtractLane { .. } - | Operator::I64x2ReplaceLane { .. } - | Operator::F32x4Splat - | Operator::F32x4ExtractLane { .. } - | Operator::F32x4ReplaceLane { .. } - | Operator::F64x2Splat - | Operator::F64x2ExtractLane { .. } - | Operator::F64x2ReplaceLane { .. } + | Operator::V8x16Shuffle { .. } | Operator::I8x16Eq | Operator::I8x16Ne | Operator::I8x16LtS @@ -1291,3 +1316,174 @@ fn translate_br_if_args( let inputs = state.peekn(return_count); (br_destination, inputs) } + +/// Determine the CLIF vector type of a SIMD operator's `v128` operands or, lacking any, its result +fn type_of(operator: &Operator) -> Type { + match operator { + Operator::V128Load { .. } + | Operator::V128Store { .. } + | Operator::V128Const { .. } + | Operator::V128Not + | Operator::V128And + | Operator::V128Or + | Operator::V128Xor + | Operator::V128Bitselect => I8X16, // default type representing V128 + + Operator::V8x16Shuffle { .. } + | Operator::I8x16Splat + | Operator::I8x16ExtractLaneS { .. } + | Operator::I8x16ExtractLaneU { .. } + | Operator::I8x16ReplaceLane { .. } + | Operator::I8x16Eq + | Operator::I8x16Ne + | Operator::I8x16LtS + | Operator::I8x16LtU + | Operator::I8x16GtS + | Operator::I8x16GtU + | Operator::I8x16LeS + | Operator::I8x16LeU + | Operator::I8x16GeS + | Operator::I8x16GeU + | Operator::I8x16Neg + | Operator::I8x16AnyTrue + | Operator::I8x16AllTrue + | Operator::I8x16Shl + | Operator::I8x16ShrS + | Operator::I8x16ShrU + | Operator::I8x16Add + | Operator::I8x16AddSaturateS + | Operator::I8x16AddSaturateU + | Operator::I8x16Sub + | Operator::I8x16SubSaturateS + | Operator::I8x16SubSaturateU + | Operator::I8x16Mul => I8X16, + + Operator::I16x8Splat + | Operator::I16x8ExtractLaneS { .. } + | Operator::I16x8ExtractLaneU { .. } + | Operator::I16x8ReplaceLane { ..
} + | Operator::I16x8Eq + | Operator::I16x8Ne + | Operator::I16x8LtS + | Operator::I16x8LtU + | Operator::I16x8GtS + | Operator::I16x8GtU + | Operator::I16x8LeS + | Operator::I16x8LeU + | Operator::I16x8GeS + | Operator::I16x8GeU + | Operator::I16x8Neg + | Operator::I16x8AnyTrue + | Operator::I16x8AllTrue + | Operator::I16x8Shl + | Operator::I16x8ShrS + | Operator::I16x8ShrU + | Operator::I16x8Add + | Operator::I16x8AddSaturateS + | Operator::I16x8AddSaturateU + | Operator::I16x8Sub + | Operator::I16x8SubSaturateS + | Operator::I16x8SubSaturateU + | Operator::I16x8Mul => I16X8, + + Operator::I32x4Splat + | Operator::I32x4ExtractLane { .. } + | Operator::I32x4ReplaceLane { .. } + | Operator::I32x4Eq + | Operator::I32x4Ne + | Operator::I32x4LtS + | Operator::I32x4LtU + | Operator::I32x4GtS + | Operator::I32x4GtU + | Operator::I32x4LeS + | Operator::I32x4LeU + | Operator::I32x4GeS + | Operator::I32x4GeU + | Operator::I32x4Neg + | Operator::I32x4AnyTrue + | Operator::I32x4AllTrue + | Operator::I32x4Shl + | Operator::I32x4ShrS + | Operator::I32x4ShrU + | Operator::I32x4Add + | Operator::I32x4Sub + | Operator::I32x4Mul + | Operator::F32x4ConvertSI32x4 + | Operator::F32x4ConvertUI32x4 => I32X4, + + Operator::I64x2Splat + | Operator::I64x2ExtractLane { .. } + | Operator::I64x2ReplaceLane { .. } + | Operator::I64x2Neg + | Operator::I64x2AnyTrue + | Operator::I64x2AllTrue + | Operator::I64x2Shl + | Operator::I64x2ShrS + | Operator::I64x2ShrU + | Operator::I64x2Add + | Operator::I64x2Sub + | Operator::F64x2ConvertSI64x2 + | Operator::F64x2ConvertUI64x2 => I64X2, + + Operator::F32x4Splat + | Operator::F32x4ExtractLane { .. } + | Operator::F32x4ReplaceLane { .. 
} + | Operator::F32x4Eq + | Operator::F32x4Ne + | Operator::F32x4Lt + | Operator::F32x4Gt + | Operator::F32x4Le + | Operator::F32x4Ge + | Operator::F32x4Abs + | Operator::F32x4Neg + | Operator::F32x4Sqrt + | Operator::F32x4Add + | Operator::F32x4Sub + | Operator::F32x4Mul + | Operator::F32x4Div + | Operator::F32x4Min + | Operator::F32x4Max + | Operator::I32x4TruncSF32x4Sat + | Operator::I32x4TruncUF32x4Sat => F32X4, + + Operator::F64x2Splat + | Operator::F64x2ExtractLane { .. } + | Operator::F64x2ReplaceLane { .. } + | Operator::F64x2Eq + | Operator::F64x2Ne + | Operator::F64x2Lt + | Operator::F64x2Gt + | Operator::F64x2Le + | Operator::F64x2Ge + | Operator::F64x2Abs + | Operator::F64x2Neg + | Operator::F64x2Sqrt + | Operator::F64x2Add + | Operator::F64x2Sub + | Operator::F64x2Mul + | Operator::F64x2Div + | Operator::F64x2Min + | Operator::F64x2Max + | Operator::I64x2TruncSF64x2Sat + | Operator::I64x2TruncUF64x2Sat => F64X2, + + _ => unimplemented!( + "Currently only the SIMD instructions are translated to their return type: {:?}", + operator + ), + } +} + +/// Ensure `value` has the vector type `needed_type`: a value already of that type is returned +/// unchanged, otherwise a `raw_bitcast` to `needed_type` is inserted and its result is returned +fn optionally_bitcast_vector( + value: Value, + needed_type: Type, + builder: &mut FunctionBuilder, +) -> Value { + if builder.func.dfg.value_type(value) != needed_type { + builder.ins().raw_bitcast(needed_type, value) + } else { + value + } +} diff --git a/cranelift-wasm/tests/wasm_testsuite.rs b/cranelift-wasm/tests/wasm_testsuite.rs index f52182828..6b16f6979 100644 --- a/cranelift-wasm/tests/wasm_testsuite.rs +++ b/cranelift-wasm/tests/wasm_testsuite.rs @@ -10,7 +10,7 @@ use std::io::prelude::*; use std::path::Path; use std::str::FromStr; use target_lexicon::triple; -use wabt::wat2wasm; +use wabt::{wat2wasm_with_features, Features}; #[test] fn testsuite() { @@ -61,7 +61,9 @@ fn handle_module(path: &Path, flags: &Flags, return_mode: ReturnMode) { Some("wasm") =>
read_file(path).expect("error reading wasm file"), Some("wat") => { let wat = read_file(path).expect("error reading wat file"); - match wat2wasm(&wat) { + let mut features = Features::new(); + features.enable_all(); + match wat2wasm_with_features(&wat, features) { Ok(wasm) => wasm, Err(e) => { panic!("error converting wat to wasm: {:?}", e); diff --git a/filetests/isa/x86/compile-vconst.clif b/filetests/isa/x86/compile-vconst.clif new file mode 100644 index 000000000..c64c9fc50 --- /dev/null +++ b/filetests/isa/x86/compile-vconst.clif @@ -0,0 +1,16 @@ +test compile +set enable_simd=true +set probestack_enabled=false +target x86_64 haswell + +; use baldrdash calling convention here for simplicity (avoids prologue, epilogue) +function %test_vconst_i32() -> i32x4 baldrdash_system_v { +ebb0: + v0 = vconst.i32x4 0x1234 + return v0 +} + +; check: ebb0: +; nextln: v0 = vconst.i32x4 0x1234 +; nextln: return v0 +; nextln: } diff --git a/filetests/isa/x86/rodata-vconst.clif b/filetests/isa/x86/rodata-vconst.clif new file mode 100644 index 000000000..fd029b678 --- /dev/null +++ b/filetests/isa/x86/rodata-vconst.clif @@ -0,0 +1,13 @@ +test rodata +set enable_simd=true +set probestack_enabled=false +target x86_64 haswell + +; use baldrdash calling convention here for simplicity (avoids prologue, epilogue) +function %test_vconst_i32() -> i32x4 baldrdash_system_v { +ebb0: + v0 = vconst.i32x4 0x1234 + return v0 +} + +; sameln: [34, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/filetests/isa/x86/vconst.clif b/filetests/isa/x86/vconst.clif new file mode 100644 index 000000000..f7b9ce462 --- /dev/null +++ b/filetests/isa/x86/vconst.clif @@ -0,0 +1,11 @@ +test binemit +set opt_level=best +set enable_simd +target x86_64 + +function %test_vconst_b8() { +ebb0: +[-, %xmm2] v0 = vconst.b8x16 0x00 ; bin: 0f 10 15 00000008 PCRelRodata4(15) +[-, %xmm3] v1 = vconst.b8x16 0x01 ; bin: 0f 10 1d 00000011 PCRelRodata4(31) + return +} diff --git a/src/disasm.rs b/src/disasm.rs index 
5bfa14e31..7a98e02b0 100644 --- a/src/disasm.rs +++ b/src/disasm.rs @@ -56,6 +56,22 @@ impl binemit::RelocSink for PrintRelocs { write!(&mut self.text, "reloc_jt: {} {} at {}\n", r, jt, where_).unwrap(); } } + + fn reloc_constant( + &mut self, + code_offset: binemit::CodeOffset, + reloc: binemit::Reloc, + constant: ir::ConstantOffset, + ) { + if self.flag_print { + write!( + &mut self.text, + "reloc_constant: {} {} at {}\n", + reloc, constant, code_offset + ) + .unwrap(); + } + } } pub struct PrintTraps { diff --git a/wasmtests/simd.wat b/wasmtests/simd.wat new file mode 100644 index 000000000..99b7d5c10 --- /dev/null +++ b/wasmtests/simd.wat @@ -0,0 +1,23 @@ +(module + (func $test_splat (result i32) + i32.const 42 + i32x4.splat + i32x4.extract_lane 0 + ) + + (func $test_insert_lane (result i32) + v128.const i64x2 0 0 + i32.const 99 + i32x4.replace_lane 1 + i32x4.extract_lane 1 + ) + + (func $test_const (result i32) + v128.const i32x4 1 2 3 4 + i32x4.extract_lane 3 + ) + + (export "test_splat" (func $test_splat)) + (export "test_insert_lane" (func $test_insert_lane)) + (export "test_const" (func $test_const)) +)