use crate::{
core_arch::{simd::*, simd_llvm::*, x86::*},
intrinsics, mem, ptr,
};
#[cfg(test)]
use stdarch_test::assert_instr;
/// Adds the first component of `a` and `b`; the other components are copied
/// from `a` (`addss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(addss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_ss(a: __m128, b: __m128) -> __m128 {
    addss(a, b)
}

/// Adds the four components of `a` and `b` element-wise (`addps`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(addps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_ps(a: __m128, b: __m128) -> __m128 {
    simd_add(a, b)
}

/// Subtracts the first component of `b` from `a`; the other components are
/// copied from `a` (`subss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(subss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 {
    subss(a, b)
}

/// Subtracts the four components of `b` from `a` element-wise (`subps`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(subps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 {
    simd_sub(a, b)
}

/// Multiplies the first component of `a` and `b`; the other components are
/// copied from `a` (`mulss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(mulss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 {
    mulss(a, b)
}

/// Multiplies the four components of `a` and `b` element-wise (`mulps`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(mulps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 {
    simd_mul(a, b)
}

/// Divides the first component of `a` by `b`; the other components are
/// copied from `a` (`divss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(divss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_div_ss(a: __m128, b: __m128) -> __m128 {
    divss(a, b)
}

/// Divides the four components of `a` by `b` element-wise (`divps`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(divps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_div_ps(a: __m128, b: __m128) -> __m128 {
    simd_div(a, b)
}
/// Returns the square root of the first component of `a`; the other
/// components are copied from `a` (`sqrtss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(sqrtss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sqrt_ss(a: __m128) -> __m128 {
    sqrtss(a)
}

/// Returns the square root of each of the four components of `a` (`sqrtps`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(sqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sqrt_ps(a: __m128) -> __m128 {
    sqrtps(a)
}

/// Returns the hardware's approximate reciprocal of the first component of
/// `a`; the other components are copied from `a` (`rcpss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rcpss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_rcp_ss(a: __m128) -> __m128 {
    rcpss(a)
}

/// Returns the hardware's approximate reciprocal of each of the four
/// components of `a` (`rcpps`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rcpps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_rcp_ps(a: __m128) -> __m128 {
    rcpps(a)
}

/// Returns the hardware's approximate reciprocal square root of the first
/// component of `a`; the other components are copied from `a` (`rsqrtss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rsqrtss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_rsqrt_ss(a: __m128) -> __m128 {
    rsqrtss(a)
}

/// Returns the hardware's approximate reciprocal square root of each of the
/// four components of `a` (`rsqrtps`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_rsqrt_ps(a: __m128) -> __m128 {
    rsqrtps(a)
}
/// Compares the first component of `a` and `b` and returns the minimum in
/// the lowest lane; the other lanes are copied from `a` (`minss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(minss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_ss(a: __m128, b: __m128) -> __m128 {
    minss(a, b)
}

/// Returns the lane-wise minimum of `a` and `b`, with the NaN/zero corner
/// cases resolved exactly as the `minps` instruction does.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(minps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_ps(a: __m128, b: __m128) -> __m128 {
    minps(a, b)
}

/// Compares the first component of `a` and `b` and returns the maximum in
/// the lowest lane; the other lanes are copied from `a` (`maxss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(maxss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_ss(a: __m128, b: __m128) -> __m128 {
    maxss(a, b)
}

/// Returns the lane-wise maximum of `a` and `b`, with the NaN/zero corner
/// cases resolved exactly as the `maxps` instruction does.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(maxps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_ps(a: __m128, b: __m128) -> __m128 {
    maxps(a, b)
}
/// Bitwise AND of the 128 bits of `a` and `b`.
///
/// The operands are transmuted to integer vectors because `simd_and` is
/// defined on integers; the result is transmuted back to `__m128`.
#[inline]
#[target_feature(enable = "sse")]
// i586 only seems to generate plain `and` instructions, so ignore it.
#[cfg_attr(
    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
    assert_instr(andps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_ps(a: __m128, b: __m128) -> __m128 {
    let a: __m128i = mem::transmute(a);
    let b: __m128i = mem::transmute(b);
    mem::transmute(simd_and(a, b))
}

/// Bitwise AND-NOT: `(!a) & b` over the 128 bits of the inputs.
#[inline]
#[target_feature(enable = "sse")]
// i586 only seems to generate plain `not`/`and` instructions, so ignore it.
#[cfg_attr(
    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
    assert_instr(andnps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 {
    let a: __m128i = mem::transmute(a);
    let b: __m128i = mem::transmute(b);
    // XOR with an all-ones mask is how `!a` is expressed for SIMD vectors.
    let mask: __m128i = mem::transmute(i32x4::splat(-1));
    mem::transmute(simd_and(simd_xor(mask, a), b))
}

/// Bitwise OR of the 128 bits of `a` and `b`.
#[inline]
#[target_feature(enable = "sse")]
// i586 only seems to generate plain `or` instructions, so ignore it.
#[cfg_attr(
    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
    assert_instr(orps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_ps(a: __m128, b: __m128) -> __m128 {
    let a: __m128i = mem::transmute(a);
    let b: __m128i = mem::transmute(b);
    mem::transmute(simd_or(a, b))
}

/// Bitwise XOR of the 128 bits of `a` and `b`.
#[inline]
#[target_feature(enable = "sse")]
// i586 only seems to generate plain `xor` instructions, so ignore it.
#[cfg_attr(
    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
    assert_instr(xorps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 {
    let a: __m128i = mem::transmute(a);
    let b: __m128i = mem::transmute(b);
    mem::transmute(simd_xor(a, b))
}
// Scalar (`_ss`) comparisons: the lowest lane of the result is an all-ones
// bit mask if the comparison holds (all zeros otherwise); the three upper
// lanes are copied from `a`. The third argument of `cmpss` selects the
// predicate: 0=eq, 1=lt, 2=le, 3=unord, 4=neq, 5=nlt, 6=nle, 7=ord.

/// Lowest lane: mask for `a == b`; upper lanes copied from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpeqss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 0)
}

/// Lowest lane: mask for `a < b`; upper lanes copied from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 1)
}

/// Lowest lane: mask for `a <= b`; upper lanes copied from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpless))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 2)
}

/// Lowest lane: mask for `a > b`; upper lanes copied from `a`.
///
/// There is no "gt" predicate, so this computes `b < a` and then shuffles
/// the result's lowest lane (index 4 of the concatenated `[a, cmp]` vector)
/// back together with `a`'s upper lanes.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 {
    simd_shuffle4(a, cmpss(b, a, 1), [4, 1, 2, 3])
}

/// Lowest lane: mask for `a >= b`; upper lanes copied from `a`
/// (implemented as `b <= a` plus a shuffle, like `_mm_cmpgt_ss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpless))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 {
    simd_shuffle4(a, cmpss(b, a, 2), [4, 1, 2, 3])
}

/// Lowest lane: mask for `a != b`; upper lanes copied from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpneqss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 4)
}

/// Lowest lane: mask for not-less-than (`!(a < b)`); upper lanes from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 5)
}

/// Lowest lane: mask for not-less-or-equal (`!(a <= b)`); upper lanes from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnless))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 6)
}

/// Lowest lane: mask for not-greater-than (`!(a > b)`); upper lanes from `a`
/// (implemented as `!(b < a)` plus a shuffle, like `_mm_cmpgt_ss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 {
    simd_shuffle4(a, cmpss(b, a, 5), [4, 1, 2, 3])
}

/// Lowest lane: mask for not-greater-or-equal (`!(a >= b)`); upper lanes
/// from `a` (implemented as `!(b <= a)` plus a shuffle).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnless))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 {
    simd_shuffle4(a, cmpss(b, a, 6), [4, 1, 2, 3])
}

/// Lowest lane: mask for "ordered" (neither operand is NaN); upper lanes
/// copied from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpordss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 7)
}

/// Lowest lane: mask for "unordered" (either operand is NaN); upper lanes
/// copied from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpunordss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 3)
}
// Packed (`_ps`) comparisons: each result lane is an all-ones bit mask if
// the comparison holds for that lane, all zeros otherwise. The predicate
// numbering is the same as for `cmpss`; gt/ge/ngt/nge are expressed by
// swapping the operands of lt/le/nlt/nle (and ord/unord are commutative,
// so the operand order there is immaterial).

/// Lane-wise mask for `a == b`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpeqps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 0)
}

/// Lane-wise mask for `a < b`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 1)
}

/// Lane-wise mask for `a <= b`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 2)
}

/// Lane-wise mask for `a > b` (computed as `b < a`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 1)
}

/// Lane-wise mask for `a >= b` (computed as `b <= a`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 2)
}

/// Lane-wise mask for `a != b`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpneqps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 4)
}

/// Lane-wise mask for not-less-than (`!(a < b)`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 5)
}

/// Lane-wise mask for not-less-or-equal (`!(a <= b)`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 6)
}

/// Lane-wise mask for not-greater-than (computed as `!(b < a)`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 5)
}

/// Lane-wise mask for not-greater-or-equal (computed as `!(b <= a)`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 6)
}

/// Lane-wise mask for "ordered" (neither lane operand is NaN).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpordps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 7)
}

/// Lane-wise mask for "unordered" (either lane operand is NaN).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpunordps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 3)
}
// Scalar compare-and-return-integer intrinsics built on `comiss`: each
// compares the lowest lanes of `a` and `b` and returns `1` if the relation
// holds, `0` otherwise.

/// Returns `1` if the lowest lanes satisfy `a == b`, else `0` (`comiss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 {
    comieq_ss(a, b)
}

/// Returns `1` if the lowest lanes satisfy `a < b`, else `0` (`comiss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 {
    comilt_ss(a, b)
}

/// Returns `1` if the lowest lanes satisfy `a <= b`, else `0` (`comiss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comile_ss(a: __m128, b: __m128) -> i32 {
    comile_ss(a, b)
}

/// Returns `1` if the lowest lanes satisfy `a > b`, else `0` (`comiss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 {
    comigt_ss(a, b)
}

/// Returns `1` if the lowest lanes satisfy `a >= b`, else `0` (`comiss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comige_ss(a: __m128, b: __m128) -> i32 {
    comige_ss(a, b)
}

/// Returns `1` if the lowest lanes satisfy `a != b`, else `0` (`comiss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 {
    comineq_ss(a, b)
}
// Unordered ("quiet") variants of the scalar compares above, built on
// `ucomiss`. Same `1`/`0` results; the difference from the `comi` family is
// the instruction's floating-point exception behavior on NaN inputs.

/// Like `_mm_comieq_ss`, but uses the `ucomiss` instruction.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 {
    ucomieq_ss(a, b)
}

/// Like `_mm_comilt_ss`, but uses the `ucomiss` instruction.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 {
    ucomilt_ss(a, b)
}

/// Like `_mm_comile_ss`, but uses the `ucomiss` instruction.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 {
    ucomile_ss(a, b)
}

/// Like `_mm_comigt_ss`, but uses the `ucomiss` instruction.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 {
    ucomigt_ss(a, b)
}

/// Like `_mm_comige_ss`, but uses the `ucomiss` instruction.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 {
    ucomige_ss(a, b)
}

/// Like `_mm_comineq_ss`, but uses the `ucomiss` instruction.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 {
    ucomineq_ss(a, b)
}
/// Converts the lowest `f32` lane of `a` to `i32` using the current MXCSR
/// rounding mode (`cvtss2si`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtss_si32(a: __m128) -> i32 {
    cvtss2si(a)
}

/// Alias for [`_mm_cvtss_si32`].
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvt_ss2si(a: __m128) -> i32 {
    _mm_cvtss_si32(a)
}

/// Converts the lowest `f32` lane of `a` to `i32` with truncation
/// (`cvttss2si`), regardless of the MXCSR rounding mode.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttss_si32(a: __m128) -> i32 {
    cvttss2si(a)
}

/// Alias for [`_mm_cvttss_si32`].
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtt_ss2si(a: __m128) -> i32 {
    _mm_cvttss_si32(a)
}

/// Extracts the lowest `f32` lane of `a`.
#[inline]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtss_f32(a: __m128) -> f32 {
    simd_extract(a, 0)
}

/// Converts `b` to `f32` and inserts it into the lowest lane of `a`; the
/// other lanes are copied from `a` (`cvtsi2ss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtsi2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 {
    cvtsi2ss(a, b)
}

/// Alias for [`_mm_cvtsi32_ss`].
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtsi2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128 {
    _mm_cvtsi32_ss(a, b)
}
/// Constructs a `__m128` with the lowest lane set to `a` and the other
/// lanes zeroed.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_ss(a: f32) -> __m128 {
    __m128(a, 0.0, 0.0, 0.0)
}

/// Constructs a `__m128` with all four lanes set to `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(shufps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_ps(a: f32) -> __m128 {
    __m128(a, a, a, a)
}

/// Alias for [`_mm_set1_ps`].
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(shufps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_ps1(a: f32) -> __m128 {
    _mm_set1_ps(a)
}

/// Constructs a `__m128` from four floats in *reverse* lane order: `a` is
/// the highest lane and `d` the lowest, matching Intel's `_mm_set_ps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
    __m128(d, c, b, a)
}

/// Constructs a `__m128` from four floats in memory (lane) order: `a` is
/// the lowest lane and `d` the highest.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(unpcklps))]
#[cfg_attr(all(test, target_arch = "x86"), assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
    __m128(a, b, c, d)
}

/// Constructs a `__m128` with all lanes zero.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_ps() -> __m128 {
    __m128(0.0, 0.0, 0.0, 0.0)
}
/// Builds the 8-bit immediate used by `_mm_shuffle_ps` and friends from
/// four 2-bit lane selectors: `z` occupies bits 7:6, `y` bits 5:4, `x`
/// bits 3:2, and `w` bits 1:0.
#[inline]
#[allow(non_snake_case)]
#[unstable(feature = "stdarch", issue = "27731")]
pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
    let packed = (z << 6) | (y << 4) | (x << 2) | w;
    packed as i32
}
/// Shuffles lanes of `a` and `b` according to `mask` (`shufps`).
///
/// The two low result lanes are selected from `a` and the two high result
/// lanes from `b`, each by a 2-bit field of `mask` (see `_MM_SHUFFLE`).
///
/// `simd_shuffle4` requires compile-time-constant indices, so the nested
/// macros below expand the runtime-typed (but `rustc_args_required_const`)
/// `mask` into all 256 possible constant index arrays, two bits at a time.
/// Indices 0-3 pick lanes of `a`; indices 4-7 pick lanes of `b`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(shufps, mask = 3))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_ps(a: __m128, b: __m128, mask: i32) -> __m128 {
    let mask = (mask & 0xFF) as u8;
    // Innermost step: all four index fields decoded, emit the shuffle.
    macro_rules! shuffle_done {
        ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
            simd_shuffle4(a, b, [$x01, $x23, $x45, $x67])
        };
    }
    // Decode bits 7:6 (highest result lane, taken from `b`, hence 4..=7).
    macro_rules! shuffle_x67 {
        ($x01:expr, $x23:expr, $x45:expr) => {
            match (mask >> 6) & 0b11 {
                0b00 => shuffle_done!($x01, $x23, $x45, 4),
                0b01 => shuffle_done!($x01, $x23, $x45, 5),
                0b10 => shuffle_done!($x01, $x23, $x45, 6),
                _ => shuffle_done!($x01, $x23, $x45, 7),
            }
        };
    }
    // Decode bits 5:4 (third result lane, taken from `b`).
    macro_rules! shuffle_x45 {
        ($x01:expr, $x23:expr) => {
            match (mask >> 4) & 0b11 {
                0b00 => shuffle_x67!($x01, $x23, 4),
                0b01 => shuffle_x67!($x01, $x23, 5),
                0b10 => shuffle_x67!($x01, $x23, 6),
                _ => shuffle_x67!($x01, $x23, 7),
            }
        };
    }
    // Decode bits 3:2 (second result lane, taken from `a`).
    macro_rules! shuffle_x23 {
        ($x01:expr) => {
            match (mask >> 2) & 0b11 {
                0b00 => shuffle_x45!($x01, 0),
                0b01 => shuffle_x45!($x01, 1),
                0b10 => shuffle_x45!($x01, 2),
                _ => shuffle_x45!($x01, 3),
            }
        };
    }
    // Decode bits 1:0 (lowest result lane, taken from `a`).
    match mask & 0b11 {
        0b00 => shuffle_x23!(0),
        0b01 => shuffle_x23!(1),
        0b10 => shuffle_x23!(2),
        _ => shuffle_x23!(3),
    }
}
/// Interleaves the two high lanes of `a` and `b`:
/// `[a2, b2, a3, b3]` (`unpckhps`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 {
    simd_shuffle4(a, b, [2, 6, 3, 7])
}

/// Interleaves the two low lanes of `a` and `b`:
/// `[a0, b0, a1, b1]` (`unpcklps`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 {
    simd_shuffle4(a, b, [0, 4, 1, 5])
}

/// Returns `[b2, b3, a2, a3]`: the high half of `b` moved into the low
/// half of the result, high half taken from `a` (`movhlps`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
    simd_shuffle4(a, b, [6, 7, 2, 3])
}

/// Returns `[a0, a1, b0, b1]`: the low half of `b` moved into the high
/// half of the result, low half taken from `a` (`movlhps`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
    simd_shuffle4(a, b, [0, 1, 4, 5])
}

/// Returns a 4-bit mask built from the sign bits of the four lanes of `a`
/// (`movmskps`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(movmskps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
    movmskps(a)
}
/// Loads two `f32`s from `p` into the two high lanes of the result; the
/// low lanes are copied from `a` (`movhps`).
///
/// No alignment is required of `p`. Note: not `#[stable]` because the
/// `__m64` type involved is itself unstable.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movhps))]
pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
    // Read the 64 bits as a pair of f32s, widen the pair to four lanes so
    // it can be shuffled against `a`, then keep a's low half + b's pair.
    let q = p as *const f32x2;
    let b: f32x2 = *q;
    let bb = simd_shuffle4(b, b, [0, 1, 0, 1]);
    simd_shuffle4(a, bb, [0, 1, 4, 5])
}

/// Loads two `f32`s from `p` into the two low lanes of the result; the
/// high lanes are copied from `a` (`movlps`).
///
/// No alignment is required of `p`. Note: not `#[stable]` because the
/// `__m64` type involved is itself unstable.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movlps))]
pub unsafe fn _mm_loadl_pi(a: __m128, p: *const __m64) -> __m128 {
    // Same widening trick as `_mm_loadh_pi`, keeping a's high half instead.
    let q = p as *const f32x2;
    let b: f32x2 = *q;
    let bb = simd_shuffle4(b, b, [0, 1, 0, 1]);
    simd_shuffle4(a, bb, [4, 5, 2, 3])
}
/// Loads one `f32` from `p` into the lowest lane; the other lanes are
/// zeroed (`movss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_ss(p: *const f32) -> __m128 {
    __m128(*p, 0.0, 0.0, 0.0)
}

/// Loads one `f32` from `p` and broadcasts it to all four lanes.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load1_ps(p: *const f32) -> __m128 {
    let a = *p;
    __m128(a, a, a, a)
}

/// Alias for [`_mm_load1_ps`].
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 {
    _mm_load1_ps(p)
}

/// Loads four `f32`s from `p` (which must be 16-byte aligned) into the
/// four lanes in memory order (`movaps`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 {
    *(p as *const __m128)
}

/// Loads four `f32`s from `p` with no alignment requirement (`movups`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 {
    // Byte-wise copy sidesteps the alignment requirement a typed load
    // would impose.
    let mut dst = _mm_undefined_ps();
    ptr::copy_nonoverlapping(
        p as *const u8,
        &mut dst as *mut __m128 as *mut u8,
        mem::size_of::<__m128>(),
    );
    dst
}

/// Loads four `f32`s from `p` (16-byte aligned) in *reversed* order: the
/// first float in memory ends up in the highest lane.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
    let a = _mm_load_ps(p);
    simd_shuffle4(a, a, [3, 2, 1, 0])
}
/// Stores the two high lanes of `a` to the 64 bits at `p` (`movhps`).
///
/// Note: not `#[stable]` because the `__m64` type involved is itself
/// unstable.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(
    all(test, any(target_arch = "x86_64", target_feature = "sse2"),
        not(target_os = "windows")),
    assert_instr(movhps)
)]
pub unsafe fn _mm_storeh_pi(p: *mut __m64, a: __m128) {
    #[cfg(target_arch = "x86")]
    {
        // On 32-bit, write through a u64 to avoid touching MMX state.
        let a64: u64x2 = mem::transmute(a);
        let a_hi = a64.extract(1);
        *(p as *mut u64) = a_hi;
    }
    #[cfg(target_arch = "x86_64")]
    {
        // On 64-bit, extracting as f64 makes LLVM select `movhps`.
        let a64: f64x2 = mem::transmute(a);
        let a_hi = a64.extract(1);
        *p = mem::transmute(a_hi);
    }
}

/// Stores the two low lanes of `a` to the 64 bits at `p` (`movlps`).
///
/// Note: not `#[stable]` because the `__m64` type involved is itself
/// unstable.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(
    all(
        test,
        any(target_arch = "x86_64", target_feature = "sse2"),
        not(target_os = "windows")
    ),
    assert_instr(movlps)
)]
pub unsafe fn _mm_storel_pi(p: *mut __m64, a: __m128) {
    #[cfg(target_arch = "x86")]
    {
        // On 32-bit, write through a u64 to avoid touching MMX state.
        let a64: u64x2 = mem::transmute(a);
        let a_hi = a64.extract(0);
        *(p as *mut u64) = a_hi;
    }
    #[cfg(target_arch = "x86_64")]
    {
        // On 64-bit, extracting as f64 makes LLVM select `movlps`.
        let a64: f64x2 = mem::transmute(a);
        let a_hi = a64.extract(0);
        *p = mem::transmute(a_hi);
    }
}
/// Stores the lowest lane of `a` to `p` (`movss`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) {
    *p = simd_extract(a, 0);
}

/// Stores the lowest lane of `a` to all four `f32`s at `p` (which must be
/// 16-byte aligned).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) {
    // Broadcast lane 0 to all lanes, then do one aligned 128-bit store.
    let b: __m128 = simd_shuffle4(a, a, [0, 0, 0, 0]);
    *(p as *mut __m128) = b;
}

/// Alias for [`_mm_store1_ps`].
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) {
    _mm_store1_ps(p, a);
}

/// Stores the four lanes of `a` to `p` (16-byte aligned) in lane order
/// (`movaps`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) {
    *(p as *mut __m128) = a;
}

/// Stores the four lanes of `a` to `p` with no alignment requirement
/// (`movups`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) {
    // Byte-wise copy sidesteps the alignment requirement a typed store
    // would impose.
    ptr::copy_nonoverlapping(
        &a as *const __m128 as *const u8,
        p as *mut u8,
        mem::size_of::<__m128>(),
    );
}

/// Stores the four lanes of `a` to `p` (16-byte aligned) in *reversed*
/// order: the highest lane is written first in memory.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) {
    let b: __m128 = simd_shuffle4(a, a, [3, 2, 1, 0]);
    *(p as *mut __m128) = b;
}

/// Returns `a` with its lowest lane replaced by the lowest lane of `b`
/// (`movss`). Index 4 selects lane 0 of `b` in the concatenated vector.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_ss(a: __m128, b: __m128) -> __m128 {
    simd_shuffle4(a, b, [4, 1, 2, 3])
}
/// Performs a store fence: serializes all store-to-memory instructions
/// issued before this one (`sfence`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(sfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sfence() {
    sfence()
}

/// Reads the MXCSR control/status register (`stmxcsr`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(stmxcsr))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_getcsr() -> u32 {
    // `stmxcsr` writes 32 bits through the given pointer.
    let mut result = 0_i32;
    stmxcsr((&mut result) as *mut _ as *mut i8);
    result as u32
}

/// Writes `val` to the MXCSR control/status register (`ldmxcsr`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ldmxcsr))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setcsr(val: u32) {
    ldmxcsr(&val as *const _ as *const i8);
}
// MXCSR exception *status* flag bits (sticky; set by the hardware).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_INVALID: u32 = 0x0001;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_DENORM: u32 = 0x0002;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_DIV_ZERO: u32 = 0x0004;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_OVERFLOW: u32 = 0x0008;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_UNDERFLOW: u32 = 0x0010;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_INEXACT: u32 = 0x0020;
// Union of all exception status bits.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_MASK: u32 = 0x003f;
// MXCSR exception *mask* bits (a set bit suppresses the exception).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_INVALID: u32 = 0x0080;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_DENORM: u32 = 0x0100;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_DIV_ZERO: u32 = 0x0200;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_OVERFLOW: u32 = 0x0400;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_UNDERFLOW: u32 = 0x0800;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_INEXACT: u32 = 0x1000;
// Union of all exception mask bits.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_MASK: u32 = 0x1f80;
// MXCSR rounding-mode field values (two bits, extracted by _MM_ROUND_MASK).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_NEAREST: u32 = 0x0000;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_DOWN: u32 = 0x2000;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_UP: u32 = 0x4000;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_TOWARD_ZERO: u32 = 0x6000;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_MASK: u32 = 0x6000;
// MXCSR flush-to-zero control bit.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FLUSH_ZERO_MASK: u32 = 0x8000;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FLUSH_ZERO_ON: u32 = 0x8000;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FLUSH_ZERO_OFF: u32 = 0x0000;
/// Returns the exception-mask field of MXCSR (`_MM_MASK_*` bits).
#[inline]
#[allow(non_snake_case)]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _MM_GET_EXCEPTION_MASK() -> u32 {
    _mm_getcsr() & _MM_MASK_MASK
}

/// Returns the exception-status field of MXCSR (`_MM_EXCEPT_*` bits).
#[inline]
#[allow(non_snake_case)]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _MM_GET_EXCEPTION_STATE() -> u32 {
    _mm_getcsr() & _MM_EXCEPT_MASK
}

/// Returns the flush-to-zero bit of MXCSR.
#[inline]
#[allow(non_snake_case)]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _MM_GET_FLUSH_ZERO_MODE() -> u32 {
    _mm_getcsr() & _MM_FLUSH_ZERO_MASK
}

/// Returns the rounding-mode field of MXCSR (`_MM_ROUND_*` values).
#[inline]
#[allow(non_snake_case)]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _MM_GET_ROUNDING_MODE() -> u32 {
    _mm_getcsr() & _MM_ROUND_MASK
}

/// Replaces the exception-mask field of MXCSR with `x`, leaving the other
/// fields unchanged.
#[inline]
#[allow(non_snake_case)]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _MM_SET_EXCEPTION_MASK(x: u32) {
    _mm_setcsr((_mm_getcsr() & !_MM_MASK_MASK) | x)
}

/// Replaces the exception-status field of MXCSR with `x`, leaving the
/// other fields unchanged.
#[inline]
#[allow(non_snake_case)]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) {
    _mm_setcsr((_mm_getcsr() & !_MM_EXCEPT_MASK) | x)
}

/// Sets the flush-to-zero bit of MXCSR to `x` (`_MM_FLUSH_ZERO_ON`/`OFF`),
/// leaving the other fields unchanged.
#[inline]
#[allow(non_snake_case)]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) {
    let val = (_mm_getcsr() & !_MM_FLUSH_ZERO_MASK) | x;
    _mm_setcsr(val)
}

/// Sets the rounding-mode field of MXCSR to `x` (`_MM_ROUND_*`), leaving
/// the other fields unchanged.
#[inline]
#[allow(non_snake_case)]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _MM_SET_ROUNDING_MODE(x: u32) {
    _mm_setcsr((_mm_getcsr() & !_MM_ROUND_MASK) | x)
}
// Locality hints accepted by `_mm_prefetch` (higher = keep in more cache
// levels; NTA = non-temporal, minimize cache pollution).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_T0: i32 = 3;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_T1: i32 = 2;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_T2: i32 = 1;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_NTA: i32 = 0;
/// Prefetches the cache line containing `p` with the locality hint
/// `strategy` (one of the `_MM_HINT_*` constants).
///
/// `llvm.prefetch` requires constant arguments, so the macro expands the
/// (`rustc_args_required_const`) `strategy` into the four possible calls.
/// The arguments to `prefetch` are (address, rw=read, locality, ty=data).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(prefetcht0, strategy = _MM_HINT_T0))]
#[cfg_attr(test, assert_instr(prefetcht1, strategy = _MM_HINT_T1))]
#[cfg_attr(test, assert_instr(prefetcht2, strategy = _MM_HINT_T2))]
#[cfg_attr(test, assert_instr(prefetchnta, strategy = _MM_HINT_NTA))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_prefetch(p: *const i8, strategy: i32) {
    macro_rules! pref {
        ($imm8:expr) => {
            match $imm8 {
                0 => prefetch(p, 0, 0, 1),
                1 => prefetch(p, 0, 1, 1),
                2 => prefetch(p, 0, 2, 1),
                _ => prefetch(p, 0, 3, 1),
            }
        };
    }
    pref!(strategy)
}
/// Returns a `__m128` with indeterminate contents; callers must overwrite
/// every lane before reading it.
#[inline]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_undefined_ps() -> __m128 {
    mem::MaybeUninit::<__m128>::uninit().assume_init()
}
/// Transposes, in place, the 4x4 matrix whose rows are `row0`..`row3`.
///
/// Classic SSE transpose: two interleave passes (`unpacklo`/`unpackhi`)
/// followed by half-register moves (`movelh`/`movehl`) to regroup the
/// columns into rows.
#[inline]
#[allow(non_snake_case)]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _MM_TRANSPOSE4_PS(
    row0: &mut __m128,
    row1: &mut __m128,
    row2: &mut __m128,
    row3: &mut __m128,
) {
    let tmp0 = _mm_unpacklo_ps(*row0, *row1);
    let tmp2 = _mm_unpacklo_ps(*row2, *row3);
    let tmp1 = _mm_unpackhi_ps(*row0, *row1);
    let tmp3 = _mm_unpackhi_ps(*row2, *row3);
    *row0 = _mm_movelh_ps(tmp0, tmp2);
    *row1 = _mm_movehl_ps(tmp2, tmp0);
    *row2 = _mm_movelh_ps(tmp1, tmp3);
    *row3 = _mm_movehl_ps(tmp3, tmp1);
}
// Declarations of the LLVM intrinsics that back the functions above. The
// `link_name` attribute binds each Rust signature to the corresponding
// LLVM builtin; `improper_ctypes` is allowed because SIMD vector types are
// not FFI-safe in the usual C sense but are understood by LLVM here.
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.sse.add.ss"]
    fn addss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.sub.ss"]
    fn subss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.mul.ss"]
    fn mulss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.div.ss"]
    fn divss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.sqrt.ss"]
    fn sqrtss(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.sqrt.ps"]
    fn sqrtps(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rcp.ss"]
    fn rcpss(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rcp.ps"]
    fn rcpps(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rsqrt.ss"]
    fn rsqrtss(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rsqrt.ps"]
    fn rsqrtps(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.min.ss"]
    fn minss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.min.ps"]
    fn minps(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.max.ss"]
    fn maxss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.max.ps"]
    fn maxps(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.movmsk.ps"]
    fn movmskps(a: __m128) -> i32;
    // `imm8` selects the comparison predicate (see `_mm_cmp*_ps`).
    #[link_name = "llvm.x86.sse.cmp.ps"]
    fn cmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
    #[link_name = "llvm.x86.sse.comieq.ss"]
    fn comieq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comilt.ss"]
    fn comilt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comile.ss"]
    fn comile_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comigt.ss"]
    fn comigt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comige.ss"]
    fn comige_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comineq.ss"]
    fn comineq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomieq.ss"]
    fn ucomieq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomilt.ss"]
    fn ucomilt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomile.ss"]
    fn ucomile_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomigt.ss"]
    fn ucomigt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomige.ss"]
    fn ucomige_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomineq.ss"]
    fn ucomineq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.cvtss2si"]
    fn cvtss2si(a: __m128) -> i32;
    #[link_name = "llvm.x86.sse.cvttss2si"]
    fn cvttss2si(a: __m128) -> i32;
    #[link_name = "llvm.x86.sse.cvtsi2ss"]
    fn cvtsi2ss(a: __m128, b: i32) -> __m128;
    #[link_name = "llvm.x86.sse.sfence"]
    fn sfence();
    #[link_name = "llvm.x86.sse.stmxcsr"]
    fn stmxcsr(p: *mut i8);
    #[link_name = "llvm.x86.sse.ldmxcsr"]
    fn ldmxcsr(p: *const i8);
    #[link_name = "llvm.prefetch"]
    fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
    // `imm8` selects the comparison predicate (see `_mm_cmp*_ss`).
    #[link_name = "llvm.x86.sse.cmp.ss"]
    fn cmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
    // MMX-interop intrinsics used by the `sse,mmx` functions in this file.
    #[link_name = "llvm.x86.mmx.movnt.dq"]
    fn movntdq(a: *mut __m64, b: __m64);
    #[link_name = "llvm.x86.sse.cvtpi2ps"]
    fn cvtpi2ps(a: __m128, b: __m64) -> __m128;
    #[link_name = "llvm.x86.mmx.maskmovq"]
    fn maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8);
    #[link_name = "llvm.x86.mmx.pextr.w"]
    fn pextrw(a: __m64, imm8: i32) -> i32;
    #[link_name = "llvm.x86.mmx.pinsr.w"]
    fn pinsrw(a: __m64, d: i32, imm8: i32) -> __m64;
    #[link_name = "llvm.x86.mmx.pmovmskb"]
    fn pmovmskb(a: __m64) -> i32;
    #[link_name = "llvm.x86.sse.pshuf.w"]
    fn pshufw(a: __m64, imm8: i8) -> __m64;
    #[link_name = "llvm.x86.mmx.pmaxs.w"]
    fn pmaxsw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.pmaxu.b"]
    fn pmaxub(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.pmins.w"]
    fn pminsw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.pminu.b"]
    fn pminub(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.pmulhu.w"]
    fn pmulhuw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.pmull.w"]
    fn pmullw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.pavg.b"]
    fn pavgb(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.pavg.w"]
    fn pavgw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.psad.bw"]
    fn psadbw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.sse.cvtps2pi"]
    fn cvtps2pi(a: __m128) -> __m64;
    #[link_name = "llvm.x86.sse.cvttps2pi"]
    fn cvttps2pi(a: __m128) -> __m64;
}
/// Stores `a` to the 16-byte-aligned address `mem_addr` with a non-temporal
/// hint, bypassing the cache to avoid polluting it with write-once data.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movntps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
// The cast requires `mem_addr` to be aligned for `__m128` (16 bytes).
intrinsics::nontemporal_store(mem_addr as *mut __m128, a);
}
/// Stores the 64-bit value `a` to `mem_addr` with a non-temporal hint (MOVNTQ).
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(movntq))]
pub unsafe fn _mm_stream_pi(mem_addr: *mut __m64, a: __m64) {
movntdq(mem_addr, a)
}
/// Element-wise maximum of packed signed 16-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxsw))]
pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 {
pmaxsw(a, b)
}
/// Alias of [`_mm_max_pi16`] under the classic `_m_` intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxsw))]
pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 {
_mm_max_pi16(a, b)
}
/// Element-wise maximum of packed unsigned 8-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxub))]
pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 {
pmaxub(a, b)
}
/// Alias of [`_mm_max_pu8`] under the classic `_m_` intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxub))]
pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 {
_mm_max_pu8(a, b)
}
/// Element-wise minimum of packed signed 16-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminsw))]
pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 {
pminsw(a, b)
}
/// Alias of [`_mm_min_pi16`] under the classic `_m_` intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminsw))]
pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 {
_mm_min_pi16(a, b)
}
/// Element-wise minimum of packed unsigned 8-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminub))]
pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 {
pminub(a, b)
}
/// Alias of [`_mm_min_pu8`] under the classic `_m_` intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminub))]
pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 {
_mm_min_pu8(a, b)
}
/// Multiplies packed unsigned 16-bit integers and keeps the high 16 bits of
/// each 32-bit product.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmulhuw))]
pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 {
pmulhuw(a, b)
}
/// Multiplies packed 16-bit integers and keeps the low 16 bits of each
/// 32-bit product.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmullw))]
pub unsafe fn _mm_mullo_pi16(a: __m64, b: __m64) -> __m64 {
pmullw(a, b)
}
/// Alias of [`_mm_mulhi_pu16`] under the classic `_m_` intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmulhuw))]
pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 {
_mm_mulhi_pu16(a, b)
}
/// Rounding average of packed unsigned 8-bit integers in `a` and `b` (PAVGB).
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgb))]
pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 {
pavgb(a, b)
}
/// Alias of [`_mm_avg_pu8`] under the classic `_m_` intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgb))]
pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 {
_mm_avg_pu8(a, b)
}
/// Rounding average of packed unsigned 16-bit integers in `a` and `b` (PAVGW).
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgw))]
pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 {
pavgw(a, b)
}
/// Alias of [`_mm_avg_pu16`] under the classic `_m_` intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgw))]
pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 {
_mm_avg_pu16(a, b)
}
/// Sum of absolute differences of the eight unsigned bytes in `a` and `b`
/// (PSADBW); the 16-bit sum lands in the low word of the result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(psadbw))]
pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 {
psadbw(a, b)
}
/// Alias of [`_mm_sad_pu8`] under the classic `_m_` intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(psadbw))]
pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 {
_mm_sad_pu8(a, b)
}
/// Converts the two packed 32-bit integers in `b` to single-precision floats,
/// replacing the low two lanes of `a`; the high two lanes of `a` pass through.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 {
cvtpi2ps(a, b)
}
/// Alias of [`_mm_cvtpi32_ps`] under the older intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: __m64) -> __m128 {
_mm_cvtpi32_ps(a, b)
}
/// Converts the low four signed 8-bit integers of `a` to single-precision
/// floats.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> __m128 {
let b = _mm_setzero_si64();
// `0 > a` yields 0xFF for negative bytes, so interleaving with it
// sign-extends each byte to 16 bits before the i16 -> f32 conversion.
let b = _mm_cmpgt_pi8(b, a);
let b = _mm_unpacklo_pi8(a, b);
_mm_cvtpi16_ps(b)
}
/// Converts the low four unsigned 8-bit integers of `a` to single-precision
/// floats.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> __m128 {
let b = _mm_setzero_si64();
// Interleaving with zero zero-extends each byte to 16 bits.
let b = _mm_unpacklo_pi8(a, b);
_mm_cvtpi16_ps(b)
}
/// Converts the four packed signed 16-bit integers in `a` to four
/// single-precision floats.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> __m128 {
let b = _mm_setzero_si64();
// `0 > a` produces the sign mask, so unpacking widens each i16 to i32.
let b = _mm_cmpgt_pi16(b, a);
// Convert the upper pair first, then shuffle it into the high lanes
// before converting the lower pair into the low lanes.
let c = _mm_unpackhi_pi16(a, b);
let r = _mm_setzero_ps();
let r = cvtpi2ps(r, c);
let r = _mm_movelh_ps(r, r);
let c = _mm_unpacklo_pi16(a, b);
cvtpi2ps(r, c)
}
/// Converts the four packed unsigned 16-bit integers in `a` to four
/// single-precision floats.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 {
// Unpacking against zero zero-extends each u16 to 32 bits.
let b = _mm_setzero_si64();
// Upper pair goes into the high lanes via movelh, lower pair last.
let c = _mm_unpackhi_pi16(a, b);
let r = _mm_setzero_ps();
let r = cvtpi2ps(r, c);
let r = _mm_movelh_ps(r, r);
let c = _mm_unpacklo_pi16(a, b);
cvtpi2ps(r, c)
}
/// Converts the two i32 pairs `a` and `b` to four single-precision floats:
/// `a` fills the low two lanes, `b` the high two.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 {
let c = _mm_setzero_ps();
// Convert `b` first, duplicate into the high half, then overwrite the
// low half with the conversion of `a`.
let c = _mm_cvtpi32_ps(c, b);
let c = _mm_movelh_ps(c, c);
_mm_cvtpi32_ps(c, a)
}
/// Conditionally stores each byte of `a` to `mem_addr` where the
/// corresponding byte of `mask` has its most significant bit set (MASKMOVQ).
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(maskmovq))]
pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) {
maskmovq(a, mask, mem_addr)
}
/// Alias of [`_mm_maskmove_si64`] under the classic `_m_` intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(maskmovq))]
pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
_mm_maskmove_si64(a, mask, mem_addr)
}
/// Extracts the 16-bit word of `a` selected by the constant `imm2` (0..=3),
/// zero-extended to `i32`.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i32 {
// `constify_imm2!` expands to a match that turns the runtime value into
// the compile-time immediate the instruction requires.
macro_rules! call {
($imm2:expr) => {
pextrw(a, $imm2) as i32
};
}
constify_imm2!(imm2, call)
}
/// Alias of [`_mm_extract_pi16`] under the classic `_m_` intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i32 {
macro_rules! call {
($imm2:expr) => {
pextrw(a, $imm2) as i32
};
}
constify_imm2!(imm2, call)
}
/// Returns `a` with the 16-bit word selected by the constant `imm2` (0..=3)
/// replaced by the low 16 bits of `d`.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 {
macro_rules! call {
($imm2:expr) => {
pinsrw(a, d, $imm2)
};
}
constify_imm2!(imm2, call)
}
/// Alias of [`_mm_insert_pi16`] under the classic `_m_` intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 {
macro_rules! call {
($imm2:expr) => {
pinsrw(a, d, $imm2)
};
}
constify_imm2!(imm2, call)
}
/// Gathers the most significant bit of each byte of `a` into the low 8 bits
/// of the result (PMOVMSKB).
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmovmskb))]
pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 {
pmovmskb(a)
}
/// Alias of [`_mm_movemask_pi8`] under the classic `_m_` intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmovmskb))]
pub unsafe fn _m_pmovmskb(a: __m64) -> i32 {
_mm_movemask_pi8(a)
}
/// Shuffles the four 16-bit words of `a` according to the constant `imm8`
/// control byte (two bits per destination word).
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 {
macro_rules! call {
($imm8:expr) => {
pshufw(a, $imm8)
};
}
constify_imm8!(imm8, call)
}
/// Alias of [`_mm_shuffle_pi16`] under the classic `_m_` intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 {
macro_rules! call {
($imm8:expr) => {
pshufw(a, $imm8)
};
}
constify_imm8!(imm8, call)
}
/// Converts the low two single-precision floats of `a` to 32-bit integers
/// with truncation toward zero (CVTTPS2PI).
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 {
cvttps2pi(a)
}
/// Alias of [`_mm_cvttps_pi32`] under the older intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 {
_mm_cvttps_pi32(a)
}
/// Converts the low two single-precision floats of `a` to 32-bit integers
/// using the current rounding mode (CVTPS2PI).
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 {
cvtps2pi(a)
}
/// Alias of [`_mm_cvtps_pi32`] under the older intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 {
_mm_cvtps_pi32(a)
}
/// Converts all four single-precision floats of `a` to signed 16-bit
/// integers with signed saturation.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 {
// Convert the low pair, shift the high pair down, convert it, then pack
// both i32 pairs into four saturated i16s.
let b = _mm_cvtps_pi32(a);
let a = _mm_movehl_ps(a, a);
let c = _mm_cvtps_pi32(a);
_mm_packs_pi32(b, c)
}
/// Converts all four single-precision floats of `a` to signed 8-bit integers
/// with signed saturation; the upper four result bytes are zero.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 {
let b = _mm_cvtps_pi16(a);
// Packing against zero fills the unused upper half with zeros.
let c = _mm_setzero_si64();
_mm_packs_pi16(b, c)
}
#[cfg(test)]
mod tests {
use crate::{hint::black_box, mem::transmute};
use std::{boxed, f32::NAN};
use stdarch_test::simd_test;
use crate::core_arch::{simd::*, x86::*};
// _mm_add_ps: lane-wise f32 addition across all four lanes.
#[simd_test(enable = "sse")]
unsafe fn test_mm_add_ps() {
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
let r = _mm_add_ps(a, b);
assert_eq_m128(r, _mm_setr_ps(-101.0, 25.0, 0.0, -15.0));
}
// _mm_add_ss: only the lowest lane is summed; note `_mm_set_ps` lists lanes
// high-to-low, so -10.0 and -5.0 are the low lanes here.
#[simd_test(enable = "sse")]
unsafe fn test_mm_add_ss() {
let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0);
let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0);
let r = _mm_add_ss(a, b);
assert_eq_m128(r, _mm_set_ps(-1.0, 5.0, 0.0, -15.0));
}
// _mm_sub_ps: lane-wise f32 subtraction.
#[simd_test(enable = "sse")]
unsafe fn test_mm_sub_ps() {
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
let r = _mm_sub_ps(a, b);
assert_eq_m128(r, _mm_setr_ps(99.0, -15.0, 0.0, -5.0));
}
// _mm_sub_ss: only lane 0 is subtracted; lanes 1-3 come from `a`.
#[simd_test(enable = "sse")]
unsafe fn test_mm_sub_ss() {
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
let r = _mm_sub_ss(a, b);
assert_eq_m128(r, _mm_setr_ps(99.0, 5.0, 0.0, -10.0));
}
// _mm_mul_ps: lane-wise f32 multiplication.
#[simd_test(enable = "sse")]
unsafe fn test_mm_mul_ps() {
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
let r = _mm_mul_ps(a, b);
assert_eq_m128(r, _mm_setr_ps(100.0, 100.0, 0.0, 50.0));
}
// _mm_mul_ss: only lane 0 is multiplied; lanes 1-3 come from `a`.
#[simd_test(enable = "sse")]
unsafe fn test_mm_mul_ss() {
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
let r = _mm_mul_ss(a, b);
assert_eq_m128(r, _mm_setr_ps(100.0, 5.0, 0.0, -10.0));
}
// _mm_div_ps: lane-wise f32 division.
#[simd_test(enable = "sse")]
unsafe fn test_mm_div_ps() {
let a = _mm_setr_ps(-1.0, 5.0, 2.0, -10.0);
let b = _mm_setr_ps(-100.0, 20.0, 0.2, -5.0);
let r = _mm_div_ps(a, b);
assert_eq_m128(r, _mm_setr_ps(0.01, 0.25, 10.0, 2.0));
}
// _mm_div_ss: only lane 0 is divided; lanes 1-3 come from `a`.
#[simd_test(enable = "sse")]
unsafe fn test_mm_div_ss() {
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
let r = _mm_div_ss(a, b);
assert_eq_m128(r, _mm_setr_ps(0.01, 5.0, 0.0, -10.0));
}
// _mm_sqrt_ss: square root of lane 0 only; lanes 1-3 pass through.
#[simd_test(enable = "sse")]
unsafe fn test_mm_sqrt_ss() {
let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
let r = _mm_sqrt_ss(a);
let e = _mm_setr_ps(2.0, 13.0, 16.0, 100.0);
assert_eq_m128(r, e);
}
// _mm_sqrt_ps: lane-wise square root.
#[simd_test(enable = "sse")]
unsafe fn test_mm_sqrt_ps() {
let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
let r = _mm_sqrt_ps(a);
let e = _mm_setr_ps(2.0, 3.6055512, 4.0, 10.0);
assert_eq_m128(r, e);
}
// _mm_rcp_ss: approximate reciprocal of lane 0 (compared exactly against the
// value the approximation instruction is known to produce on x86 hardware).
#[simd_test(enable = "sse")]
unsafe fn test_mm_rcp_ss() {
let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
let r = _mm_rcp_ss(a);
let e = _mm_setr_ps(0.24993896, 13.0, 16.0, 100.0);
assert_eq_m128(r, e);
}
// _mm_rcp_ps: approximate reciprocal per lane, checked within the
// architectural relative-error bound (|err| <= 1.5 * 2^-12).
#[simd_test(enable = "sse")]
unsafe fn test_mm_rcp_ps() {
let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
let r = _mm_rcp_ps(a);
let e = _mm_setr_ps(0.24993896, 0.0769043, 0.06248474, 0.0099983215);
let rel_err = 0.00048828125;
for i in 0..4 {
assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
}
}
// _mm_rsqrt_ss: approximate 1/sqrt of lane 0; lanes 1-3 pass through.
#[simd_test(enable = "sse")]
unsafe fn test_mm_rsqrt_ss() {
let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
let r = _mm_rsqrt_ss(a);
let e = _mm_setr_ps(0.49987793, 13.0, 16.0, 100.0);
let rel_err = 0.00048828125;
for i in 0..4 {
assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
}
}
// _mm_rsqrt_ps: approximate 1/sqrt per lane within the architectural bound.
#[simd_test(enable = "sse")]
unsafe fn test_mm_rsqrt_ps() {
let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
let r = _mm_rsqrt_ps(a);
let e = _mm_setr_ps(0.49987793, 0.2772827, 0.24993896, 0.099990845);
let rel_err = 0.00048828125;
for i in 0..4 {
assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
}
}
// _mm_min_ss: minimum in lane 0 only; lanes 1-3 come from `a`.
#[simd_test(enable = "sse")]
unsafe fn test_mm_min_ss() {
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
let r = _mm_min_ss(a, b);
assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
}
// _mm_min_ps: lane-wise minimum.
#[simd_test(enable = "sse")]
unsafe fn test_mm_min_ps() {
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
let r = _mm_min_ps(a, b);
assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
}
// _mm_max_ss: maximum in lane 0 only; lanes 1-3 come from `a`.
#[simd_test(enable = "sse")]
unsafe fn test_mm_max_ss() {
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
let r = _mm_max_ss(a, b);
assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, -10.0));
}
// _mm_max_ps: lane-wise maximum.
#[simd_test(enable = "sse")]
unsafe fn test_mm_max_ps() {
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
let r = _mm_max_ps(a, b);
assert_eq_m128(r, _mm_setr_ps(-1.0, 20.0, 0.0, -5.0));
}
// _mm_and_ps: bitwise AND; black_box blocks constant folding of the inputs.
#[simd_test(enable = "sse")]
unsafe fn test_mm_and_ps() {
let a = transmute(u32x4::splat(0b0011));
let b = transmute(u32x4::splat(0b0101));
let r = _mm_and_ps(*black_box(&a), *black_box(&b));
let e = transmute(u32x4::splat(0b0001));
assert_eq_m128(r, e);
}
// _mm_andnot_ps: (!a) & b per bit.
#[simd_test(enable = "sse")]
unsafe fn test_mm_andnot_ps() {
let a = transmute(u32x4::splat(0b0011));
let b = transmute(u32x4::splat(0b0101));
let r = _mm_andnot_ps(*black_box(&a), *black_box(&b));
let e = transmute(u32x4::splat(0b0100));
assert_eq_m128(r, e);
}
// _mm_or_ps: bitwise OR.
#[simd_test(enable = "sse")]
unsafe fn test_mm_or_ps() {
let a = transmute(u32x4::splat(0b0011));
let b = transmute(u32x4::splat(0b0101));
let r = _mm_or_ps(*black_box(&a), *black_box(&b));
let e = transmute(u32x4::splat(0b0111));
assert_eq_m128(r, e);
}
// _mm_xor_ps: bitwise XOR.
#[simd_test(enable = "sse")]
unsafe fn test_mm_xor_ps() {
let a = transmute(u32x4::splat(0b0011));
let b = transmute(u32x4::splat(0b0101));
let r = _mm_xor_ps(*black_box(&a), *black_box(&b));
let e = transmute(u32x4::splat(0b0110));
assert_eq_m128(r, e);
}
// _mm_cmpeq_ss: lane 0 becomes an all-ones/all-zeros mask; lanes 1-3 of `a`
// pass through unchanged.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpeq_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0);
let r: u32x4 = transmute(_mm_cmpeq_ss(a, b));
let e: u32x4 = transmute(_mm_setr_ps(transmute(0u32), 2.0, 3.0, 4.0));
assert_eq!(r, e);
let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2));
let e2: u32x4 = transmute(_mm_setr_ps(transmute(0xffffffffu32), 2.0, 3.0, 4.0));
assert_eq!(r2, e2);
}
// _mm_cmplt_ss against b<a, b==a, b>a; expected lane-0 masks b1/c1/d1.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmplt_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
let b1 = 0u32;
let c1 = 0u32;
let d1 = !0u32;
let rb: u32x4 = transmute(_mm_cmplt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmplt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmplt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
// _mm_cmple_ss against b<a, b==a, b>a; equality also sets the mask.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmple_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
let b1 = 0u32;
let c1 = !0u32;
let d1 = !0u32;
let rb: u32x4 = transmute(_mm_cmple_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmple_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmple_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
// _mm_cmpgt_ss: only strict a>b sets the lane-0 mask.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpgt_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
let b1 = !0u32;
let c1 = 0u32;
let d1 = 0u32;
let rb: u32x4 = transmute(_mm_cmpgt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpgt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpgt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
// _mm_cmpge_ss: a>b and a==b set the lane-0 mask.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpge_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
let b1 = !0u32;
let c1 = !0u32;
let d1 = 0u32;
let rb: u32x4 = transmute(_mm_cmpge_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpge_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpge_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
// _mm_cmpneq_ss: any inequality sets the lane-0 mask.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpneq_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
let b1 = !0u32;
let c1 = 0u32;
let d1 = !0u32;
let rb: u32x4 = transmute(_mm_cmpneq_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpneq_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpneq_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
// _mm_cmpnlt_ss: negation of lt — mask set for a>=b.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpnlt_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
let b1 = !0u32;
let c1 = !0u32;
let d1 = 0u32;
let rb: u32x4 = transmute(_mm_cmpnlt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpnlt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpnlt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
// _mm_cmpnle_ss: negation of le — mask set for a>b.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpnle_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
let b1 = !0u32;
let c1 = 0u32;
let d1 = 0u32;
let rb: u32x4 = transmute(_mm_cmpnle_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpnle_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpnle_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
// _mm_cmpngt_ss: negation of gt — mask set for a<=b.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpngt_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
let b1 = 0u32;
let c1 = !0u32;
let d1 = !0u32;
let rb: u32x4 = transmute(_mm_cmpngt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpngt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpngt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
// _mm_cmpnge_ss: negation of ge — mask set for a<b.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpnge_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
let b1 = 0u32;
let c1 = 0u32;
let d1 = !0u32;
let rb: u32x4 = transmute(_mm_cmpnge_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpnge_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpnge_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
// _mm_cmpord_ss: mask set when neither lane-0 operand is NaN.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpord_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
let b1 = !0u32;
let c1 = 0u32;
let d1 = !0u32;
let rb: u32x4 = transmute(_mm_cmpord_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpord_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpord_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
// _mm_cmpunord_ss: mask set when either lane-0 operand is NaN.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpunord_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
let b1 = 0u32;
let c1 = !0u32;
let d1 = 0u32;
let rb: u32x4 = transmute(_mm_cmpunord_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpunord_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpunord_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
// _mm_cmpeq_ps: full-width mask per lane; NaN == NaN is false.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpeq_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(fls, fls, tru, fls);
let r: u32x4 = transmute(_mm_cmpeq_ps(a, b));
assert_eq!(r, e);
}
// _mm_cmplt_ps: NaN lanes compare false.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmplt_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(tru, fls, fls, fls);
let r: u32x4 = transmute(_mm_cmplt_ps(a, b));
assert_eq!(r, e);
}
// _mm_cmple_ps: equality counts; the NaN lane compares false.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmple_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0);
let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(tru, fls, tru, fls);
let r: u32x4 = transmute(_mm_cmple_ps(a, b));
assert_eq!(r, e);
}
// _mm_cmpgt_ps: strict greater-than; NaN lane compares false.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpgt_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(fls, tru, fls, fls);
let r: u32x4 = transmute(_mm_cmpgt_ps(a, b));
assert_eq!(r, e);
}
// _mm_cmpge_ps: equality counts; NaN lane compares false.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpge_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(fls, tru, tru, fls);
let r: u32x4 = transmute(_mm_cmpge_ps(a, b));
assert_eq!(r, e);
}
// _mm_cmpneq_ps: NaN lanes compare true (unordered != is true).
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpneq_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(tru, tru, fls, tru);
let r: u32x4 = transmute(_mm_cmpneq_ps(a, b));
assert_eq!(r, e);
}
// _mm_cmpnlt_ps: not-less-than; NaN lanes compare true (unordered).
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpnlt_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(fls, tru, tru, tru);
let r: u32x4 = transmute(_mm_cmpnlt_ps(a, b));
assert_eq!(r, e);
}
// _mm_cmpnle_ps: not-less-or-equal; NaN lanes compare true.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpnle_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(fls, tru, fls, tru);
let r: u32x4 = transmute(_mm_cmpnle_ps(a, b));
assert_eq!(r, e);
}
// _mm_cmpngt_ps: not-greater-than; NaN lanes compare true.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpngt_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(tru, fls, tru, tru);
let r: u32x4 = transmute(_mm_cmpngt_ps(a, b));
assert_eq!(r, e);
}
// _mm_cmpnge_ps: not-greater-or-equal; NaN lanes compare true.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpnge_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(tru, fls, fls, tru);
let r: u32x4 = transmute(_mm_cmpnge_ps(a, b));
assert_eq!(r, e);
}
// _mm_cmpord_ps: lane true only when both operands are non-NaN.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpord_ps() {
let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(tru, fls, fls, fls);
let r: u32x4 = transmute(_mm_cmpord_ps(a, b));
assert_eq!(r, e);
}
// _mm_cmpunord_ps: lane true when at least one operand is NaN.
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpunord_ps() {
let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(fls, tru, tru, tru);
let r: u32x4 = transmute(_mm_cmpunord_ps(a, b));
assert_eq!(r, e);
}
// _mm_comieq_ss over equal / lt / gt / NaN lane-0 pairs; NaN yields 0.
#[simd_test(enable = "sse")]
unsafe fn test_mm_comieq_ss() {
let aa = &[3.0f32, 12.0, 23.0, NAN];
let bb = &[3.0f32, 47.5, 1.5, NAN];
let ee = &[1i32, 0, 0, 0];
for i in 0..4 {
let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
let r = _mm_comieq_ss(a, b);
assert_eq!(
ee[i], r,
"_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
a, b, r, ee[i], i
);
}
}
// _mm_comilt_ss: strict less-than of lane 0; NaN yields 0.
#[simd_test(enable = "sse")]
unsafe fn test_mm_comilt_ss() {
let aa = &[3.0f32, 12.0, 23.0, NAN];
let bb = &[3.0f32, 47.5, 1.5, NAN];
let ee = &[0i32, 1, 0, 0];
for i in 0..4 {
let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
let r = _mm_comilt_ss(a, b);
assert_eq!(
ee[i], r,
"_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
a, b, r, ee[i], i
);
}
}
// _mm_comile_ss: less-or-equal of lane 0; NaN yields 0.
#[simd_test(enable = "sse")]
unsafe fn test_mm_comile_ss() {
let aa = &[3.0f32, 12.0, 23.0, NAN];
let bb = &[3.0f32, 47.5, 1.5, NAN];
let ee = &[1i32, 1, 0, 0];
for i in 0..4 {
let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
let r = _mm_comile_ss(a, b);
assert_eq!(
ee[i], r,
"_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})",
a, b, r, ee[i], i
);
}
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_comigt_ss() {
let aa = &[3.0f32, 12.0, 23.0, NAN];
let bb = &[3.0f32, 47.5, 1.5, NAN];
let ee = &[1i32, 0, 1, 0];
for i in 0..4 {
let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
let r = _mm_comige_ss(a, b);
assert_eq!(
ee[i], r,
"_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})",
a, b, r, ee[i], i
);
}
}
// _mm_comineq_ss: not-equal of lane 0; NaN yields 1 (unordered counts as !=).
#[simd_test(enable = "sse")]
unsafe fn test_mm_comineq_ss() {
let aa = &[3.0f32, 12.0, 23.0, NAN];
let bb = &[3.0f32, 47.5, 1.5, NAN];
let ee = &[0i32, 1, 1, 1];
for i in 0..4 {
let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
let r = _mm_comineq_ss(a, b);
assert_eq!(
ee[i], r,
"_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
a, b, r, ee[i], i
);
}
}
// _mm_ucomieq_ss: same results as comieq, but quiet on NaN (no #IA).
#[simd_test(enable = "sse")]
unsafe fn test_mm_ucomieq_ss() {
let aa = &[3.0f32, 12.0, 23.0, NAN];
let bb = &[3.0f32, 47.5, 1.5, NAN];
let ee = &[1i32, 0, 0, 0];
for i in 0..4 {
let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
let r = _mm_ucomieq_ss(a, b);
assert_eq!(
ee[i], r,
"_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
a, b, r, ee[i], i
);
}
}
// _mm_ucomilt_ss: unordered (quiet) strict less-than of lane 0.
#[simd_test(enable = "sse")]
unsafe fn test_mm_ucomilt_ss() {
let aa = &[3.0f32, 12.0, 23.0, NAN];
let bb = &[3.0f32, 47.5, 1.5, NAN];
let ee = &[0i32, 1, 0, 0];
for i in 0..4 {
let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
let r = _mm_ucomilt_ss(a, b);
assert_eq!(
ee[i], r,
"_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
a, b, r, ee[i], i
);
}
}
// _mm_ucomile_ss: unordered (quiet) less-or-equal of lane 0.
#[simd_test(enable = "sse")]
unsafe fn test_mm_ucomile_ss() {
let aa = &[3.0f32, 12.0, 23.0, NAN];
let bb = &[3.0f32, 47.5, 1.5, NAN];
let ee = &[1i32, 1, 0, 0];
for i in 0..4 {
let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
let r = _mm_ucomile_ss(a, b);
assert_eq!(
ee[i], r,
"_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})",
a, b, r, ee[i], i
);
}
}
// _mm_ucomigt_ss: unordered (quiet) strict greater-than of lane 0.
#[simd_test(enable = "sse")]
unsafe fn test_mm_ucomigt_ss() {
let aa = &[3.0f32, 12.0, 23.0, NAN];
let bb = &[3.0f32, 47.5, 1.5, NAN];
let ee = &[0i32, 0, 1, 0];
for i in 0..4 {
let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
let r = _mm_ucomigt_ss(a, b);
assert_eq!(
ee[i], r,
"_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
a, b, r, ee[i], i
);
}
}
// _mm_ucomige_ss: unordered (quiet) greater-or-equal of lane 0.
#[simd_test(enable = "sse")]
unsafe fn test_mm_ucomige_ss() {
let aa = &[3.0f32, 12.0, 23.0, NAN];
let bb = &[3.0f32, 47.5, 1.5, NAN];
let ee = &[1i32, 0, 1, 0];
for i in 0..4 {
let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
let r = _mm_ucomige_ss(a, b);
assert_eq!(
ee[i], r,
"_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})",
a, b, r, ee[i], i
);
}
}
// _mm_ucomineq_ss: unordered (quiet) not-equal; NaN yields 1.
#[simd_test(enable = "sse")]
unsafe fn test_mm_ucomineq_ss() {
let aa = &[3.0f32, 12.0, 23.0, NAN];
let bb = &[3.0f32, 47.5, 1.5, NAN];
let ee = &[0i32, 1, 1, 1];
for i in 0..4 {
let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
let r = _mm_ucomineq_ss(a, b);
assert_eq!(
ee[i], r,
"_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
a, b, r, ee[i], i
);
}
}
// COMISS vs UCOMISS: both return the same boolean, but only the signaling
// form (_mm_comieq_ss) raises the invalid-operation exception flag on NaN
// input; the quiet form (_mm_ucomieq_ss) never does.
// Fixed: the assertion messages misnamed the intrinsics
// ("_mm_comeq_ss"/"_mm_ucomeq_ss"), producing misleading failure output.
#[simd_test(enable = "sse")]
unsafe fn test_mm_comieq_ss_vs_ucomieq_ss() {
let aa = &[3.0f32, NAN, 23.0, NAN];
let bb = &[3.0f32, 47.5, NAN, NAN];
let ee = &[1i32, 0, 0, 0];
// Whether each pair should raise #IA for the signaling compare.
let exc = &[0u32, 1, 1, 1];
for i in 0..4 {
let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
// Clear MXCSR exception flags before each compare so the observed
// state is attributable to that single instruction.
_MM_SET_EXCEPTION_STATE(0);
let r1 = _mm_comieq_ss(*black_box(&a), b);
let s1 = _MM_GET_EXCEPTION_STATE();
_MM_SET_EXCEPTION_STATE(0);
let r2 = _mm_ucomieq_ss(*black_box(&a), b);
let s2 = _MM_GET_EXCEPTION_STATE();
assert_eq!(
ee[i], r1,
"_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
a, b, r1, ee[i], i
);
assert_eq!(
ee[i], r2,
"_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
a, b, r2, ee[i], i
);
assert_eq!(
s1,
exc[i] * _MM_EXCEPT_INVALID,
"_mm_comieq_ss() set exception flags: {} (i={})",
s1,
i
);
assert_eq!(
s2,
0,
"_mm_ucomieq_ss() set exception flags: {} (i={})",
s2,
i
);
}
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cvtss_si32() {
    // cvtss2si rounds to nearest; inputs out of i32 range and NaN produce
    // the "integer indefinite" value i32::MIN.
    let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
    let result = &[42i32, -3, i32::min_value(), 0, i32::min_value(), 2147483520];
    for (i, (&input, &e)) in inputs.iter().zip(result.iter()).enumerate() {
        let x = _mm_setr_ps(input, 1.0, 3.0, 4.0);
        let r = _mm_cvtss_si32(x);
        assert_eq!(
            e, r,
            "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}",
            i, x, r, e
        );
    }
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cvttss_si32() {
    // cvttss2si truncates toward zero; inputs out of i32 range and NaN
    // produce the "integer indefinite" value i32::MIN.
    let cases = &[
        (42.0f32, 42i32),
        (-31.4, -31),
        (-33.5, -33),
        (-34.5, -34),
        (10.999, 10),
        (-5.99, -5),
        (4.0e10, i32::min_value()),
        (4.0e-10, 0),
        (NAN, i32::min_value()),
        (2147483500.1, 2147483520),
    ];
    for (i, &(input, e)) in cases.iter().enumerate() {
        let x = _mm_setr_ps(input, 1.0, 3.0, 4.0);
        let r = _mm_cvttss_si32(x);
        assert_eq!(
            e, r,
            "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}",
            i, x, r, e
        );
    }
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cvtsi32_ss() {
    // i32 -> f32 conversion into lane 0; lanes 1..3 of `a` pass through
    // unchanged. Magnitudes above 2^24 round to the nearest representable
    // f32 (322223333 -> 322223330.0).
    // Dropped the stray `pub` (no sibling test is public) and hoisted the
    // loop-invariant source vector out of the loop.
    let inputs = &[
        (4555i32, 4555.0f32),
        (322223333, 322223330.0),
        (-432, -432.0),
        (-322223333, -322223330.0),
    ];
    let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
    for &(x, f) in inputs {
        let r = _mm_cvtsi32_ss(a, x);
        let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
        assert_eq_m128(e, r);
    }
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cvtss_f32() {
    // Extracts lane 0 as a plain f32. Dropped the stray `pub`
    // (no sibling test is public).
    let a = _mm_setr_ps(312.0134, 5.0, 6.0, 7.0);
    assert_eq!(_mm_cvtss_f32(a), 312.0134);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_set_ss() {
    // The scalar lands in lane 0; the remaining lanes are zeroed.
    let v = _mm_set_ss(black_box(4.25));
    assert_eq_m128(v, _mm_setr_ps(4.25, 0.0, 0.0, 0.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_set1_ps() {
    // _mm_set_ps1 is an alias of _mm_set1_ps; both splat the scalar into
    // all four lanes.
    let splat = _mm_set1_ps(black_box(4.25));
    let alias = _mm_set_ps1(black_box(4.25));
    for lane in 0..4 {
        assert_eq!(get_m128(splat, lane), 4.25);
    }
    for lane in 0..4 {
        assert_eq!(get_m128(alias, lane), 4.25);
    }
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_set_ps() {
    // _mm_set_ps takes arguments from high lane to low lane: the first
    // argument ends up in lane 3.
    let v = _mm_set_ps(
        black_box(1.0),
        black_box(2.0),
        black_box(3.0),
        black_box(4.0),
    );
    assert_eq!(get_m128(v, 0), 4.0);
    assert_eq!(get_m128(v, 1), 3.0);
    assert_eq!(get_m128(v, 2), 2.0);
    assert_eq!(get_m128(v, 3), 1.0);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_setr_ps() {
    // _mm_setr_ps takes arguments in memory order: the first argument
    // ends up in lane 0.
    let v = _mm_setr_ps(
        black_box(1.0),
        black_box(2.0),
        black_box(3.0),
        black_box(4.0),
    );
    assert_eq_m128(v, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_setzero_ps() {
    // All four lanes must be exactly +0.0.
    let zero = *black_box(&_mm_setzero_ps());
    assert_eq_m128(zero, _mm_set1_ps(0.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_shuffle() {
    // _MM_SHUFFLE packs four 2-bit lane selectors into one control byte;
    // the first argument occupies the most-significant bit pair.
    assert_eq!(_MM_SHUFFLE(0, 1, 1, 3), 0b00_01_01_11);
    assert_eq!(_MM_SHUFFLE(3, 1, 1, 0), 0b11_01_01_00);
    assert_eq!(_MM_SHUFFLE(1, 2, 2, 1), 0b01_10_10_01);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_shuffle_ps() {
    // The low two result lanes select from `lo` (indices 3 and 1) and the
    // high two select from `hi` (indices 1 and 0).
    let lo = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let hi = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
    let shuffled = _mm_shuffle_ps(lo, hi, 0b00_01_01_11);
    assert_eq_m128(shuffled, _mm_setr_ps(4.0, 2.0, 6.0, 5.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_unpackhi_ps() {
    // Interleaves the high halves: [a2, b2, a3, b3].
    let x = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let y = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
    assert_eq_m128(_mm_unpackhi_ps(x, y), _mm_setr_ps(3.0, 7.0, 4.0, 8.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_unpacklo_ps() {
    // Interleaves the low halves: [a0, b0, a1, b1].
    let x = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let y = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
    assert_eq_m128(_mm_unpacklo_ps(x, y), _mm_setr_ps(1.0, 5.0, 2.0, 6.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_movehl_ps() {
    // Result = [b2, b3, a2, a3]: high half of `y`, then high half of `x`.
    let x = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let y = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
    assert_eq_m128(_mm_movehl_ps(x, y), _mm_setr_ps(7.0, 8.0, 3.0, 4.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_movelh_ps() {
    // Result = [a0, a1, b0, b1]: low half of `x`, then low half of `y`.
    let x = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let y = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
    assert_eq_m128(_mm_movelh_ps(x, y), _mm_setr_ps(1.0, 2.0, 5.0, 6.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_loadh_pi() {
    // Loads two floats from memory into the upper two lanes; the lower
    // two lanes of `base` pass through.
    let base = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let mem: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
    let loaded = _mm_loadh_pi(base, mem.as_ptr() as *const _);
    assert_eq_m128(loaded, _mm_setr_ps(1.0, 2.0, 5.0, 6.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_loadl_pi() {
    // Loads two floats from memory into the lower two lanes; the upper
    // two lanes of `base` pass through.
    let base = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let mem: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
    let loaded = _mm_loadl_pi(base, mem.as_ptr() as *const _);
    assert_eq_m128(loaded, _mm_setr_ps(5.0, 6.0, 3.0, 4.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_load_ss() {
    // Loads a single f32 into lane 0 and zeroes the other lanes.
    let src = 42.0f32;
    let v = _mm_load_ss(&src as *const f32);
    assert_eq_m128(v, _mm_setr_ps(42.0, 0.0, 0.0, 0.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_load1_ps() {
    // Loads a single f32 and splats it into all four lanes.
    let src = 42.0f32;
    let v = _mm_load1_ps(&src as *const f32);
    assert_eq_m128(v, _mm_setr_ps(42.0, 42.0, 42.0, 42.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_load_ps() {
    // `_mm_load_ps` requires a 16-byte-aligned pointer. If `vals` happens
    // to be unaligned, advance `p` to the next 16-byte boundary; `fixup`
    // records how many f32 elements were skipped so the expected values
    // can be shifted by the same amount.
    let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
    let mut p = vals.as_ptr();
    let mut fixup = 0.0f32;
    let unalignment = (p as usize) & 0xf;
    if unalignment != 0 {
        // Elements (4 bytes each) to the next 16-byte boundary.
        let delta = ((16 - unalignment) >> 2) as isize;
        fixup = delta as f32;
        p = p.offset(delta);
    }
    let r = _mm_load_ps(p);
    // Skipping `delta` elements shifts every loaded value up by `delta`,
    // since vals[i] == (i + 1) as f32.
    let e = _mm_add_ps(_mm_setr_ps(1.0, 2.0, 3.0, 4.0), _mm_set1_ps(fixup));
    assert_eq_m128(r, e);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_loadu_ps() {
    // Unaligned load: a 3-element offset from any allocation guarantees
    // the pointer is not 16-byte aligned.
    let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
    let ptr = vals.as_ptr().offset(3);
    let loaded = _mm_loadu_ps(black_box(ptr));
    assert_eq_m128(loaded, _mm_setr_ps(4.0, 5.0, 6.0, 7.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_loadr_ps() {
    // Aligned load with lanes reversed. Same alignment-fixup scheme as
    // test_mm_load_ps: advance `p` to a 16-byte boundary and shift the
    // expected values by the number of skipped elements.
    let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
    let mut p = vals.as_ptr();
    let mut fixup = 0.0f32;
    let unalignment = (p as usize) & 0xf;
    if unalignment != 0 {
        // Elements (4 bytes each) to the next 16-byte boundary.
        let delta = ((16 - unalignment) >> 2) as isize;
        fixup = delta as f32;
        p = p.offset(delta);
    }
    let r = _mm_loadr_ps(p);
    // Reversed order, then the same per-element shift as test_mm_load_ps.
    let e = _mm_add_ps(_mm_setr_ps(4.0, 3.0, 2.0, 1.0), _mm_set1_ps(fixup));
    assert_eq_m128(r, e);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_storeh_pi() {
    // Writes only the upper two lanes (3.0, 4.0); the next element must
    // stay untouched.
    let mut out = [0.0f32; 8];
    let v = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    _mm_storeh_pi(out.as_mut_ptr() as *mut _, v);
    assert_eq!(out[..3], [3.0, 4.0, 0.0]);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_storel_pi() {
    // Writes only the lower two lanes (1.0, 2.0); the next element must
    // stay untouched.
    let mut out = [0.0f32; 8];
    let v = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    _mm_storel_pi(out.as_mut_ptr() as *mut _, v);
    assert_eq!(out[..3], [1.0, 2.0, 0.0]);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_store_ss() {
    // Stores only lane 0 (1.0) to the given address; both neighbors must
    // stay untouched.
    let mut out = [0.0f32; 8];
    let v = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    _mm_store_ss(out.as_mut_ptr().offset(1), v);
    assert_eq!(out[..3], [0.0, 1.0, 0.0]);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_store1_ps() {
    // Aligned splat-store of lane 0 to four consecutive elements.
    // `_mm_store1_ps` needs a 16-byte-aligned destination, so align `p`
    // up to the next boundary and remember the element offset `ofs`.
    let mut vals = [0.0f32; 8];
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let mut ofs = 0;
    let mut p = vals.as_mut_ptr();
    if (p as usize) & 0xf != 0 {
        ofs = (16 - (p as usize) & 0xf) >> 2;
        p = p.offset(ofs as isize);
    }
    _mm_store1_ps(p, *black_box(&a));
    // Element before the store window (if any) must be untouched.
    if ofs > 0 {
        assert_eq!(vals[ofs - 1], 0.0);
    }
    assert_eq!(vals[ofs + 0], 1.0);
    assert_eq!(vals[ofs + 1], 1.0);
    assert_eq!(vals[ofs + 2], 1.0);
    assert_eq!(vals[ofs + 3], 1.0);
    // Element after the store window must be untouched too.
    assert_eq!(vals[ofs + 4], 0.0);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_store_ps() {
    // Aligned store of all four lanes. `_mm_store_ps` needs a
    // 16-byte-aligned destination, so align `p` up to the next boundary
    // and remember the element offset `ofs`.
    let mut vals = [0.0f32; 8];
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let mut ofs = 0;
    let mut p = vals.as_mut_ptr();
    if (p as usize) & 0xf != 0 {
        ofs = (16 - (p as usize) & 0xf) >> 2;
        p = p.offset(ofs as isize);
    }
    _mm_store_ps(p, *black_box(&a));
    // Element before the store window (if any) must be untouched.
    if ofs > 0 {
        assert_eq!(vals[ofs - 1], 0.0);
    }
    assert_eq!(vals[ofs + 0], 1.0);
    assert_eq!(vals[ofs + 1], 2.0);
    assert_eq!(vals[ofs + 2], 3.0);
    assert_eq!(vals[ofs + 3], 4.0);
    // Element after the store window must be untouched too.
    assert_eq!(vals[ofs + 4], 0.0);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_storer_ps() {
    // Aligned store with lanes written in reverse order. Same alignment
    // fixup as test_mm_store_ps.
    let mut vals = [0.0f32; 8];
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let mut ofs = 0;
    let mut p = vals.as_mut_ptr();
    if (p as usize) & 0xf != 0 {
        ofs = (16 - (p as usize) & 0xf) >> 2;
        p = p.offset(ofs as isize);
    }
    _mm_storer_ps(p, *black_box(&a));
    // Element before the store window (if any) must be untouched.
    if ofs > 0 {
        assert_eq!(vals[ofs - 1], 0.0);
    }
    // Lane 3 lands first in memory, lane 0 last.
    assert_eq!(vals[ofs + 0], 4.0);
    assert_eq!(vals[ofs + 1], 3.0);
    assert_eq!(vals[ofs + 2], 2.0);
    assert_eq!(vals[ofs + 3], 1.0);
    // Element after the store window must be untouched too.
    assert_eq!(vals[ofs + 4], 0.0);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_storeu_ps() {
    // Unaligned store: if the buffer happens to be 16-byte aligned, bump
    // the pointer by one element so the store is deliberately misaligned
    // (note the inverted `== 0` check versus the aligned-store tests).
    let mut vals = [0.0f32; 8];
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let mut ofs = 0;
    let mut p = vals.as_mut_ptr();
    if (p as usize) & 0xf == 0 {
        ofs = 1;
        p = p.offset(1);
    }
    _mm_storeu_ps(p, *black_box(&a));
    // Element before the store window (if any) must be untouched.
    if ofs > 0 {
        assert_eq!(vals[ofs - 1], 0.0);
    }
    assert_eq!(vals[ofs + 0], 1.0);
    assert_eq!(vals[ofs + 1], 2.0);
    assert_eq!(vals[ofs + 2], 3.0);
    assert_eq!(vals[ofs + 3], 4.0);
    // Element after the store window must be untouched too.
    assert_eq!(vals[ofs + 4], 0.0);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_move_ss() {
    // Result takes lane 0 from the second operand and lanes 1..3 from
    // the first.
    let x = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let y = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
    let expected = _mm_setr_ps(5.0, 2.0, 3.0, 4.0);
    assert_eq_m128(expected, _mm_move_ss(x, y));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_movemask_ps() {
    // Collects each lane's sign bit into the low four bits of the result
    // (lane 0 -> bit 0).
    assert_eq!(_mm_movemask_ps(_mm_setr_ps(-1.0, 5.0, -5.0, 0.0)), 0b0101);
    assert_eq!(_mm_movemask_ps(_mm_setr_ps(-1.0, -5.0, -5.0, 0.0)), 0b0111);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_sfence() {
    // Smoke test only: a store fence has no observable result to assert
    // on; this just checks the intrinsic can be emitted and executed.
    _mm_sfence();
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_getcsr_setcsr_1() {
    // With flush-to-zero ON, a multiply whose result would be subnormal
    // (1.1e-36 * 0.001) must produce 0.0 in lane 0. The saved MXCSR is
    // restored before asserting so FTZ state cannot leak into other tests.
    let saved_csr = _mm_getcsr();
    let a = _mm_setr_ps(1.1e-36, 0.0, 0.0, 1.0);
    let b = _mm_setr_ps(0.001, 0.0, 0.0, 1.0);
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    // black_box keeps the multiply from being constant-folded at compile
    // time, which would bypass the runtime FTZ setting.
    let r = _mm_mul_ps(*black_box(&a), *black_box(&b));
    _mm_setcsr(saved_csr);
    let exp = _mm_setr_ps(0.0, 0.0, 0.0, 1.0);
    assert_eq_m128(r, exp);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_getcsr_setcsr_2() {
    // Counterpart to test_mm_getcsr_setcsr_1: with flush-to-zero OFF the
    // same multiply must keep its subnormal result (~1.1e-39) in lane 0.
    let saved_csr = _mm_getcsr();
    let a = _mm_setr_ps(1.1e-36, 0.0, 0.0, 1.0);
    let b = _mm_setr_ps(0.001, 0.0, 0.0, 1.0);
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF);
    // black_box keeps the multiply from being constant-folded at compile
    // time, which would bypass the runtime FTZ setting.
    let r = _mm_mul_ps(*black_box(&a), *black_box(&b));
    _mm_setcsr(saved_csr);
    let exp = _mm_setr_ps(1.1e-39, 0.0, 0.0, 1.0);
    assert_eq_m128(r, exp);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_getcsr_setcsr_underflow() {
_MM_SET_EXCEPTION_STATE(0);
let a = _mm_setr_ps(1.1e-36, 0.0, 0.0, 1.0);
let b = _mm_setr_ps(1e-5, 0.0, 0.0, 1.0);
assert_eq!(_MM_GET_EXCEPTION_STATE(), 0);
let r = _mm_mul_ps(*black_box(&a), *black_box(&b));
let exp = _mm_setr_ps(1.1e-41, 0.0, 0.0, 1.0);
assert_eq_m128(r, exp);
let underflow = _MM_GET_EXCEPTION_STATE() & _MM_EXCEPT_UNDERFLOW != 0;
assert_eq!(underflow, true);
}
#[simd_test(enable = "sse")]
unsafe fn test_MM_TRANSPOSE4_PS() {
    // In-place 4x4 transpose across four row registers.
    let mut row0 = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let mut row1 = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
    let mut row2 = _mm_setr_ps(9.0, 10.0, 11.0, 12.0);
    let mut row3 = _mm_setr_ps(13.0, 14.0, 15.0, 16.0);
    _MM_TRANSPOSE4_PS(&mut row0, &mut row1, &mut row2, &mut row3);
    // Each row now holds the corresponding column of the input.
    assert_eq_m128(row0, _mm_setr_ps(1.0, 5.0, 9.0, 13.0));
    assert_eq_m128(row1, _mm_setr_ps(2.0, 6.0, 10.0, 14.0));
    assert_eq_m128(row2, _mm_setr_ps(3.0, 7.0, 11.0, 15.0));
    assert_eq_m128(row3, _mm_setr_ps(4.0, 8.0, 12.0, 16.0));
}
// 16-byte-aligned backing storage for the streaming-store test below;
// `_mm_stream_ps` requires a 16-byte-aligned destination.
#[repr(align(16))]
struct Memory {
    pub data: [f32; 4],
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_stream_ps() {
    // Non-temporal store of four 7.0s; the `Memory` wrapper guarantees
    // the 16-byte alignment the streaming store requires.
    let a = _mm_set1_ps(7.0);
    let mut mem = Memory { data: [-1.0; 4] };
    _mm_stream_ps(&mut mem.data[0] as *mut f32, a);
    for i in 0..4 {
        assert_eq!(mem.data[i], get_m128(a, i));
    }
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_stream_pi() {
    // Non-temporal 64-bit store; a Box<__m64> provides a destination with
    // the alignment of __m64.
    let a = transmute(i8x8::new(0, 0, 0, 0, 0, 0, 0, 7));
    let mut mem = boxed::Box::<__m64>::new(transmute(i8x8::splat(1)));
    _mm_stream_pi(&mut *mem as *mut _ as *mut _, a);
    assert_eq_m64(a, *mem);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_max_pi16() {
    // Lane-wise signed 16-bit maximum; _m_pmaxsw is the legacy alias.
    let x = _mm_setr_pi16(-1, 6, -3, 8);
    let y = _mm_setr_pi16(5, -2, 7, -4);
    let expected = _mm_setr_pi16(5, 6, 7, 8);
    assert_eq_m64(expected, _mm_max_pi16(x, y));
    assert_eq_m64(expected, _m_pmaxsw(x, y));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_max_pu8() {
    // Lane-wise unsigned 8-bit maximum; _m_pmaxub is the legacy alias.
    let x = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
    let y = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
    let expected = _mm_setr_pi8(5, 6, 7, 8, 5, 6, 7, 8);
    assert_eq_m64(expected, _mm_max_pu8(x, y));
    assert_eq_m64(expected, _m_pmaxub(x, y));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_min_pi16() {
    // Lane-wise signed 16-bit minimum; _m_pminsw is the legacy alias.
    let x = _mm_setr_pi16(-1, 6, -3, 8);
    let y = _mm_setr_pi16(5, -2, 7, -4);
    let expected = _mm_setr_pi16(-1, -2, -3, -4);
    assert_eq_m64(expected, _mm_min_pi16(x, y));
    assert_eq_m64(expected, _m_pminsw(x, y));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_min_pu8() {
    // Lane-wise unsigned 8-bit minimum; _m_pminub is the legacy alias.
    let x = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
    let y = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
    let expected = _mm_setr_pi8(2, 2, 3, 4, 2, 2, 3, 4);
    assert_eq_m64(expected, _mm_min_pu8(x, y));
    assert_eq_m64(expected, _m_pminub(x, y));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_mulhi_pu16() {
    // 1000 * 1001 = 1_001_000; the high 16 bits are 1_001_000 >> 16 = 15.
    let x = _mm_set1_pi16(1000);
    let y = _mm_set1_pi16(1001);
    assert_eq_m64(_mm_mulhi_pu16(x, y), _mm_set1_pi16(15));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_mullo_pi16() {
    // 1000 * 1001 = 1_001_000; the low 16 bits are 1_001_000 % 65536 = 17960.
    let x = _mm_set1_pi16(1000);
    let y = _mm_set1_pi16(1001);
    assert_eq_m64(_mm_mullo_pi16(x, y), _mm_set1_pi16(17960));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_m_pmulhuw() {
    // Legacy alias of _mm_mulhi_pu16: high 16 bits of 1000 * 1001 are 15.
    let x = _mm_set1_pi16(1000);
    let y = _mm_set1_pi16(1001);
    assert_eq_m64(_m_pmulhuw(x, y), _mm_set1_pi16(15));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_avg_pu8() {
    // Rounded unsigned byte average: (3 + 9 + 1) >> 1 = 6.
    // _m_pavgb is the legacy alias.
    let x = _mm_set1_pi8(3);
    let y = _mm_set1_pi8(9);
    assert_eq_m64(_mm_avg_pu8(x, y), _mm_set1_pi8(6));
    assert_eq_m64(_m_pavgb(x, y), _mm_set1_pi8(6));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_avg_pu16() {
    // Rounded unsigned word average: (3 + 9 + 1) >> 1 = 6.
    // _m_pavgw is the legacy alias.
    let x = _mm_set1_pi16(3);
    let y = _mm_set1_pi16(9);
    assert_eq_m64(_mm_avg_pu16(x, y), _mm_set1_pi16(6));
    assert_eq_m64(_m_pavgw(x, y), _mm_set1_pi16(6));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_sad_pu8() {
    // Sum of absolute byte differences:
    // (255+254+253+252) + (|1-2| + |2-1| + |3-2| + |4-1|) = 1014 + 6 = 1020,
    // deposited in the lowest 16-bit lane with the rest zeroed.
    // _m_psadbw is the legacy alias.
    #[rustfmt::skip]
    let a = _mm_setr_pi8(
        255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
        1, 2, 3, 4,
    );
    let b = _mm_setr_pi8(0, 0, 0, 0, 2, 1, 2, 1);
    let r = _mm_sad_pu8(a, b);
    assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0));
    let r = _m_psadbw(a, b);
    assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtpi32_ps() {
    // Converts two i32s into the low two lanes; the high lanes pass
    // through. _mm_cvt_pi2ps is the legacy alias.
    let base = _mm_setr_ps(0., 0., 3., 4.);
    let ints = _mm_setr_pi32(1, 2);
    let want = _mm_setr_ps(1., 2., 3., 4.);
    assert_eq_m128(_mm_cvtpi32_ps(base, ints), want);
    assert_eq_m128(_mm_cvt_pi2ps(base, ints), want);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtpi16_ps() {
    // Converts four packed i16 lanes to f32.
    let src = _mm_setr_pi16(1, 2, 3, 4);
    assert_eq_m128(_mm_cvtpi16_ps(src), _mm_setr_ps(1., 2., 3., 4.));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtpu16_ps() {
    // Converts four packed 16-bit lanes, treated as unsigned, to f32.
    let src = _mm_setr_pi16(1, 2, 3, 4);
    assert_eq_m128(_mm_cvtpu16_ps(src), _mm_setr_ps(1., 2., 3., 4.));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtpi8_ps() {
    // Converts the low four i8 lanes to f32; the upper four are ignored.
    let src = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
    assert_eq_m128(_mm_cvtpi8_ps(src), _mm_setr_ps(1., 2., 3., 4.));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtpu8_ps() {
    // Converts the low four bytes, treated as unsigned, to f32; the upper
    // four are ignored.
    let src = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
    assert_eq_m128(_mm_cvtpu8_ps(src), _mm_setr_ps(1., 2., 3., 4.));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtpi32x2_ps() {
    // Converts two __m64s of two i32s each into one four-lane f32 vector.
    let lo = _mm_setr_pi32(1, 2);
    let hi = _mm_setr_pi32(3, 4);
    assert_eq_m128(_mm_cvtpi32x2_ps(lo, hi), _mm_setr_ps(1., 2., 3., 4.));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_maskmove_si64() {
    // Byte-masked store: only bytes whose mask byte has the top bit set
    // (here byte 2) are written; every other destination byte must stay
    // untouched. _m_maskmovq is the legacy alias.
    let a = _mm_set1_pi8(9);
    let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0);
    let mut r = _mm_set1_pi8(0);
    _mm_maskmove_si64(a, mask, &mut r as *mut _ as *mut i8);
    let e = _mm_setr_pi8(0, 0, 9, 0, 0, 0, 0, 0);
    assert_eq_m64(r, e);
    let mut r = _mm_set1_pi8(0);
    _m_maskmovq(a, mask, &mut r as *mut _ as *mut i8);
    assert_eq_m64(r, e);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_extract_pi16() {
    // Extracts the selected 16-bit lane; _m_pextrw is the legacy alias.
    let v = _mm_setr_pi16(1, 2, 3, 4);
    assert_eq!(_mm_extract_pi16(v, 0), 1);
    assert_eq!(_mm_extract_pi16(v, 1), 2);
    assert_eq!(_m_pextrw(v, 1), 2);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_insert_pi16() {
    // Replaces the selected 16-bit lane with the given value;
    // _m_pinsrw is the legacy alias.
    let v = _mm_setr_pi16(1, 2, 3, 4);
    assert_eq_m64(_mm_insert_pi16(v, 0, 0b0), _mm_setr_pi16(0, 2, 3, 4));
    let want = _mm_setr_pi16(1, 2, 0, 4);
    assert_eq_m64(_mm_insert_pi16(v, 0, 0b10), want);
    assert_eq_m64(_m_pinsrw(v, 0, 0b10), want);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_movemask_pi8() {
    // Gathers the sign bit of each of the 8 bytes; only bytes 0 and 4
    // (the 0x80 low halves of the first and third i16 lanes) have it set.
    // _m_pmovmskb is the legacy alias.
    let v = _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000);
    assert_eq!(_mm_movemask_pi8(v), 0b10001);
    assert_eq!(_m_pmovmskb(v), 0b10001);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_shuffle_pi16() {
    // Permutes the four 16-bit lanes by the 2-bit selectors in the
    // control byte; _m_pshufw is the legacy alias.
    let v = _mm_setr_pi16(1, 2, 3, 4);
    let want = _mm_setr_pi16(4, 2, 2, 1);
    assert_eq_m64(_mm_shuffle_pi16(v, 0b00_01_01_11), want);
    assert_eq_m64(_m_pshufw(v, 0b00_01_01_11), want);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtps_pi32() {
    // Converts the low two f32 lanes to i32 with rounding;
    // _mm_cvt_ps2pi is the legacy alias.
    let v = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let want = _mm_setr_pi32(1, 2);
    assert_eq_m64(want, _mm_cvtps_pi32(v));
    assert_eq_m64(want, _mm_cvt_ps2pi(v));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvttps_pi32() {
    // Converts the low two f32 lanes to i32 with truncation;
    // _mm_cvtt_ps2pi is the legacy alias.
    let v = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
    let want = _mm_setr_pi32(7, 2);
    assert_eq_m64(want, _mm_cvttps_pi32(v));
    assert_eq_m64(want, _mm_cvtt_ps2pi(v));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtps_pi16() {
    // Converts all four f32 lanes to packed i16.
    let v = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
    assert_eq_m64(_mm_setr_pi16(7, 2, 3, 4), _mm_cvtps_pi16(v));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtps_pi8() {
    // Converts the four f32 lanes to i8 in the low half of the result;
    // the high four bytes are zeroed.
    let v = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
    assert_eq_m64(_mm_setr_pi8(7, 2, 3, 4, 0, 0, 0, 0), _mm_cvtps_pi8(v));
}
}