use crate::simd::{cmp::SimdPartialEq, LaneCount, Simd, SimdElement, SupportedLaneCount};
use core::ops::{Add, Mul};
use core::ops::{BitAnd, BitOr, BitXor};
use core::ops::{Div, Rem, Sub};
use core::ops::{Shl, Shr};
mod assign;
mod deref;
mod shift_scalar;
mod unary;
impl<I, T, const N: usize> core::ops::Index<I> for Simd<T, N>
where
T: SimdElement,
LaneCount<N>: SupportedLaneCount,
I: core::slice::SliceIndex<[T]>,
{
type Output = I::Output;
#[inline]
fn index(&self, index: I) -> &Self::Output {
&self.as_array()[index]
}
}
impl<I, T, const N: usize> core::ops::IndexMut<I> for Simd<T, N>
where
T: SimdElement,
LaneCount<N>: SupportedLaneCount,
I: core::slice::SliceIndex<[T]>,
{
#[inline]
fn index_mut(&mut self, index: I) -> &mut Self::Output {
&mut self.as_mut_array()[index]
}
}
macro_rules! unsafe_base {
($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
// Safety: $lhs and $rhs are vectors
unsafe { core::intrinsics::simd::$simd_call($lhs, $rhs) }
};
}
/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
/// It handles performing a bitand in addition to calling the shift operator, so that the result
/// is well-defined: LLVM can return a poison value if you shl, lshr, or ashr when `rhs >= <Int>::BITS`.
/// At worst, this will maybe add another instruction and cycle,
/// at best, it may open up more optimization opportunities,
/// or simply be elided entirely, especially for SIMD ISAs which default to this.
///
// FIXME: Consider implementing this in cg_llvm instead?
// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
macro_rules! wrap_bitshift {
($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
#[allow(clippy::suspicious_arithmetic_impl)]
// Safety: $lhs and the bitand result are vectors
unsafe {
core::intrinsics::simd::$simd_call(
$lhs,
$rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
)
}
};
}
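// A minimal sketch (illustrative only, not used elsewhere in this module) of what the
// mask in `wrap_bitshift!` guarantees: with `u8` lanes, a shift amount of 9 is reduced
// to `9 & (u8::BITS - 1) == 1` before the intrinsic runs, so LLVM never sees a
// poison-producing `rhs >= <Int>::BITS`. Assumes the `Shl` impl generated later in
// this file and `Simd::{from_array, splat}`.
#[allow(dead_code)]
fn _wrap_bitshift_sketch() {
    let x = Simd::<u8, 4>::from_array([1, 2, 4, 8]);
    // 9 >= u8::BITS, so the shift amount wraps to 1 instead of producing poison.
    assert_eq!(x << Simd::splat(9u8), Simd::from_array([2, 4, 8, 16]));
}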
/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
/// thus guaranteeing a Rust value returns instead.
///
/// |                  | LLVM | Rust
/// | :--------------: | :--- | :----------
/// |    N {/,%} 0     | UB   | panic!()
/// | <$int>::MIN / -1 | UB   | <$int>::MIN
/// | <$int>::MIN % -1 | UB   | 0
///
macro_rules! int_divrem_guard {
( $lhs:ident,
$rhs:ident,
{ const PANIC_ZERO: &'static str = $zero:literal;
$simd_call:ident, $op:tt
},
$int:ident ) => {
if $rhs.simd_eq(Simd::splat(0 as _)).any() {
panic!($zero);
} else {
// Prevent otherwise-UB overflow on the MIN / -1 case.
let rhs = if <$int>::MIN != 0 {
// This should, at worst, optimize to a few branchless logical ops
// Ideally, this entire conditional should evaporate
// Fire LLVM and implement those manually if it doesn't get the hint
($lhs.simd_eq(Simd::splat(<$int>::MIN))
// type inference can break here, so cut an SInt to size
& $rhs.simd_eq(Simd::splat(-1i64 as _)))
.select(Simd::splat(1 as _), $rhs)
} else {
// Nice base case to make it easy to const-fold away the other branch.
$rhs
};
// aarch64 div fails for arbitrary `v % 0`, and mod fails when rhs is MIN for non-powers-of-two;
// these operations aren't vectorized on aarch64 anyway.
#[cfg(target_arch = "aarch64")]
{
let mut out = Simd::splat(0 as _);
for i in 0..Self::LEN {
out[i] = $lhs[i] $op rhs[i];
}
out
}
#[cfg(not(target_arch = "aarch64"))]
{
// Safety: $lhs and rhs are vectors
unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
}
}
};
}
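// A minimal sketch (illustrative only) mirroring the table in the doc comment above:
// the guard in `int_divrem_guard!` converts LLVM's UB cases into ordinary Rust values.
// Assumes the `Div`/`Rem` impls generated later in this file and `Simd::splat`.
#[allow(dead_code)]
fn _int_divrem_guard_sketch() {
    let min = Simd::<i32, 4>::splat(i32::MIN);
    let neg_one = Simd::splat(-1i32);
    // MIN / -1 would overflow; the rhs is masked to 1, so the result is MIN.
    assert_eq!(min / neg_one, min);
    // Likewise MIN % -1 yields 0 instead of overflowing.
    assert_eq!(min % neg_one, Simd::splat(0));
    // A zero divisor in any lane panics ("attempt to divide by zero") instead of
    // reaching LLVM's UB path:
    // let _ = min / Simd::splat(0);
}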
macro_rules! for_base_types {
( T = ($($scalar:ident),*);
type Lhs = Simd<T, N>;
type Rhs = Simd<T, N>;
type Output = $out:ty;
impl $op:ident::$call:ident {
$macro_impl:ident $inner:tt
}) => {
$(
impl<const N: usize> $op<Self> for Simd<$scalar, N>
where
$scalar: SimdElement,
LaneCount<N>: SupportedLaneCount,
{
type Output = $out;
#[inline]
#[must_use = "operator returns a new vector without mutating the inputs"]
// TODO: only useful for int Div::div, but we hope that this
// will essentially always get inlined anyway.
#[track_caller]
fn $call(self, rhs: Self) -> Self::Output {
$macro_impl!(self, rhs, $inner, $scalar)
}
}
)*
}
}
// A "TokenTree muncher": takes a set of scalar types `T = {};`
// type parameters for the ops it implements, `Op::fn` names,
// and a macro that expands into an expr, substituting in an intrinsic.
// It passes that to for_base_types, which expands an impl for the types,
// using the expanded expr in the function, and recurses with itself.
//
// tl;dr impls a set of ops::{Traits} for a set of types
macro_rules! for_base_ops {
(
T = $types:tt;
type Lhs = Simd<T, N>;
type Rhs = Simd<T, N>;
type Output = $out:ident;
impl $op:ident::$call:ident
$inner:tt
$($rest:tt)*
) => {
for_base_types! {
T = $types;
type Lhs = Simd<T, N>;
type Rhs = Simd<T, N>;
type Output = $out;
impl $op::$call
$inner
}
for_base_ops! {
T = $types;
type Lhs = Simd<T, N>;
type Rhs = Simd<T, N>;
type Output = $out;
$($rest)*
}
};
($($done:tt)*) => {
// Done.
}
}
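// A rough, hand-written sketch (illustrative only; the real code is produced by
// `for_base_types!` and also carries the `#[must_use]`/`#[track_caller]` attributes)
// of what one step of the recursion above expands to, for `Add` on `u32` lanes:
//
//     impl<const N: usize> Add<Self> for Simd<u32, N>
//     where
//         u32: SimdElement,
//         LaneCount<N>: SupportedLaneCount,
//     {
//         type Output = Self;
//         #[inline]
//         fn add(self, rhs: Self) -> Self {
//             // Safety: `self` and `rhs` are vectors
//             unsafe { core::intrinsics::simd::simd_add(self, rhs) }
//         }
//     }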
// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
// For all of these operations, simd_* intrinsics apply wrapping logic.
for_base_ops! {
T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
type Lhs = Simd<T, N>;
type Rhs = Simd<T, N>;
type Output = Self;
impl Add::add {
unsafe_base { simd_add }
}
impl Mul::mul {
unsafe_base { simd_mul }
}
impl Sub::sub {
unsafe_base { simd_sub }
}
impl BitAnd::bitand {
unsafe_base { simd_and }
}
impl BitOr::bitor {
unsafe_base { simd_or }
}
impl BitXor::bitxor {
unsafe_base { simd_xor }
}
impl Div::div {
int_divrem_guard {
const PANIC_ZERO: &'static str = "attempt to divide by zero";
simd_div, /
}
}
impl Rem::rem {
int_divrem_guard {
const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
simd_rem, %
}
}
// The only question is how to handle shifts >= <Int>::BITS?
// Our current solution uses wrapping logic.
impl Shl::shl {
wrap_bitshift { simd_shl }
}
impl Shr::shr {
wrap_bitshift {
// This automatically monomorphizes to lshr or ashr, depending,
// so it's fine to use it for both UInts and SInts.
simd_shr
}
}
}
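// A minimal sketch (illustrative only) of the wrapping behavior noted above: integer
// lanes wrap on overflow, matching the scalar `wrapping_*` methods, with no debug
// overflow checks.
#[allow(dead_code)]
fn _wrapping_int_arith_sketch() {
    let max = Simd::<u8, 4>::splat(u8::MAX);
    // Overflow wraps rather than panicking, even in debug builds.
    assert_eq!(max + Simd::splat(1), Simd::splat(0));
    assert_eq!(Simd::splat(0u8) - Simd::splat(1), max);
}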
// We don't need any special precautions here:
// Floats always accept arithmetic ops, but may become NaN.
for_base_ops! {
T = (f32, f64);
type Lhs = Simd<T, N>;
type Rhs = Simd<T, N>;
type Output = Self;
impl Add::add {
unsafe_base { simd_add }
}
impl Mul::mul {
unsafe_base { simd_mul }
}
impl Sub::sub {
unsafe_base { simd_sub }
}
impl Div::div {
unsafe_base { simd_div }
}
impl Rem::rem {
unsafe_base { simd_rem }
}
}
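// A minimal sketch (illustrative only) of the float behavior noted above: arithmetic
// follows IEEE 754, so an invalid operation yields NaN lanes rather than panicking.
#[allow(dead_code)]
fn _float_nan_sketch() {
    let nan_lanes = Simd::<f32, 4>::splat(0.0) / Simd::splat(0.0);
    assert!(nan_lanes.to_array().iter().all(|x| x.is_nan()));
}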