1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
use crate::{
core_arch::{simd::*, x86::*},
hint::unreachable_unchecked,
mem::transmute,
};
#[cfg(test)]
use stdarch_test::assert_instr;
// Raw LLVM intrinsics used by the F16C wrappers in this file. Each declaration
// is bound through `link_name` to the `llvm.x86.*` symbol shown on the line
// above it.
// NOTE(review): `improper_ctypes` is presumably silenced because the SIMD
// vector types in these signatures trip the FFI-safety lint — confirm.
#[allow(improper_ctypes)]
extern "unadjusted" {
// Half -> single precision: eight 16-bit lanes in, four `f32` lanes out.
#[link_name = "llvm.x86.vcvtph2ps.128"]
fn llvm_vcvtph2ps_128(a: i16x8) -> f32x4;
// Half -> single precision: eight 16-bit lanes in, eight `f32` lanes out.
#[link_name = "llvm.x86.vcvtph2ps.256"]
fn llvm_vcvtph2ps_256(a: i16x8) -> f32x8;
// Single -> half precision: four `f32` lanes in, eight 16-bit lanes out.
// `rounding` is forwarded by the wrappers as a literal in `0..=7`.
#[link_name = "llvm.x86.vcvtps2ph.128"]
fn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8;
// Single -> half precision: eight `f32` lanes in, eight 16-bit lanes out.
// `rounding` is forwarded by the wrappers as a literal in `0..=7`.
#[link_name = "llvm.x86.vcvtps2ph.256"]
fn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8;
}
/// Widens packed half-precision floats held in `a` to single precision.
///
/// `a` is reinterpreted as eight 16-bit lanes and handed to the
/// `vcvtph2ps` (128-bit) LLVM intrinsic; its four `f32` result lanes are
/// returned as a `__m128`. Per Intel's description of `vcvtph2ps`, the four
/// inputs are the half-precision values in the low 64 bits of `a`.
///
/// # Safety
///
/// The `f16c` target feature must be available on the executing CPU.
#[inline]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtph2ps"))]
pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 {
    // Reinterpret the opaque vector as the lane type the intrinsic expects.
    let halves = transmute(a);
    let singles = llvm_vcvtph2ps_128(halves);
    transmute(singles)
}
/// Widens eight packed half-precision floats held in `a` to single precision.
///
/// `a` is reinterpreted as eight 16-bit lanes and handed to the
/// `vcvtph2ps` (256-bit) LLVM intrinsic; its eight `f32` result lanes are
/// returned as a `__m256`.
///
/// # Safety
///
/// The `f16c` target feature must be available on the executing CPU.
#[inline]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtph2ps"))]
pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
    // Reinterpret the opaque vector as the lane type the intrinsic expects.
    let halves = transmute(a);
    let singles = llvm_vcvtph2ps_256(halves);
    transmute(singles)
}
/// Expands to a `match` on `$rounding` that invokes the macro named by `$call`
/// with the matching integer literal (`0` through `7`), so the callee always
/// receives a compile-time constant rather than a runtime value.
///
/// * `$rounding` - identifier of the integer holding the rounding selector.
/// * `$call` - name of a macro taking one expression; it is invoked as
///   `$call!(N)` for the selected `N`.
///
/// Any selector outside `0..=7` reaches `unreachable_unchecked()`, which is
/// undefined behavior. The expansion must therefore sit in an `unsafe` context
/// and the caller must guarantee the selector is in range.
macro_rules! dispatch_rounding {
    ($rounding:ident, $call:ident) => {{
        match $rounding {
            // Use the macro the caller actually named instead of assuming it
            // is always spelled `call`.
            0 => $call!(0),
            1 => $call!(1),
            2 => $call!(2),
            3 => $call!(3),
            4 => $call!(4),
            5 => $call!(5),
            6 => $call!(6),
            7 => $call!(7),
            _ => unreachable_unchecked(),
        }
    }};
}
/// Narrows the four packed single-precision floats in `a` to half precision.
///
/// `a` is reinterpreted as four `f32` lanes and handed to the `vcvtps2ph`
/// (128-bit) LLVM intrinsic together with `imm_rounding`; the eight 16-bit
/// result lanes are returned as a `__m128i`. Per Intel's description of
/// `vcvtps2ph`, the four converted values occupy the low 64 bits of the
/// result. See Intel's documentation for the meaning of each rounding value.
///
/// # Safety
///
/// * The `f16c` target feature must be available on the executing CPU.
/// * `imm_rounding` must be in `0..=7`; any other value reaches
///   `unreachable_unchecked()` and is undefined behavior.
#[inline]
#[target_feature(enable = "f16c")]
#[rustc_args_required_const(1)]
#[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))]
pub unsafe fn _mm_cvtps_ph(a: __m128, imm_rounding: i32) -> __m128i {
    let singles = transmute(a);
    // `dispatch_rounding!` expands to `call!(N)` with a literal `N` for each
    // accepted value, so the intrinsic always sees an immediate operand.
    macro_rules! call {
        ($imm:expr) => {
            llvm_vcvtps2ph_128(singles, $imm)
        };
    }
    let halves = dispatch_rounding!(imm_rounding, call);
    transmute(halves)
}
/// Narrows the eight packed single-precision floats in `a` to half precision.
///
/// `a` is reinterpreted as eight `f32` lanes and handed to the `vcvtps2ph`
/// (256-bit) LLVM intrinsic together with `imm_rounding`; the eight 16-bit
/// result lanes are returned as a `__m128i`. See Intel's documentation for the
/// meaning of each rounding value.
///
/// # Safety
///
/// * The `f16c` target feature must be available on the executing CPU.
/// * `imm_rounding` must be in `0..=7`; any other value reaches
///   `unreachable_unchecked()` and is undefined behavior.
#[inline]
#[target_feature(enable = "f16c")]
#[rustc_args_required_const(1)]
#[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))]
pub unsafe fn _mm256_cvtps_ph(a: __m256, imm_rounding: i32) -> __m128i {
    let singles = transmute(a);
    // `dispatch_rounding!` expands to `call!(N)` with a literal `N` for each
    // accepted value, so the intrinsic always sees an immediate operand.
    macro_rules! call {
        ($imm:expr) => {
            llvm_vcvtps2ph_256(singles, $imm)
        };
    }
    let halves = dispatch_rounding!(imm_rounding, call);
    transmute(halves)
}
#[cfg(test)]
mod tests {
    use crate::{core_arch::x86::*, mem::transmute};
    use stdarch_test::simd_test;

    // Round trip through the 128-bit conversions: f32 -> f16 -> f32 with
    // rounding selector 0 must reproduce the small integer inputs.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm_cvtph_ps() {
        let input = [1_f32, 2_f32, 3_f32, 4_f32];
        let packed: __m128 = transmute(input);
        let halves: __m128i = _mm_cvtps_ph(packed, 0);
        let output: [f32; 4] = transmute(_mm_cvtph_ps(halves));
        assert_eq!(output, input);
    }

    // Round trip through the 256-bit conversions: f32 -> f16 -> f32 with
    // rounding selector 0 must reproduce the small integer inputs.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm256_cvtph_ps() {
        let input = [1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32];
        let packed: __m256 = transmute(input);
        let halves: __m128i = _mm256_cvtps_ph(packed, 0);
        let output: [f32; 8] = transmute(_mm256_cvtph_ps(halves));
        assert_eq!(output, input);
    }
}