70 #ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H
71 #define INCLUDED_volk_32f_s32f_convert_32i_u_H
78 #include <immintrin.h>
81 const float* inputVector,
83 unsigned int num_points)
85 unsigned int number = 0;
87 const unsigned int eighthPoints = num_points / 8;
89 const float* inputVectorPtr = (
const float*)inputVector;
90 int32_t* outputVectorPtr = outputVector;
92 float min_val = INT_MIN;
93 float max_val = INT_MAX;
96 __m256 vScalar = _mm256_set1_ps(scalar);
99 __m256 vmin_val = _mm256_set1_ps(min_val);
100 __m256 vmax_val = _mm256_set1_ps(max_val);
102 for (; number < eighthPoints; number++) {
103 inputVal1 = _mm256_loadu_ps(inputVectorPtr);
106 inputVal1 = _mm256_max_ps(
107 _mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
108 intInputVal1 = _mm256_cvtps_epi32(inputVal1);
110 _mm256_storeu_si256((__m256i*)outputVectorPtr, intInputVal1);
111 outputVectorPtr += 8;
114 number = eighthPoints * 8;
115 for (; number < num_points; number++) {
116 r = inputVector[number] * scalar;
119 else if (r < min_val)
121 outputVector[number] = (int32_t)
rintf(r);
128 #include <emmintrin.h>
131 const float* inputVector,
133 unsigned int num_points)
135 unsigned int number = 0;
137 const unsigned int quarterPoints = num_points / 4;
139 const float* inputVectorPtr = (
const float*)inputVector;
140 int32_t* outputVectorPtr = outputVector;
142 float min_val = INT_MIN;
143 float max_val = INT_MAX;
146 __m128 vScalar = _mm_set_ps1(scalar);
148 __m128i intInputVal1;
149 __m128 vmin_val = _mm_set_ps1(min_val);
150 __m128 vmax_val = _mm_set_ps1(max_val);
152 for (; number < quarterPoints; number++) {
153 inputVal1 = _mm_loadu_ps(inputVectorPtr);
157 _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
158 intInputVal1 = _mm_cvtps_epi32(inputVal1);
160 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
161 outputVectorPtr += 4;
164 number = quarterPoints * 4;
165 for (; number < num_points; number++) {
166 r = inputVector[number] * scalar;
169 else if (r < min_val)
171 outputVector[number] = (int32_t)
rintf(r);
179 #include <xmmintrin.h>
182 const float* inputVector,
184 unsigned int num_points)
186 unsigned int number = 0;
188 const unsigned int quarterPoints = num_points / 4;
190 const float* inputVectorPtr = (
const float*)inputVector;
191 int32_t* outputVectorPtr = outputVector;
193 float min_val = INT_MIN;
194 float max_val = INT_MAX;
197 __m128 vScalar = _mm_set_ps1(scalar);
199 __m128 vmin_val = _mm_set_ps1(min_val);
200 __m128 vmax_val = _mm_set_ps1(max_val);
204 for (; number < quarterPoints; number++) {
205 ret = _mm_loadu_ps(inputVectorPtr);
208 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
210 _mm_store_ps(outputFloatBuffer, ret);
211 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[0]);
212 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[1]);
213 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[2]);
214 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[3]);
217 number = quarterPoints * 4;
218 for (; number < num_points; number++) {
219 r = inputVector[number] * scalar;
222 else if (r < min_val)
224 outputVector[number] = (int32_t)
rintf(r);
231 #ifdef LV_HAVE_GENERIC
234 const float* inputVector,
236 unsigned int num_points)
238 int32_t* outputVectorPtr = outputVector;
239 const float* inputVectorPtr = inputVector;
240 unsigned int number = 0;
241 float min_val = INT_MIN;
242 float max_val = INT_MAX;
245 for (number = 0; number < num_points; number++) {
246 r = *inputVectorPtr++ * scalar;
249 else if (r < min_val)
251 *outputVectorPtr++ = (int32_t)
rintf(r);
259 #ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
260 #define INCLUDED_volk_32f_s32f_convert_32i_a_H
262 #include <inttypes.h>
267 #include <immintrin.h>
270 const float* inputVector,
272 unsigned int num_points)
274 unsigned int number = 0;
276 const unsigned int eighthPoints = num_points / 8;
278 const float* inputVectorPtr = (
const float*)inputVector;
279 int32_t* outputVectorPtr = outputVector;
281 float min_val = INT_MIN;
282 float max_val = INT_MAX;
285 __m256 vScalar = _mm256_set1_ps(scalar);
287 __m256i intInputVal1;
288 __m256 vmin_val = _mm256_set1_ps(min_val);
289 __m256 vmax_val = _mm256_set1_ps(max_val);
291 for (; number < eighthPoints; number++) {
292 inputVal1 = _mm256_load_ps(inputVectorPtr);
295 inputVal1 = _mm256_max_ps(
296 _mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
297 intInputVal1 = _mm256_cvtps_epi32(inputVal1);
299 _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
300 outputVectorPtr += 8;
303 number = eighthPoints * 8;
304 for (; number < num_points; number++) {
305 r = inputVector[number] * scalar;
308 else if (r < min_val)
310 outputVector[number] = (int32_t)
rintf(r);
318 #include <emmintrin.h>
321 const float* inputVector,
323 unsigned int num_points)
325 unsigned int number = 0;
327 const unsigned int quarterPoints = num_points / 4;
329 const float* inputVectorPtr = (
const float*)inputVector;
330 int32_t* outputVectorPtr = outputVector;
332 float min_val = INT_MIN;
333 float max_val = INT_MAX;
336 __m128 vScalar = _mm_set_ps1(scalar);
338 __m128i intInputVal1;
339 __m128 vmin_val = _mm_set_ps1(min_val);
340 __m128 vmax_val = _mm_set_ps1(max_val);
342 for (; number < quarterPoints; number++) {
343 inputVal1 = _mm_load_ps(inputVectorPtr);
347 _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
348 intInputVal1 = _mm_cvtps_epi32(inputVal1);
350 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
351 outputVectorPtr += 4;
354 number = quarterPoints * 4;
355 for (; number < num_points; number++) {
356 r = inputVector[number] * scalar;
359 else if (r < min_val)
361 outputVector[number] = (int32_t)
rintf(r);
369 #include <xmmintrin.h>
372 const float* inputVector,
374 unsigned int num_points)
376 unsigned int number = 0;
378 const unsigned int quarterPoints = num_points / 4;
380 const float* inputVectorPtr = (
const float*)inputVector;
381 int32_t* outputVectorPtr = outputVector;
383 float min_val = INT_MIN;
384 float max_val = INT_MAX;
387 __m128 vScalar = _mm_set_ps1(scalar);
389 __m128 vmin_val = _mm_set_ps1(min_val);
390 __m128 vmax_val = _mm_set_ps1(max_val);
394 for (; number < quarterPoints; number++) {
395 ret = _mm_load_ps(inputVectorPtr);
398 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
400 _mm_store_ps(outputFloatBuffer, ret);
401 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[0]);
402 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[1]);
403 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[2]);
404 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[3]);
407 number = quarterPoints * 4;
408 for (; number < num_points; number++) {
409 r = inputVector[number] * scalar;
412 else if (r < min_val)
414 outputVector[number] = (int32_t)
rintf(r);
421 #ifdef LV_HAVE_GENERIC
424 const float* inputVector,
426 unsigned int num_points)
428 int32_t* outputVectorPtr = outputVector;
429 const float* inputVectorPtr = inputVector;
430 unsigned int number = 0;
431 float min_val = INT_MIN;
432 float max_val = INT_MAX;
435 for (number = 0; number < num_points; number++) {
436 r = *inputVectorPtr++ * scalar;
439 else if (r < min_val)
441 *outputVectorPtr++ = (int32_t)
rintf(r);
static float rintf(float x)
Definition: config.h:37
static void volk_32f_s32f_convert_32i_a_sse(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:371
static void volk_32f_s32f_convert_32i_a_avx(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:269
static void volk_32f_s32f_convert_32i_a_generic(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:423
static void volk_32f_s32f_convert_32i_a_sse2(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:320
static void volk_32f_s32f_convert_32i_u_sse(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:181
static void volk_32f_s32f_convert_32i_generic(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:233
static void volk_32f_s32f_convert_32i_u_avx(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:80
static void volk_32f_s32f_convert_32i_u_sse2(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:130
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56