Unverified Commit 7ff86be6 authored by dwtowner's avatar dwtowner Committed by GitHub
Browse files

CPU: Replaced intel specific intrinsics with generic versions (#2668)

parent f6511053
...@@ -335,16 +335,19 @@ static inline fvec8 blendZero(const fvec8 v, const fvec8 mask) { ...@@ -335,16 +335,19 @@ static inline fvec8 blendZero(const fvec8 v, const fvec8 mask) {
*/ */
static inline void gatherVecPair(const float* table, const ivec8 index, fvec8& out0, fvec8& out1) { static inline void gatherVecPair(const float* table, const ivec8 index, fvec8& out0, fvec8& out1) {
const auto lower = index.lowerVec();
const auto upper = index.upperVec();
// Gather all the separate memory data together. Each vector will have two values // Gather all the separate memory data together. Each vector will have two values
// which get used, and two which are ultimately discarded. // which get used, and two which are ultimately discarded.
fvec4 t0(table + _mm256_extract_epi32(index, 0)); fvec4 t0(table + lower[0]);
fvec4 t1(table + _mm256_extract_epi32(index, 1)); fvec4 t1(table + lower[1]);
fvec4 t2(table + _mm256_extract_epi32(index, 2)); fvec4 t2(table + lower[2]);
fvec4 t3(table + _mm256_extract_epi32(index, 3)); fvec4 t3(table + lower[3]);
fvec4 t4(table + _mm256_extract_epi32(index, 4)); fvec4 t4(table + upper[0]);
fvec4 t5(table + _mm256_extract_epi32(index, 5)); fvec4 t5(table + upper[1]);
fvec4 t6(table + _mm256_extract_epi32(index, 6)); fvec4 t6(table + upper[2]);
fvec4 t7(table + _mm256_extract_epi32(index, 7)); fvec4 t7(table + upper[3]);
// Transposing the 8 vectors above will put all the first elements into one output // Transposing the 8 vectors above will put all the first elements into one output
// vector, all the second elements into the next vector and so on. // vector, all the second elements into the next vector and so on.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment