@@ -62,47 +62,53 @@ op_dot::direct_dot_generic(const uword n_elem, const eT* const A, const eT* cons
62
62
63
63
64
64
65
- // ! generic version for non-complex values with forced optimisation under GCC
66
- template <typename eT>
67
- #if defined(ARMA_REAL_GCC) && !defined(ARMA_DONT_FORCE_OPTIMISE_DOT)
68
- __attribute__ ((optimize(" O3" , " fast-math" )))
69
- #endif
70
- inline
71
- typename arma_not_cx<eT>::result
72
- op_dot::direct_dot_generic_force_optimise (const uword n_elem, const eT* const A, const eT* const B)
73
- {
74
- arma_debug_sigprint ();
75
-
76
- #if defined(__FAST_MATH__)
77
- {
78
- eT val = eT (0 );
79
-
80
- for (uword i=0 ; i < n_elem; ++i) { val += (A[i] * B[i]); }
81
-
82
- return val;
83
- }
84
- #else
85
- {
86
- eT val1 = eT (0 );
87
- eT val2 = eT (0 );
88
-
89
- uword i, j;
90
-
91
- for (i=0 , j=1 ; j < n_elem; i+=2 , j+=2 )
92
- {
93
- val1 += (A[i] * B[i]);
94
- val2 += (A[j] * B[j]);
95
- }
96
-
97
- if (i < n_elem)
98
- {
99
- val1 += (A[i] * B[i]);
100
- }
101
-
102
- return (val1 + val2);
103
- }
104
- #endif
105
- }
65
+ // //! generic version for non-complex values with forced SIMD optimisation under OpenMP
66
+ // template<typename eT>
67
+ // inline
68
+ // typename arma_not_cx<eT>::result
69
+ // op_dot::direct_dot_generic_force_optimise(const uword n_elem, const eT* const A, const eT* const B)
70
+ // {
71
+ // arma_debug_sigprint();
72
+ //
73
+ // #if defined(ARMA_USE_OPENMP)
74
+ // {
75
+ // eT val = eT(0);
76
+ //
77
+ // #pragma omp simd
78
+ // for(uword i=0; i < n_elem; ++i) { val += (A[i] * B[i]); }
79
+ //
80
+ // return val;
81
+ // }
82
+ // #elif defined(__FAST_MATH__)
83
+ // {
84
+ // eT val = eT(0);
85
+ //
86
+ // for(uword i=0; i < n_elem; ++i) { val += (A[i] * B[i]); }
87
+ //
88
+ // return val;
89
+ // }
90
+ // #else
91
+ // {
92
+ // eT val1 = eT(0);
93
+ // eT val2 = eT(0);
94
+ //
95
+ // uword i, j;
96
+ //
97
+ // for(i=0, j=1; j < n_elem; i+=2, j+=2)
98
+ // {
99
+ // val1 += (A[i] * B[i]);
100
+ // val2 += (A[j] * B[j]);
101
+ // }
102
+ //
103
+ // if(i < n_elem)
104
+ // {
105
+ // val1 += (A[i] * B[i]);
106
+ // }
107
+ //
108
+ // return (val1 + val2);
109
+ // }
110
+ // #endif
111
+ // }
106
112
107
113
108
114
@@ -209,7 +215,9 @@ op_dot::direct_dot(const uword n_elem, const eT* const A, const eT* const B)
209
215
{
210
216
arma_debug_sigprint ();
211
217
212
- return (n_elem <= 32u ) ? op_dot::direct_dot_generic (n_elem, A, B) : op_dot::direct_dot_generic_force_optimise (n_elem, A, B);
218
+ // return (n_elem <= 32u) ? op_dot::direct_dot_generic(n_elem, A, B) : op_dot::direct_dot_generic_force_optimise(n_elem, A, B);
219
+
220
+ return op_dot::direct_dot_generic (n_elem, A, B);
213
221
}
214
222
215
223
0 commit comments