bugfix Windows: FTZ and DAZ bits in the SSE computation unit are thread dependent.

This fix sets the FTZ and DAZ bits in every thread to gain the speedup.
Linux seems to store these bits per process rather than per thread.
This commit is contained in:
Sebastian Held 2012-10-06 20:50:36 +02:00
parent 77e0cd2c60
commit 384cfe5677
2 changed files with 9 additions and 2 deletions

View File

@ -251,6 +251,13 @@ void thread::operator()()
//std::cout << "thread::operator() Parameters: " << m_start << " " << m_stop << std::endl;
//DBG().cout() << "Thread " << m_threadID << " (" << boost::this_thread::get_id() << ") started." << endl;
// speed up the calculation of denormal floating point values (flush-to-zero)
#ifndef SSE_CORRECT_DENORMALS
unsigned int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting
unsigned int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits
_mm_setcsr( newMXCSR ); //write the new MXCSR setting to the MXCSR
#endif
while (!m_enginePtr->m_stopThreads)
{
// wait for start

View File

@ -38,8 +38,8 @@ Engine_sse::Engine_sse(const Operator_sse* op) : Engine(op)
// speed up the calculation of denormal floating point values (flush-to-zero)
#ifndef SSE_CORRECT_DENORMALS
int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting
int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits
unsigned int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting
unsigned int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits
_mm_setcsr( newMXCSR ); //write the new MXCSR setting to the MXCSR
#endif
}