From 384cfe567765da76637fa2e1566596244a97c82b Mon Sep 17 00:00:00 2001 From: Sebastian Held Date: Sat, 6 Oct 2012 20:50:36 +0200 Subject: [PATCH] bugfix Windows: FTZ and DAZ bits in SSE computation unit are thread dependent. This fix sets the FTZ and DAZ bits in every thread to gain the speedup. Linux seems to store these bits per process rather than per thread. --- FDTD/engine_multithread.cpp | 7 +++++++ FDTD/engine_sse.cpp | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/FDTD/engine_multithread.cpp b/FDTD/engine_multithread.cpp index 96273de..0bc3d68 100644 --- a/FDTD/engine_multithread.cpp +++ b/FDTD/engine_multithread.cpp @@ -251,6 +251,13 @@ void thread::operator()() //std::cout << "thread::operator() Parameters: " << m_start << " " << m_stop << std::endl; //DBG().cout() << "Thread " << m_threadID << " (" << boost::this_thread::get_id() << ") started." << endl; + // speed up the calculation of denormal floating point values (flush-to-zero) +#ifndef SSE_CORRECT_DENORMALS + unsigned int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting + unsigned int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits + _mm_setcsr( newMXCSR ); //write the new MXCSR setting to the MXCSR +#endif + while (!m_enginePtr->m_stopThreads) { // wait for start diff --git a/FDTD/engine_sse.cpp b/FDTD/engine_sse.cpp index b075a49..660e6d6 100644 --- a/FDTD/engine_sse.cpp +++ b/FDTD/engine_sse.cpp @@ -38,8 +38,8 @@ Engine_sse::Engine_sse(const Operator_sse* op) : Engine(op) // speed up the calculation of denormal floating point values (flush-to-zero) #ifndef SSE_CORRECT_DENORMALS - int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting - int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits + unsigned int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting + unsigned int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits _mm_setcsr( newMXCSR ); //write the new MXCSR setting to the MXCSR #endif }