Improve the speed of the SSE engine by using flush-to-zero
This change modifies the behaviour of the whole program whenever the SSE engine is used. A better approach may be to enable flush-to-zero only in IterateTS()... pull/1/head
parent
911f7c5528
commit
59ffbb5100
|
@ -15,6 +15,7 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include "engine_sse.h"
|
||||
|
||||
//! \brief construct an Engine_sse instance
|
||||
|
@ -34,10 +35,18 @@ Engine_sse::Engine_sse(const Operator_sse* op) : Engine(op)
|
|||
f4_volt = 0;
|
||||
f4_curr = 0;
|
||||
numVectors = ceil((double)numLines[2]/4.0);
|
||||
|
||||
// speed up the calculation of denormal floating point values (flush-to-zero)
|
||||
#ifndef SSE_CORRECT_DENORMALS
|
||||
int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting
|
||||
int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits
|
||||
_mm_setcsr( newMXCSR ); //write the new MXCSR setting to the MXCSR
|
||||
#endif
|
||||
}
|
||||
|
||||
//! \brief destroy the Engine_sse instance (calls Reset())
Engine_sse::~Engine_sse()
|
||||
{
|
||||
// NOTE(review): the MXCSR restore below is disabled. oldMXCSR is declared as a
// local variable inside the constructor, so it is not in scope here; the value
// would have to be stored (e.g. in a member) before this line can be enabled.
// As written, this destructor does not undo the DAZ/FZ bits set by the constructor.
//_mm_setcsr( oldMXCSR ); // restore old setting
|
||||
Reset();
|
||||
}
|
||||
|
||||
|
|
|
@ -12,6 +12,10 @@ INCLUDEPATH += ../CSXCAD \
|
|||
../tinyxml
|
||||
LIBS += -L../CSXCAD -lCSXCAD
|
||||
|
||||
# the SSE engine defaults to flush-to-zero mode, because of speed advantages
|
||||
# to restore the correct handling of denormals and to comply with IEEE 754, uncomment:
|
||||
# DEFINES += SSE_CORRECT_DENORMALS
|
||||
|
||||
win32 {
|
||||
INCLUDEPATH += ../hdf5/include ../boost/include/boost-1_42
|
||||
LIBS += ../hdf5/lib/libhdf5_cpp.a ../hdf5/lib/libhdf5.a
|
||||
|
|
Loading…
Reference in New Issue