From 59ffbb51009f2a162f0710fe9d5102b4df778b08 Mon Sep 17 00:00:00 2001
From: Sebastian Held
Date: Tue, 13 Jul 2010 10:47:40 +0200
Subject: [PATCH] Improve the speed of the SSE engine by using flush-to-zero

This change modifies the behaviour of the complete program, if the SSE engine is used.
A better approach may be to only enable flush-to-zero in IterateTS()...
---
 FDTD/engine_sse.cpp | 9 +++++++++
 openEMS.pro | 4 ++++
 2 files changed, 13 insertions(+)

diff --git a/FDTD/engine_sse.cpp b/FDTD/engine_sse.cpp
index afe4d32..02bcecf 100644
--- a/FDTD/engine_sse.cpp
+++ b/FDTD/engine_sse.cpp
@@ -15,6 +15,7 @@
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
+#include <xmmintrin.h>
 #include "engine_sse.h"
 
 //! \brief construct an Engine_sse instance
@@ -34,10 +35,18 @@ Engine_sse::Engine_sse(const Operator_sse* op) : Engine(op)
 	f4_volt = 0;
 	f4_curr = 0;
 	numVectors = ceil((double)numLines[2]/4.0);
+
+	// speed up the calculation of denormal floating point values (flush-to-zero)
+#ifndef SSE_CORRECT_DENORMALS
+	int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting
+	int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits
+	_mm_setcsr( newMXCSR ); //write the new MXCSR setting to the MXCSR
+#endif
 }
 
 Engine_sse::~Engine_sse()
 {
+	//_mm_setcsr( oldMXCSR ); // restore old setting
 	Reset();
 }
 
diff --git a/openEMS.pro b/openEMS.pro
index 35ba129..1296cd8 100644
--- a/openEMS.pro
+++ b/openEMS.pro
@@ -12,6 +12,10 @@ INCLUDEPATH += ../CSXCAD \
     ../tinyxml
 LIBS += -L../CSXCAD -lCSXCAD
 
+# the SSE engine defaults to flush-to-zero mode, because of speed advantages
+# to restore the correct handling of denormals and to comply to IEEE 754 uncomment:
+# DEFINES += SSE_CORRECT_DENORMALS
+
 win32 {
     INCLUDEPATH += ../hdf5/include ../boost/include/boost-1_42
     LIBS += ../hdf5/lib/libhdf5_cpp.a ../hdf5/lib/libhdf5.a