diff --git a/FDTD/engine_sse.cpp b/FDTD/engine_sse.cpp
index afe4d32..02bcecf 100644
--- a/FDTD/engine_sse.cpp
+++ b/FDTD/engine_sse.cpp
@@ -15,6 +15,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <xmmintrin.h>
#include "engine_sse.h"
//! \brief construct an Engine_sse instance
@@ -34,10 +35,18 @@ Engine_sse::Engine_sse(const Operator_sse* op) : Engine(op)
f4_volt = 0;
f4_curr = 0;
numVectors = ceil((double)numLines[2]/4.0);
+
+ // speed up calculations involving denormal floating point values by treating them as zero (denormals-are-zero and flush-to-zero)
+#ifndef SSE_CORRECT_DENORMALS
+ int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting
+ int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits
+ _mm_setcsr( newMXCSR ); //write the new MXCSR setting to the MXCSR
+#endif
}
Engine_sse::~Engine_sse()
{
+ //_mm_setcsr( oldMXCSR ); // restore old setting (disabled: oldMXCSR is a local variable of the constructor and not visible here, so MXCSR is not restored by this call; TODO: keep the old value in a member)
Reset();
}
diff --git a/openEMS.pro b/openEMS.pro
index 35ba129..1296cd8 100644
--- a/openEMS.pro
+++ b/openEMS.pro
@@ -12,6 +12,10 @@ INCLUDEPATH += ../CSXCAD \
../tinyxml
LIBS += -L../CSXCAD -lCSXCAD
+# the SSE engine defaults to flush-to-zero mode, because of speed advantages
+# to restore correct handling of denormals and comply with IEEE 754, uncomment:
+# DEFINES += SSE_CORRECT_DENORMALS
+
win32 {
INCLUDEPATH += ../hdf5/include ../boost/include/boost-1_42
LIBS += ../hdf5/lib/libhdf5_cpp.a ../hdf5/lib/libhdf5.a