diff --git a/FDTD/engine_sse.h b/FDTD/engine_sse.h
index 17d74b0..5141379 100644
--- a/FDTD/engine_sse.h
+++ b/FDTD/engine_sse.h
@@ -18,13 +18,13 @@
#ifndef ENGINE_SSE_H
#define ENGINE_SSE_H
-#include "operator.h"
#include "engine.h"
+#include "operator_sse.h"
class Engine_sse : public Engine
{
public:
- static Engine_sse* New(const Operator* op);
+ static Engine_sse* New(const Operator_sse* op);
virtual ~Engine_sse();
virtual void Init();
@@ -39,8 +39,8 @@ public:
inline virtual FDTD_FLOAT GetCurr( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { return curr_[n][x][y][z/4].f[z%4]; }
protected:
- Engine_sse(const Operator* op);
- const Operator* Op;
+ Engine_sse(const Operator_sse* op);
+ const Operator_sse* Op;
virtual void UpdateVoltages();
virtual void ApplyVoltageExcite();
diff --git a/FDTD/operator.cpp b/FDTD/operator.cpp
index 546a02b..8314cb6 100644
--- a/FDTD/operator.cpp
+++ b/FDTD/operator.cpp
@@ -47,12 +47,8 @@ void Operator::Init()
E_Exc_dir=NULL;
vv=NULL;
vi=NULL;
- vv_=NULL;
- vi_=NULL;
iv=NULL;
ii=NULL;
- iv_=NULL;
- ii_=NULL;
for (int n=0;n<3;++n)
{
discLines[n]=NULL;
@@ -79,12 +75,8 @@ void Operator::Reset()
delete[] E_Exc_amp;
Delete_N_3DArray(vv,numLines);
Delete_N_3DArray(vi,numLines);
- Delete_N_3DArray_v4sf(vv_,numLines);
- Delete_N_3DArray_v4sf(vi_,numLines);
Delete_N_3DArray(iv,numLines);
Delete_N_3DArray(ii,numLines);
- Delete_N_3DArray_v4sf(iv_,numLines);
- Delete_N_3DArray_v4sf(ii_,numLines);
for (int n=0;n<3;++n)
{
delete[] discLines[n];
@@ -510,20 +502,12 @@ void Operator::InitOperator()
{
Delete_N_3DArray(vv,numLines);
Delete_N_3DArray(vi,numLines);
- Delete_N_3DArray_v4sf(vv_,numLines);
- Delete_N_3DArray_v4sf(vi_,numLines);
Delete_N_3DArray(iv,numLines);
Delete_N_3DArray(ii,numLines);
- Delete_N_3DArray_v4sf(iv_,numLines);
- Delete_N_3DArray_v4sf(ii_,numLines);
vv = Create_N_3DArray(numLines);
vi = Create_N_3DArray(numLines);
- vv_ = Create_N_3DArray_v4sf(numLines);
- vi_ = Create_N_3DArray_v4sf(numLines);
iv = Create_N_3DArray(numLines);
ii = Create_N_3DArray(numLines);
- iv_ = Create_N_3DArray_v4sf(numLines);
- ii_ = Create_N_3DArray_v4sf(numLines);
}
inline void Operator::Calc_ECOperatorPos(int n, unsigned int* pos)
@@ -577,27 +561,6 @@ int Operator::CalcECOperator()
if (CalcEFieldExcitation()==false) return -1;
CalcPEC();
-
- // copy operator to aligned memory (only for sse engine)
- // FIXME this is really inefficient!
- for (int n=0;n<3;++n)
- {
- for (pos[0]=0;pos[0].
+*/
+
+#include "operator_sse.h"
+#include "tools/array_ops.h"
+
+//! Factory: allocate an Operator_sse and run its Init() before handing it out.
+Operator_sse* Operator_sse::New()
+{
+	// Init() is invoked on the fully constructed object, not from the constructor
+	Operator_sse* instance = new Operator_sse();
+	instance->Init();
+	return instance;
+}
+
+//! Construct the operator; use New() to obtain a fully initialized instance.
+//! The SSE array pointers start out as null so that the destructor's Reset()
+//! never hands an indeterminate pointer to Delete_N_3DArray_v4sf(), even if
+//! Init() was not run on this object.
+Operator_sse::Operator_sse() : Operator(), vv_(0), vi_(0), iv_(0), ii_(0)
+{
+}
+
+//! Destroy the operator, releasing the SSE arrays through Reset().
+Operator_sse::~Operator_sse()
+{
+	// spelled with the class qualifier: inside this destructor the call
+	// resolves to Operator_sse::Reset() either way
+	Operator_sse::Reset();
+}
+
+//! Initialize the base operator, then mark all SSE arrays as not allocated.
+void Operator_sse::Init()
+{
+	Operator::Init();
+
+	// no aligned storage exists yet
+	vv_ = vi_ = iv_ = ii_ = 0;
+}
+
+//! Free the SSE operator arrays and reset the base operator.
+//! Afterwards vv_, vi_, iv_ and ii_ are null, so Reset() may be called again
+//! (e.g. from the destructor) without touching freed memory.
+void Operator_sse::Reset()
+{
+	// release the SSE arrays before the base class is reset; numLines is
+	// passed to the delete helper and is owned by the base operator
+	Delete_N_3DArray_v4sf(vv_,numLines);
+	Delete_N_3DArray_v4sf(vi_,numLines);
+	Delete_N_3DArray_v4sf(iv_,numLines);
+	Delete_N_3DArray_v4sf(ii_,numLines);
+
+	// clear only our own pointers here instead of calling Init(): the base
+	// Reset() already re-initializes the base state, so a trailing Init()
+	// would run Operator::Init() a second time
+	vv_ = 0;
+	vi_ = 0;
+	iv_ = 0;
+	ii_ = 0;
+
+	Operator::Reset();
+}
+
+//! Set up the base operator storage, then (re)allocate the four SSE arrays.
+void Operator_sse::InitOperator()
+{
+	Operator::InitOperator();
+
+	// handled in declaration order: vv_, vi_, iv_, ii_
+	f4vector***** const sseArrays[4] = { &vv_, &vi_, &iv_, &ii_ };
+
+	// drop any previous allocation first ...
+	for (int k=0;k<4;++k)
+		Delete_N_3DArray_v4sf(*sseArrays[k],numLines);
+
+	// ... then allocate fresh arrays for the current numLines
+	for (int k=0;k<4;++k)
+		*sseArrays[k] = Create_N_3DArray_v4sf(numLines);
+}
+
+int Operator_sse::CalcECOperator()
+{
+ Operator::CalcECOperator();
+
+ // copy operator to aligned memory
+ // FIXME this is really inefficient!
+ unsigned int pos[3];
+ for (int n=0;n<3;++n)
+ {
+ for (pos[0]=0;pos[0].
+*/
+
+#ifndef OPERATOR_SSE_H
+#define OPERATOR_SSE_H
+
+#include "operator.h"
+#include "tools/array_ops.h"
+
+//! Operator variant that additionally keeps its coefficients in f4vector arrays.
+//! Engine_sse takes a pointer to this class instead of the plain Operator.
+class Operator_sse : public Operator
+{
+public:
+ //! Create a new operator
+ static Operator_sse* New();
+ virtual ~Operator_sse();
+
+ //! Run Operator::CalcECOperator(), then copy the operator to aligned memory
+ virtual int CalcECOperator();
+
+ virtual void ApplyMagneticBC(bool* dirs);
+
+protected:
+ //! use New() for creating a new Operator
+ Operator_sse();
+
+ //! Run Operator::Init() and set the four SSE array pointers to 0
+ virtual void Init();
+ //! Free the four SSE arrays and reset the base operator
+ virtual void Reset();
+ //! Run Operator::InitOperator(), then (re)allocate the four SSE arrays
+ virtual void InitOperator();
+
+ // engine/post-proc needs access
+public:
+ f4vector**** vv_; //calc new voltage from old voltage
+ f4vector**** vi_; //calc new voltage from old current
+ f4vector**** iv_; //calc new current from old voltage
+ f4vector**** ii_; //calc new current from old current
+};
+
+#endif // OPERATOR_SSE_H
diff --git a/openems.cpp b/openems.cpp
index e908fdd..6fca5e4 100644
--- a/openems.cpp
+++ b/openems.cpp
@@ -254,6 +254,10 @@ int openEMS::SetupFDTD(const char* file)
FDTD_Op = Operator_Cylinder::New();
CSX.SetCoordInputType(1); //tell CSX to use cylinder-coords
}
+ else if (m_engine == EngineType_SSE)
+ {
+ FDTD_Op = Operator_sse::New();
+ }
else
{
FDTD_Op = Operator::New();
@@ -295,7 +299,7 @@ int openEMS::SetupFDTD(const char* file)
FDTD_Eng = Engine_Multithread::New(FDTD_Op,m_engine_numThreads);
break;
case EngineType_SSE:
- FDTD_Eng = Engine_sse::New(FDTD_Op);
+ FDTD_Eng = Engine_sse::New(dynamic_cast(FDTD_Op));
break;
default:
FDTD_Eng = Engine::New(FDTD_Op);
diff --git a/tools/array_ops.h b/tools/array_ops.h
index 24cfec2..e3307d4 100644
--- a/tools/array_ops.h
+++ b/tools/array_ops.h
@@ -18,6 +18,19 @@
#ifndef ARRAY_OPS_H
#define ARRAY_OPS_H
+#if __SIZEOF_FLOAT__ != 4
+ #error wrong size of float
+#endif
+
+typedef float v4sf __attribute__ ((vector_size (16))); // vector of four single floats
+
+union f4vector // four packed floats, viewable as one vector or as single elements
+{
+ v4sf v; // all four floats as one 16-byte vector
+ float f[4]; // the same storage, one float at a time
+};
+
+
#include "../FDTD/operator.h"
FDTD_FLOAT*** Create3DArray(const unsigned int* numLines);