From a19ea3350d4432e73c02b92639acc102d38a157a Mon Sep 17 00:00:00 2001 From: Sebastian Held Date: Wed, 21 Apr 2010 15:36:23 +0200 Subject: [PATCH] removed sse code from normal operator -> class Operator_sse --- FDTD/engine_sse.h | 8 +-- FDTD/operator.cpp | 37 ------------- FDTD/operator.h | 19 ------- FDTD/operator_sse.cpp | 119 ++++++++++++++++++++++++++++++++++++++++++ FDTD/operator_sse.h | 51 ++++++++++++++++++ openems.cpp | 6 ++- tools/array_ops.h | 13 +++++ 7 files changed, 192 insertions(+), 61 deletions(-) create mode 100644 FDTD/operator_sse.cpp create mode 100644 FDTD/operator_sse.h diff --git a/FDTD/engine_sse.h b/FDTD/engine_sse.h index 17d74b0..5141379 100644 --- a/FDTD/engine_sse.h +++ b/FDTD/engine_sse.h @@ -18,13 +18,13 @@ #ifndef ENGINE_SSE_H #define ENGINE_SSE_H -#include "operator.h" #include "engine.h" +#include "operator_sse.h" class Engine_sse : public Engine { public: - static Engine_sse* New(const Operator* op); + static Engine_sse* New(const Operator_sse* op); virtual ~Engine_sse(); virtual void Init(); @@ -39,8 +39,8 @@ public: inline virtual FDTD_FLOAT GetCurr( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { return curr_[n][x][y][z/4].f[z%4]; } protected: - Engine_sse(const Operator* op); - const Operator* Op; + Engine_sse(const Operator_sse* op); + const Operator_sse* Op; virtual void UpdateVoltages(); virtual void ApplyVoltageExcite(); diff --git a/FDTD/operator.cpp b/FDTD/operator.cpp index 546a02b..8314cb6 100644 --- a/FDTD/operator.cpp +++ b/FDTD/operator.cpp @@ -47,12 +47,8 @@ void Operator::Init() E_Exc_dir=NULL; vv=NULL; vi=NULL; - vv_=NULL; - vi_=NULL; iv=NULL; ii=NULL; - iv_=NULL; - ii_=NULL; for (int n=0;n<3;++n) { discLines[n]=NULL; @@ -79,12 +75,8 @@ void Operator::Reset() delete[] E_Exc_amp; Delete_N_3DArray(vv,numLines); Delete_N_3DArray(vi,numLines); - Delete_N_3DArray_v4sf(vv_,numLines); - Delete_N_3DArray_v4sf(vi_,numLines); Delete_N_3DArray(iv,numLines); Delete_N_3DArray(ii,numLines); - Delete_N_3DArray_v4sf(iv_,numLines); - Delete_N_3DArray_v4sf(ii_,numLines); for (int n=0;n<3;++n) { delete[] discLines[n]; @@ -510,20 +502,12 @@ void Operator::InitOperator() { Delete_N_3DArray(vv,numLines); Delete_N_3DArray(vi,numLines); - Delete_N_3DArray_v4sf(vv_,numLines); - Delete_N_3DArray_v4sf(vi_,numLines); Delete_N_3DArray(iv,numLines); Delete_N_3DArray(ii,numLines); - Delete_N_3DArray_v4sf(iv_,numLines); - Delete_N_3DArray_v4sf(ii_,numLines); vv = Create_N_3DArray(numLines); vi = Create_N_3DArray(numLines); - vv_ = Create_N_3DArray_v4sf(numLines); - vi_ = Create_N_3DArray_v4sf(numLines); iv = Create_N_3DArray(numLines); ii = Create_N_3DArray(numLines); - iv_ = Create_N_3DArray_v4sf(numLines); - ii_ = Create_N_3DArray_v4sf(numLines); } inline void Operator::Calc_ECOperatorPos(int n, unsigned int* pos) @@ -577,27 +561,6 @@ int Operator::CalcECOperator() if (CalcEFieldExcitation()==false) return -1; CalcPEC(); - - // copy operator to aligned memory (only for sse engine) - // FIXME this is really inefficient! - for (int n=0;n<3;++n) - { - for (pos[0]=0;pos[0]. +*/ + +#include "operator_sse.h" +#include "tools/array_ops.h" + +Operator_sse* Operator_sse::New() +{ + Operator_sse* op = new Operator_sse(); + op->Init(); + return op; +} + +Operator_sse::Operator_sse() : Operator() +{ +} + +Operator_sse::~Operator_sse() +{ + Reset(); +} + +void Operator_sse::Init() +{ + Operator::Init(); + vv_ = 0; + vi_ = 0; + iv_ = 0; + ii_ = 0; +} + +void Operator_sse::Reset() +{ + Delete_N_3DArray_v4sf(vv_,numLines); + Delete_N_3DArray_v4sf(vi_,numLines); + Delete_N_3DArray_v4sf(iv_,numLines); + Delete_N_3DArray_v4sf(ii_,numLines); + Operator::Reset(); + Init(); // FIXME this calls Operator::Init() twice... +} + +void Operator_sse::InitOperator() +{ + Operator::InitOperator(); + Delete_N_3DArray_v4sf(vv_,numLines); + Delete_N_3DArray_v4sf(vi_,numLines); + Delete_N_3DArray_v4sf(iv_,numLines); + Delete_N_3DArray_v4sf(ii_,numLines); + vv_ = Create_N_3DArray_v4sf(numLines); + vi_ = Create_N_3DArray_v4sf(numLines); + iv_ = Create_N_3DArray_v4sf(numLines); + ii_ = Create_N_3DArray_v4sf(numLines); +} + +int Operator_sse::CalcECOperator() +{ + Operator::CalcECOperator(); + + // copy operator to aligned memory + // FIXME this is really inefficient! + unsigned int pos[3]; + for (int n=0;n<3;++n) + { + for (pos[0]=0;pos[0]. +*/ + +#ifndef OPERATOR_SSE_H +#define OPERATOR_SSE_H + +#include "operator.h" +#include "tools/array_ops.h" + +class Operator_sse : public Operator +{ +public: + //! Create a new operator + static Operator_sse* New(); + virtual ~Operator_sse(); + + virtual int CalcECOperator(); + + virtual void ApplyMagneticBC(bool* dirs); + +protected: + //! use New() for creating a new Operator + Operator_sse(); + + virtual void Init(); + virtual void Reset(); + virtual void InitOperator(); + + // engine/post-proc needs access +public: + f4vector**** vv_; //calc new voltage from old voltage + f4vector**** vi_; //calc new voltage from old current + f4vector**** iv_; //calc new current from old current + f4vector**** ii_; //calc new current from old voltage +}; + +#endif // OPERATOR_SSE_H diff --git a/openems.cpp b/openems.cpp index e908fdd..6fca5e4 100644 --- a/openems.cpp +++ b/openems.cpp @@ -254,6 +254,10 @@ int openEMS::SetupFDTD(const char* file) FDTD_Op = Operator_Cylinder::New(); CSX.SetCoordInputType(1); //tell CSX to use cylinder-coords } + else if (m_engine == EngineType_SSE) + { + FDTD_Op = Operator_sse::New(); + } else { FDTD_Op = Operator::New(); @@ -295,7 +299,7 @@ int openEMS::SetupFDTD(const char* file) FDTD_Eng = Engine_Multithread::New(FDTD_Op,m_engine_numThreads); break; case EngineType_SSE: - FDTD_Eng = Engine_sse::New(FDTD_Op); + FDTD_Eng = Engine_sse::New(dynamic_cast(FDTD_Op)); break; default: FDTD_Eng = Engine::New(FDTD_Op); diff --git a/tools/array_ops.h b/tools/array_ops.h index 24cfec2..e3307d4 100644 --- a/tools/array_ops.h +++ b/tools/array_ops.h @@ -18,6 +18,19 @@ #ifndef ARRAY_OPS_H #define ARRAY_OPS_H +#if __SIZEOF_FLOAT__ != 4 + #error wrong size of float +#endif + +typedef float v4sf __attribute__ ((vector_size (16))); // vector of four single floats + +union f4vector +{ + v4sf v; + float f[4]; +}; + + #include "../FDTD/operator.h" FDTD_FLOAT*** Create3DArray(const unsigned int* numLines);