From d26986fe0d6e201582dba7c3d9ae2998609e4501 Mon Sep 17 00:00:00 2001 From: Thorsten Liebig Date: Fri, 30 Apr 2010 19:41:44 +0200 Subject: [PATCH] sse: removed more unnecessary methods & new operator access & names that make more sense --- FDTD/engine_sse.cpp | 68 +++++++++++++++++----------------- FDTD/engine_sse.h | 8 ++-- FDTD/operator_sse.cpp | 86 +++++++++---------------------------------- FDTD/operator_sse.h | 14 ++++--- 4 files changed, 63 insertions(+), 113 deletions(-) diff --git a/FDTD/engine_sse.cpp b/FDTD/engine_sse.cpp index 737c97b..cb69548 100644 --- a/FDTD/engine_sse.cpp +++ b/FDTD/engine_sse.cpp @@ -43,18 +43,18 @@ Engine_sse::~Engine_sse() void Engine_sse::Init() { numTS = 0; - volt_ = Create_N_3DArray_v4sf(numLines); - curr_ = Create_N_3DArray_v4sf(numLines); + f4_volt = Create_N_3DArray_v4sf(numLines); + f4_curr = Create_N_3DArray_v4sf(numLines); volt = 0; // not used curr = 0; // not used } void Engine_sse::Reset() { - Delete_N_3DArray_v4sf(volt_,numLines); - volt_ = 0; - Delete_N_3DArray_v4sf(curr_,numLines); - curr_ = 0; + Delete_N_3DArray_v4sf(f4_volt,numLines); + f4_volt = 0; + Delete_N_3DArray_v4sf(f4_curr,numLines); + f4_curr = 0; } void Engine_sse::UpdateVoltages() @@ -72,24 +72,24 @@ void Engine_sse::UpdateVoltages() for (pos[2]=0;pos[2]vv_[0][pos[0]][pos[1]][pos[2]].v; - volt_[0][pos[0]][pos[1]][pos[2]].v += Op->vi_[0][pos[0]][pos[1]][pos[2]].v * ( curr_[2][pos[0]][pos[1]][pos[2]].v - curr_[2][pos[0]][pos[1]-shift[1]][pos[2]].v - curr_[1][pos[0]][pos[1]][pos[2]].v + temp.v ); + temp.f[0] = f4_curr[1][pos[0]][pos[1]][pos[2]-(bool)pos[2]].f[3]; + temp.f[1] = f4_curr[1][pos[0]][pos[1]][pos[2]].f[0]; + temp.f[2] = f4_curr[1][pos[0]][pos[1]][pos[2]].f[1]; + temp.f[3] = f4_curr[1][pos[0]][pos[1]][pos[2]].f[2]; + f4_volt[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[0][pos[0]][pos[1]][pos[2]].v; + f4_volt[0][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[0][pos[0]][pos[1]][pos[2]].v * ( f4_curr[2][pos[0]][pos[1]][pos[2]].v - f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - f4_curr[1][pos[0]][pos[1]][pos[2]].v + temp.v ); // y-polarization - temp.f[0] = curr_[0][pos[0]][pos[1]][pos[2]-(bool)pos[2]].f[3]; - temp.f[1] = curr_[0][pos[0]][pos[1]][pos[2]].f[0]; - temp.f[2] = curr_[0][pos[0]][pos[1]][pos[2]].f[1]; - temp.f[3] = curr_[0][pos[0]][pos[1]][pos[2]].f[2]; - volt_[1][pos[0]][pos[1]][pos[2]].v *= Op->vv_[1][pos[0]][pos[1]][pos[2]].v; - volt_[1][pos[0]][pos[1]][pos[2]].v += Op->vi_[1][pos[0]][pos[1]][pos[2]].v * ( curr_[0][pos[0]][pos[1]][pos[2]].v - temp.v - curr_[2][pos[0]][pos[1]][pos[2]].v + curr_[2][pos[0]-shift[0]][pos[1]][pos[2]].v); + temp.f[0] = f4_curr[0][pos[0]][pos[1]][pos[2]-(bool)pos[2]].f[3]; + temp.f[1] = f4_curr[0][pos[0]][pos[1]][pos[2]].f[0]; + temp.f[2] = f4_curr[0][pos[0]][pos[1]][pos[2]].f[1]; + temp.f[3] = f4_curr[0][pos[0]][pos[1]][pos[2]].f[2]; + f4_volt[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[1][pos[0]][pos[1]][pos[2]].v; + f4_volt[1][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[1][pos[0]][pos[1]][pos[2]].v * ( f4_curr[0][pos[0]][pos[1]][pos[2]].v - temp.v - f4_curr[2][pos[0]][pos[1]][pos[2]].v + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]].v); // z-polarization - volt_[2][pos[0]][pos[1]][pos[2]].v *= Op->vv_[2][pos[0]][pos[1]][pos[2]].v; - volt_[2][pos[0]][pos[1]][pos[2]].v += Op->vi_[2][pos[0]][pos[1]][pos[2]].v * ( curr_[1][pos[0]][pos[1]][pos[2]].v - curr_[1][pos[0]-shift[0]][pos[1]][pos[2]].v - curr_[0][pos[0]][pos[1]][pos[2]].v + curr_[0][pos[0]][pos[1]-shift[1]][pos[2]].v); + f4_volt[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[2][pos[0]][pos[1]][pos[2]].v; + f4_volt[2][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[2][pos[0]][pos[1]][pos[2]].v * ( f4_curr[1][pos[0]][pos[1]][pos[2]].v - f4_curr[1][pos[0]-shift[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]].v + f4_curr[0][pos[0]][pos[1]-shift[1]][pos[2]].v); } } } @@ -107,24 +107,24 @@ void Engine_sse::UpdateCurrents() for (pos[2]=0;pos[2]ii_[0][pos[0]][pos[1]][pos[2]].v; - curr_[0][pos[0]][pos[1]][pos[2]].v += Op->iv_[0][pos[0]][pos[1]][pos[2]].v * ( volt_[2][pos[0]][pos[1]][pos[2]].v - volt_[2][pos[0]][pos[1]+1][pos[2]].v - volt_[1][pos[0]][pos[1]][pos[2]].v + temp.v); + temp.f[0] = f4_volt[1][pos[0]][pos[1]][pos[2]].f[1]; + temp.f[1] = f4_volt[1][pos[0]][pos[1]][pos[2]].f[2]; + temp.f[2] = f4_volt[1][pos[0]][pos[1]][pos[2]].f[3]; + temp.f[3] = f4_volt[1][pos[0]][pos[1]][pos[2]+1].f[0]; // FIXME outside sim area + f4_curr[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[0][pos[0]][pos[1]][pos[2]].v; + f4_curr[0][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[0][pos[0]][pos[1]][pos[2]].v * ( f4_volt[2][pos[0]][pos[1]][pos[2]].v - f4_volt[2][pos[0]][pos[1]+1][pos[2]].v - f4_volt[1][pos[0]][pos[1]][pos[2]].v + temp.v); // y-pol - temp.f[0] = volt_[0][pos[0]][pos[1]][pos[2]].f[1]; - temp.f[1] = volt_[0][pos[0]][pos[1]][pos[2]].f[2]; - temp.f[2] = volt_[0][pos[0]][pos[1]][pos[2]].f[3]; - temp.f[3] = volt_[0][pos[0]][pos[1]][pos[2]+1].f[0]; // FIXME outside sim area - curr_[1][pos[0]][pos[1]][pos[2]].v *= Op->ii_[1][pos[0]][pos[1]][pos[2]].v; - curr_[1][pos[0]][pos[1]][pos[2]].v += Op->iv_[1][pos[0]][pos[1]][pos[2]].v * ( volt_[0][pos[0]][pos[1]][pos[2]].v - temp.v - volt_[2][pos[0]][pos[1]][pos[2]].v + volt_[2][pos[0]+1][pos[1]][pos[2]].v); + temp.f[0] = f4_volt[0][pos[0]][pos[1]][pos[2]].f[1]; + temp.f[1] = f4_volt[0][pos[0]][pos[1]][pos[2]].f[2]; + temp.f[2] = f4_volt[0][pos[0]][pos[1]][pos[2]].f[3]; + temp.f[3] = f4_volt[0][pos[0]][pos[1]][pos[2]+1].f[0]; // FIXME outside sim area + f4_curr[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[1][pos[0]][pos[1]][pos[2]].v; + f4_curr[1][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[1][pos[0]][pos[1]][pos[2]].v * ( f4_volt[0][pos[0]][pos[1]][pos[2]].v - temp.v - f4_volt[2][pos[0]][pos[1]][pos[2]].v + f4_volt[2][pos[0]+1][pos[1]][pos[2]].v); // z-pol - curr_[2][pos[0]][pos[1]][pos[2]].v *= Op->ii_[2][pos[0]][pos[1]][pos[2]].v; - curr_[2][pos[0]][pos[1]][pos[2]].v += Op->iv_[2][pos[0]][pos[1]][pos[2]].v * ( volt_[1][pos[0]][pos[1]][pos[2]].v - volt_[1][pos[0]+1][pos[1]][pos[2]].v - volt_[0][pos[0]][pos[1]][pos[2]].v + volt_[0][pos[0]][pos[1]+1][pos[2]].v); + f4_curr[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[2][pos[0]][pos[1]][pos[2]].v; + f4_curr[2][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[2][pos[0]][pos[1]][pos[2]].v * ( f4_volt[1][pos[0]][pos[1]][pos[2]].v - f4_volt[1][pos[0]+1][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]].v + f4_volt[0][pos[0]][pos[1]+1][pos[2]].v); } } } diff --git a/FDTD/engine_sse.h b/FDTD/engine_sse.h index b980965..c9cbdaf 100644 --- a/FDTD/engine_sse.h +++ b/FDTD/engine_sse.h @@ -32,8 +32,8 @@ public: virtual unsigned int GetNumberOfTimesteps() {return numTS;}; - inline virtual FDTD_FLOAT& GetVolt( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { return volt_[n][x][y][z/4].f[z%4]; } - inline virtual FDTD_FLOAT& GetCurr( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { return curr_[n][x][y][z/4].f[z%4]; } + inline virtual FDTD_FLOAT& GetVolt( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { return f4_volt[n][x][y][z/4].f[z%4]; } + inline virtual FDTD_FLOAT& GetCurr( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { return f4_curr[n][x][y][z/4].f[z%4]; } protected: Engine_sse(const Operator_sse* op); @@ -42,8 +42,8 @@ protected: virtual void UpdateVoltages(); virtual void UpdateCurrents(); - f4vector**** volt_; - f4vector**** curr_; + f4vector**** f4_volt; + f4vector**** f4_curr; }; #endif // ENGINE_SSE_H diff --git a/FDTD/operator_sse.cpp b/FDTD/operator_sse.cpp index 3846d53..23f7d73 100644 --- a/FDTD/operator_sse.cpp +++ b/FDTD/operator_sse.cpp @@ -37,83 +37,31 @@ Operator_sse::~Operator_sse() void Operator_sse::Init() { Operator::Init(); - vv_ = 0; - vi_ = 0; - iv_ = 0; - ii_ = 0; + f4_vv = 0; + f4_vi = 0; + f4_iv = 0; + f4_ii = 0; } void Operator_sse::Reset() { - Delete_N_3DArray_v4sf(vv_,numLines); - Delete_N_3DArray_v4sf(vi_,numLines); - Delete_N_3DArray_v4sf(iv_,numLines); - Delete_N_3DArray_v4sf(ii_,numLines); + Delete_N_3DArray_v4sf(f4_vv,numLines); + Delete_N_3DArray_v4sf(f4_vi,numLines); + Delete_N_3DArray_v4sf(f4_iv,numLines); + Delete_N_3DArray_v4sf(f4_ii,numLines); Operator::Reset(); - Init(); // FIXME this calls Operator::Init() twice... +// Init(); // FIXME this calls Operator::Init() twice... } void Operator_sse::InitOperator() { - Operator::InitOperator(); - Delete_N_3DArray_v4sf(vv_,numLines); - Delete_N_3DArray_v4sf(vi_,numLines); - Delete_N_3DArray_v4sf(iv_,numLines); - Delete_N_3DArray_v4sf(ii_,numLines); - vv_ = Create_N_3DArray_v4sf(numLines); - vi_ = Create_N_3DArray_v4sf(numLines); - iv_ = Create_N_3DArray_v4sf(numLines); - ii_ = Create_N_3DArray_v4sf(numLines); + Delete_N_3DArray_v4sf(f4_vv,numLines); + Delete_N_3DArray_v4sf(f4_vi,numLines); + Delete_N_3DArray_v4sf(f4_iv,numLines); + Delete_N_3DArray_v4sf(f4_ii,numLines); + f4_vv = Create_N_3DArray_v4sf(numLines); + f4_vi = Create_N_3DArray_v4sf(numLines); + f4_iv = Create_N_3DArray_v4sf(numLines); + f4_ii = Create_N_3DArray_v4sf(numLines); } -int Operator_sse::CalcECOperator() -{ - Operator::CalcECOperator(); - - // copy operator to aligned memory - // FIXME this is really inefficient! - unsigned int pos[3]; - for (int n=0;n<3;++n) - { - for (pos[0]=0;pos[0]