diff --git a/FDTD/engine_sse.cpp b/FDTD/engine_sse.cpp index e434b68..514072c 100644 --- a/FDTD/engine_sse.cpp +++ b/FDTD/engine_sse.cpp @@ -45,23 +45,26 @@ void Engine_sse::Init() { numTS = 0; volt_ = Create_N_3DArray_v4sf(numLines); - curr = Create_N_3DArray(numLines); + curr_ = Create_N_3DArray_v4sf(numLines); + volt = 0; // not used + curr = 0; // not used +// Engine::Init(); //FIXME currently postprocessing operates on volt and curr arrays, which are not updated by this engine!!!! } void Engine_sse::Reset() { Delete_N_3DArray_v4sf(volt_,numLines); - volt=NULL; - Delete_N_3DArray(curr,numLines); - curr=NULL; + volt_ = 0; + Delete_N_3DArray_v4sf(curr_,numLines); + curr_ = 0; } void Engine_sse::UpdateVoltages() { - unsigned int pos[4]; - bool shift[3]; + unsigned int pos[3]; + bool shift[2]; + f4vector temp; - //voltage updates for (pos[0]=0;pos[0]vv_[0][pos[0]][pos[1]][pos[2]].v; + volt_[0][pos[0]][pos[1]][pos[2]].v += Op->vi_[0][pos[0]][pos[1]][pos[2]].v * ( curr_[2][pos[0]][pos[1]][pos[2]].v - curr_[2][pos[0]][pos[1]-shift[1]][pos[2]].v - curr_[1][pos[0]][pos[1]][pos[2]].v + temp.v ); + + // y-polarization + temp.f[0] = curr_[0][pos[0]][pos[1]][pos[2]-(bool)pos[2]].f[3]; + temp.f[1] = curr_[0][pos[0]][pos[1]][pos[2]].f[0]; + temp.f[2] = curr_[0][pos[0]][pos[1]][pos[2]].f[1]; + temp.f[3] = curr_[0][pos[0]][pos[1]][pos[2]].f[2]; volt_[1][pos[0]][pos[1]][pos[2]].v *= Op->vv_[1][pos[0]][pos[1]][pos[2]].v; + volt_[1][pos[0]][pos[1]][pos[2]].v += Op->vi_[1][pos[0]][pos[1]][pos[2]].v * ( curr_[0][pos[0]][pos[1]][pos[2]].v - temp.v - curr_[2][pos[0]][pos[1]][pos[2]].v + curr_[2][pos[0]-shift[0]][pos[1]][pos[2]].v); + + // z-polarization volt_[2][pos[0]][pos[1]][pos[2]].v *= Op->vv_[2][pos[0]][pos[1]][pos[2]].v; - - - for (pos[3]=0;pos[3]<4;++pos[3]) { - shift[2]=pos[2]+pos[3]; - volt_[0][pos[0]][pos[1]][pos[2]].f[pos[3]] += Op->vi_[0][pos[0]][pos[1]][pos[2]].f[pos[3]] * ( curr[2][pos[0]][pos[1]][pos[2]] - curr[2][pos[0]][pos[1]-shift[1]][pos[2]] - curr[1][pos[0]][pos[1]][pos[2]] + curr[1][pos[0]][pos[1]][pos[2]-shift[2]]); - volt_[1][pos[0]][pos[1]][pos[2]].f[pos[3]] += Op->vi_[1][pos[0]][pos[1]][pos[2]].f[pos[3]] * ( curr[0][pos[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]-shift[2]] - curr[2][pos[0]][pos[1]][pos[2]] + curr[2][pos[0]-shift[0]][pos[1]][pos[2]]); - volt_[2][pos[0]][pos[1]][pos[2]].f[pos[3]] += Op->vi_[2][pos[0]][pos[1]][pos[2]].f[pos[3]] * ( curr[1][pos[0]][pos[1]][pos[2]] - curr[1][pos[0]-shift[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]] + curr[0][pos[0]][pos[1]-shift[1]][pos[2]]); - } + volt_[2][pos[0]][pos[1]][pos[2]].v += Op->vi_[2][pos[0]][pos[1]][pos[2]].v * ( curr_[1][pos[0]][pos[1]][pos[2]].v - curr_[1][pos[0]-shift[0]][pos[1]][pos[2]].v - curr_[0][pos[0]][pos[1]][pos[2]].v + curr_[0][pos[0]][pos[1]-shift[1]][pos[2]].v); } } } @@ -91,37 +100,47 @@ void Engine_sse::UpdateVoltages() void Engine_sse::ApplyVoltageExcite() { int exc_pos; + unsigned int pos; //soft voltage excitation here (E-field excite) for (unsigned int n=0;nE_Exc_Count;++n) { exc_pos = (int)numTS - (int)Op->E_Exc_delay[n]; exc_pos *= (exc_pos>0 && exc_pos<=(int)Op->ExciteLength); -// if (n==0) cerr << numTS << " => " << Op->ExciteSignal[exc_pos] << endl; - volt[Op->E_Exc_dir[n]][Op->E_Exc_index[0][n]][Op->E_Exc_index[1][n]][Op->E_Exc_index[2][n]] += Op->E_Exc_amp[n]*Op->ExciteSignal[exc_pos]; + pos = Op->E_Exc_index[2][n]; + volt_[Op->E_Exc_dir[n]][Op->E_Exc_index[0][n]][Op->E_Exc_index[1][n]][pos/4].f[pos%4] += Op->E_Exc_amp[n]*Op->ExciteSignal[exc_pos]; } } void Engine_sse::UpdateCurrents() { - unsigned int pos[3]; + unsigned int pos[5]; + f4vector temp; + for (pos[0]=0;pos[0]ii[0][pos[0]][pos[1]][pos[2]]; - curr[0][pos[0]][pos[1]][pos[2]] += Op->iv[0][pos[0]][pos[1]][pos[2]] * ( volt[2][pos[0]][pos[1]][pos[2]] - volt[2][pos[0]][pos[1]+1][pos[2]] - volt[1][pos[0]][pos[1]][pos[2]] + volt[1][pos[0]][pos[1]][pos[2]+1]); + // x-pol + temp.f[0] = volt_[1][pos[0]][pos[1]][pos[2]].f[1]; + temp.f[1] = volt_[1][pos[0]][pos[1]][pos[2]].f[2]; + temp.f[2] = volt_[1][pos[0]][pos[1]][pos[2]].f[3]; + temp.f[3] = volt_[1][pos[0]][pos[1]][pos[2]+1].f[0]; // FIXME outside sim area + curr_[0][pos[0]][pos[1]][pos[2]].v *= Op->ii_[0][pos[0]][pos[1]][pos[2]].v; + curr_[0][pos[0]][pos[1]][pos[2]].v += Op->iv_[0][pos[0]][pos[1]][pos[2]].v * ( volt_[2][pos[0]][pos[1]][pos[2]].v - volt_[2][pos[0]][pos[1]+1][pos[2]].v - volt_[1][pos[0]][pos[1]][pos[2]].v + temp.v); - //for y - curr[1][pos[0]][pos[1]][pos[2]] *= Op->ii[1][pos[0]][pos[1]][pos[2]]; - curr[1][pos[0]][pos[1]][pos[2]] += Op->iv[1][pos[0]][pos[1]][pos[2]] * ( volt[0][pos[0]][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]+1] - volt[2][pos[0]][pos[1]][pos[2]] + volt[2][pos[0]+1][pos[1]][pos[2]]); + // y-pol + temp.f[0] = volt_[0][pos[0]][pos[1]][pos[2]].f[1]; + temp.f[1] = volt_[0][pos[0]][pos[1]][pos[2]].f[2]; + temp.f[2] = volt_[0][pos[0]][pos[1]][pos[2]].f[3]; + temp.f[3] = volt_[0][pos[0]][pos[1]][pos[2]+1].f[0]; // FIXME outside sim area + curr_[1][pos[0]][pos[1]][pos[2]].v *= Op->ii_[1][pos[0]][pos[1]][pos[2]].v; + curr_[1][pos[0]][pos[1]][pos[2]].v += Op->iv_[1][pos[0]][pos[1]][pos[2]].v * ( volt_[0][pos[0]][pos[1]][pos[2]].v - temp.v - volt_[2][pos[0]][pos[1]][pos[2]].v + volt_[2][pos[0]+1][pos[1]][pos[2]].v); - //for z - curr[2][pos[0]][pos[1]][pos[2]] *= Op->ii[2][pos[0]][pos[1]][pos[2]]; - curr[2][pos[0]][pos[1]][pos[2]] += Op->iv[2][pos[0]][pos[1]][pos[2]] * ( volt[1][pos[0]][pos[1]][pos[2]] - volt[1][pos[0]+1][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]] + volt[0][pos[0]][pos[1]+1][pos[2]]); + // z-pol + curr_[2][pos[0]][pos[1]][pos[2]].v *= Op->ii_[2][pos[0]][pos[1]][pos[2]].v; + curr_[2][pos[0]][pos[1]][pos[2]].v += Op->iv_[2][pos[0]][pos[1]][pos[2]].v * ( volt_[1][pos[0]][pos[1]][pos[2]].v - volt_[1][pos[0]+1][pos[1]][pos[2]].v - volt_[0][pos[0]][pos[1]][pos[2]].v + volt_[0][pos[0]][pos[1]+1][pos[2]].v); } } } diff --git a/FDTD/engine_sse.h b/FDTD/engine_sse.h index a1f1bc5..17d74b0 100644 --- a/FDTD/engine_sse.h +++ b/FDTD/engine_sse.h @@ -35,8 +35,8 @@ public: virtual unsigned int GetNumberOfTimesteps() {return numTS;}; -// virtual f4vector**** GetVoltages() {return volt;}; - virtual FDTD_FLOAT**** GetCurrents() {return curr;}; + inline virtual FDTD_FLOAT GetVolt( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { return volt_[n][x][y][z/4].f[z%4]; } + inline virtual FDTD_FLOAT GetCurr( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { return curr_[n][x][y][z/4].f[z%4]; } protected: Engine_sse(const Operator* op); @@ -50,7 +50,7 @@ protected: unsigned int numLines[3]; f4vector**** volt_; - FDTD_FLOAT**** curr; + f4vector**** curr_; unsigned int numTS; }; diff --git a/FDTD/operator.cpp b/FDTD/operator.cpp index 429e863..546a02b 100644 --- a/FDTD/operator.cpp +++ b/FDTD/operator.cpp @@ -51,6 +51,8 @@ void Operator::Init() vi_=NULL; iv=NULL; ii=NULL; + iv_=NULL; + ii_=NULL; for (int n=0;n<3;++n) { discLines[n]=NULL; @@ -81,6 +83,8 @@ void Operator::Reset() Delete_N_3DArray_v4sf(vi_,numLines); Delete_N_3DArray(iv,numLines); Delete_N_3DArray(ii,numLines); + Delete_N_3DArray_v4sf(iv_,numLines); + Delete_N_3DArray_v4sf(ii_,numLines); for (int n=0;n<3;++n) { delete[] discLines[n]; @@ -510,12 +514,16 @@ void Operator::InitOperator() Delete_N_3DArray_v4sf(vi_,numLines); Delete_N_3DArray(iv,numLines); Delete_N_3DArray(ii,numLines); + Delete_N_3DArray_v4sf(iv_,numLines); + Delete_N_3DArray_v4sf(ii_,numLines); vv = Create_N_3DArray(numLines); vi = Create_N_3DArray(numLines); vv_ = Create_N_3DArray_v4sf(numLines); vi_ = Create_N_3DArray_v4sf(numLines); iv = Create_N_3DArray(numLines); ii = Create_N_3DArray(numLines); + iv_ = Create_N_3DArray_v4sf(numLines); + ii_ = Create_N_3DArray_v4sf(numLines); } inline void Operator::Calc_ECOperatorPos(int n, unsigned int* pos) @@ -524,9 +532,6 @@ inline void Operator::Calc_ECOperatorPos(int n, unsigned int* pos) vv[n][pos[0]][pos[1]][pos[2]] = (1-dT*EC_G[n][i]/2/EC_C[n][i])/(1+dT*EC_G[n][i]/2/EC_C[n][i]); vi[n][pos[0]][pos[1]][pos[2]] = (dT/EC_C[n][i])/(1+dT*EC_G[n][i]/2/EC_C[n][i]); - vv_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = vv[n][pos[0]][pos[1]][pos[2]]; - vi_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = vi[n][pos[0]][pos[1]][pos[2]]; - ii[n][pos[0]][pos[1]][pos[2]] = (1-dT*EC_R[n][i]/2/EC_L[n][i])/(1+dT*EC_R[n][i]/2/EC_L[n][i]); iv[n][pos[0]][pos[1]][pos[2]] = (dT/EC_L[n][i])/(1+dT*EC_R[n][i]/2/EC_L[n][i]); } @@ -572,6 +577,27 @@ int Operator::CalcECOperator() if (CalcEFieldExcitation()==false) return -1; CalcPEC(); + + // copy operator to aligned memory (only for sse engine) + // FIXME this is really inefficient! + for (int n=0;n<3;++n) + { + for (pos[0]=0;pos[0]