From 71dde7ea49918c033ae91487c38db959d381736f Mon Sep 17 00:00:00 2001 From: Yifeng Li Date: Wed, 18 Jan 2023 17:51:33 +0000 Subject: [PATCH] FDTD: reformat code of update equations. The original update equations in the FDTD engine have extremely long lines and are difficult to read and work with. This patch inserts line breaks and aligns all array indexes by x/y/z coordinates so that they are easy to compare visually. Signed-off-by: Yifeng Li --- FDTD/engine.cpp | 66 ++++++++++++--- FDTD/engine_sse.cpp | 132 +++++++++++++++++++++++------ FDTD/engine_sse_compressed.cpp | 148 ++++++++++++++++++++++++++------- 3 files changed, 282 insertions(+), 64 deletions(-) diff --git a/FDTD/engine.cpp b/FDTD/engine.cpp index 26725e6..b7e8320 100644 --- a/FDTD/engine.cpp +++ b/FDTD/engine.cpp @@ -123,16 +123,37 @@ void Engine::UpdateVoltages(unsigned int startX, unsigned int numX) shift[2]=pos[2]; //do the updates here //for x - volt[0][pos[0]][pos[1]][pos[2]] *= Op->vv[0][pos[0]][pos[1]][pos[2]]; - volt[0][pos[0]][pos[1]][pos[2]] += Op->vi[0][pos[0]][pos[1]][pos[2]] * ( curr[2][pos[0]][pos[1]][pos[2]] - curr[2][pos[0]][pos[1]-shift[1]][pos[2]] - curr[1][pos[0]][pos[1]][pos[2]] + curr[1][pos[0]][pos[1]][pos[2]-shift[2]]); + volt[0][pos[0]][pos[1]][pos[2]] *= + Op->vv[0][pos[0]][pos[1]][pos[2]]; + volt[0][pos[0]][pos[1]][pos[2]] += + Op->vi[0][pos[0]][pos[1]][pos[2]] * ( + curr[2][pos[0]][pos[1] ][pos[2] ] - + curr[2][pos[0]][pos[1]-shift[1]][pos[2] ] - + curr[1][pos[0]][pos[1] ][pos[2] ] + + curr[1][pos[0]][pos[1] ][pos[2]-shift[2]] + ); //for y - volt[1][pos[0]][pos[1]][pos[2]] *= Op->vv[1][pos[0]][pos[1]][pos[2]]; - volt[1][pos[0]][pos[1]][pos[2]] += Op->vi[1][pos[0]][pos[1]][pos[2]] * ( curr[0][pos[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]-shift[2]] - curr[2][pos[0]][pos[1]][pos[2]] + curr[2][pos[0]-shift[0]][pos[1]][pos[2]]); + volt[1][pos[0]][pos[1]][pos[2]] *= + Op->vv[1][pos[0]][pos[1]][pos[2]]; + volt[1][pos[0]][pos[1]][pos[2]] += + Op->vi[1][pos[0]][pos[1]][pos[2]] 
* ( + curr[0][pos[0] ][pos[1]][pos[2] ] - + curr[0][pos[0] ][pos[1]][pos[2]-shift[2]] - + curr[2][pos[0] ][pos[1]][pos[2] ] + + curr[2][pos[0]-shift[0]][pos[1]][pos[2] ] + ); //for z - volt[2][pos[0]][pos[1]][pos[2]] *= Op->vv[2][pos[0]][pos[1]][pos[2]]; - volt[2][pos[0]][pos[1]][pos[2]] += Op->vi[2][pos[0]][pos[1]][pos[2]] * ( curr[1][pos[0]][pos[1]][pos[2]] - curr[1][pos[0]-shift[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]] + curr[0][pos[0]][pos[1]-shift[1]][pos[2]]); + volt[2][pos[0]][pos[1]][pos[2]] *= + Op->vv[2][pos[0]][pos[1]][pos[2]]; + volt[2][pos[0]][pos[1]][pos[2]] += + Op->vi[2][pos[0]][pos[1]][pos[2]] * ( + curr[1][pos[0] ][pos[1] ][pos[2]] - + curr[1][pos[0]-shift[0]][pos[1] ][pos[2]] - + curr[0][pos[0] ][pos[1] ][pos[2]] + + curr[0][pos[0] ][pos[1]-shift[1]][pos[2]] + ); } } ++pos[0]; @@ -151,16 +172,37 @@ void Engine::UpdateCurrents(unsigned int startX, unsigned int numX) { //do the updates here //for x - curr[0][pos[0]][pos[1]][pos[2]] *= Op->ii[0][pos[0]][pos[1]][pos[2]]; - curr[0][pos[0]][pos[1]][pos[2]] += Op->iv[0][pos[0]][pos[1]][pos[2]] * ( volt[2][pos[0]][pos[1]][pos[2]] - volt[2][pos[0]][pos[1]+1][pos[2]] - volt[1][pos[0]][pos[1]][pos[2]] + volt[1][pos[0]][pos[1]][pos[2]+1]); + curr[0][pos[0]][pos[1]][pos[2]] *= + Op->ii[0][pos[0]][pos[1]][pos[2]]; + curr[0][pos[0]][pos[1]][pos[2]] += + Op->iv[0][pos[0]][pos[1]][pos[2]] * ( + volt[2][pos[0]][pos[1] ][pos[2] ] - + volt[2][pos[0]][pos[1]+1][pos[2] ] - + volt[1][pos[0]][pos[1] ][pos[2] ] + + volt[1][pos[0]][pos[1] ][pos[2]+1] + ); //for y - curr[1][pos[0]][pos[1]][pos[2]] *= Op->ii[1][pos[0]][pos[1]][pos[2]]; - curr[1][pos[0]][pos[1]][pos[2]] += Op->iv[1][pos[0]][pos[1]][pos[2]] * ( volt[0][pos[0]][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]+1] - volt[2][pos[0]][pos[1]][pos[2]] + volt[2][pos[0]+1][pos[1]][pos[2]]); + curr[1][pos[0]][pos[1]][pos[2]] *= + Op->ii[1][pos[0]][pos[1]][pos[2]]; + curr[1][pos[0]][pos[1]][pos[2]] += + Op->iv[1][pos[0]][pos[1]][pos[2]] * ( + volt[0][pos[0] 
][pos[1]][pos[2] ] - + volt[0][pos[0] ][pos[1]][pos[2]+1] - + volt[2][pos[0] ][pos[1]][pos[2] ] + + volt[2][pos[0]+1][pos[1]][pos[2] ] + ); //for z - curr[2][pos[0]][pos[1]][pos[2]] *= Op->ii[2][pos[0]][pos[1]][pos[2]]; - curr[2][pos[0]][pos[1]][pos[2]] += Op->iv[2][pos[0]][pos[1]][pos[2]] * ( volt[1][pos[0]][pos[1]][pos[2]] - volt[1][pos[0]+1][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]] + volt[0][pos[0]][pos[1]+1][pos[2]]); + curr[2][pos[0]][pos[1]][pos[2]] *= + Op->ii[2][pos[0]][pos[1]][pos[2]]; + curr[2][pos[0]][pos[1]][pos[2]] += + Op->iv[2][pos[0]][pos[1]][pos[2]] * ( + volt[1][pos[0] ][pos[1] ][pos[2]] - + volt[1][pos[0]+1][pos[1] ][pos[2]] - + volt[0][pos[0] ][pos[1] ][pos[2]] + + volt[0][pos[0] ][pos[1]+1][pos[2]] + ); } } ++pos[0]; diff --git a/FDTD/engine_sse.cpp b/FDTD/engine_sse.cpp index 2e06685..067c4b7 100644 --- a/FDTD/engine_sse.cpp +++ b/FDTD/engine_sse.cpp @@ -91,16 +91,37 @@ void Engine_sse::UpdateVoltages(unsigned int startX, unsigned int numX) for (pos[2]=1; pos[2]f4_vv[0][pos[0]][pos[1]][pos[2]].v; - f4_volt[0][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[0][pos[0]][pos[1]][pos[2]].v * ( f4_curr[2][pos[0]][pos[1]][pos[2]].v - f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - f4_curr[1][pos[0]][pos[1]][pos[2]].v + f4_curr[1][pos[0]][pos[1]][pos[2]-1].v ); + f4_volt[0][pos[0]][pos[1]][pos[2]].v *= + Op->f4_vv[0][pos[0]][pos[1]][pos[2]].v; + f4_volt[0][pos[0]][pos[1]][pos[2]].v += + Op->f4_vi[0][pos[0]][pos[1]][pos[2]].v * ( + f4_curr[2][pos[0]][pos[1] ][pos[2]].v - + f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - + f4_curr[1][pos[0]][pos[1] ][pos[2]].v + + f4_curr[1][pos[0]][pos[1] ][pos[2]-1].v + ); // y-polarization - f4_volt[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[1][pos[0]][pos[1]][pos[2]].v; - f4_volt[1][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[1][pos[0]][pos[1]][pos[2]].v * ( f4_curr[0][pos[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]-1].v - f4_curr[2][pos[0]][pos[1]][pos[2]].v + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]].v); 
+ f4_volt[1][pos[0]][pos[1]][pos[2]].v *= + Op->f4_vv[1][pos[0]][pos[1]][pos[2]].v; + f4_volt[1][pos[0]][pos[1]][pos[2]].v += + Op->f4_vi[1][pos[0]][pos[1]][pos[2]].v * ( + f4_curr[0][pos[0] ][pos[1]][pos[2] ].v - + f4_curr[0][pos[0] ][pos[1]][pos[2]-1].v - + f4_curr[2][pos[0] ][pos[1]][pos[2] ].v + + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2] ].v + ); // z-polarization - f4_volt[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[2][pos[0]][pos[1]][pos[2]].v; - f4_volt[2][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[2][pos[0]][pos[1]][pos[2]].v * ( f4_curr[1][pos[0]][pos[1]][pos[2]].v - f4_curr[1][pos[0]-shift[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]].v + f4_curr[0][pos[0]][pos[1]-shift[1]][pos[2]].v); + f4_volt[2][pos[0]][pos[1]][pos[2]].v *= + Op->f4_vv[2][pos[0]][pos[1]][pos[2]].v; + f4_volt[2][pos[0]][pos[1]][pos[2]].v += + Op->f4_vi[2][pos[0]][pos[1]][pos[2]].v * ( + f4_curr[1][pos[0] ][pos[1] ][pos[2]].v - + f4_curr[1][pos[0]-shift[0]][pos[1] ][pos[2]].v - + f4_curr[0][pos[0] ][pos[1] ][pos[2]].v + + f4_curr[0][pos[0] ][pos[1]-shift[1]][pos[2]].v + ); } // for pos[2] = 0 @@ -109,20 +130,41 @@ void Engine_sse::UpdateVoltages(unsigned int startX, unsigned int numX) temp.f[1] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[0]; temp.f[2] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[1]; temp.f[3] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[2]; - f4_volt[0][pos[0]][pos[1]][0].v *= Op->f4_vv[0][pos[0]][pos[1]][0].v; - f4_volt[0][pos[0]][pos[1]][0].v += Op->f4_vi[0][pos[0]][pos[1]][0].v * ( f4_curr[2][pos[0]][pos[1]][0].v - f4_curr[2][pos[0]][pos[1]-shift[1]][0].v - f4_curr[1][pos[0]][pos[1]][0].v + temp.v ); + f4_volt[0][pos[0]][pos[1]][0].v *= + Op->f4_vv[0][pos[0]][pos[1]][0].v; + f4_volt[0][pos[0]][pos[1]][0].v += + Op->f4_vi[0][pos[0]][pos[1]][0].v * ( + f4_curr[2][pos[0]][pos[1] ][0].v - + f4_curr[2][pos[0]][pos[1]-shift[1]][0].v - + f4_curr[1][pos[0]][pos[1] ][0].v + + temp.v + ); // y-polarization temp.f[0] = 0; temp.f[1] = 
f4_curr[0][pos[0]][pos[1]][numVectors-1].f[0]; temp.f[2] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[1]; temp.f[3] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[2]; - f4_volt[1][pos[0]][pos[1]][0].v *= Op->f4_vv[1][pos[0]][pos[1]][0].v; - f4_volt[1][pos[0]][pos[1]][0].v += Op->f4_vi[1][pos[0]][pos[1]][0].v * ( f4_curr[0][pos[0]][pos[1]][0].v - temp.v - f4_curr[2][pos[0]][pos[1]][0].v + f4_curr[2][pos[0]-shift[0]][pos[1]][0].v); + f4_volt[1][pos[0]][pos[1]][0].v *= + Op->f4_vv[1][pos[0]][pos[1]][0].v; + f4_volt[1][pos[0]][pos[1]][0].v += + Op->f4_vi[1][pos[0]][pos[1]][0].v * ( + f4_curr[0][pos[0] ][pos[1]][0].v - + temp.v - + f4_curr[2][pos[0] ][pos[1]][0].v + + f4_curr[2][pos[0]-shift[0]][pos[1]][0].v + ); // z-polarization - f4_volt[2][pos[0]][pos[1]][0].v *= Op->f4_vv[2][pos[0]][pos[1]][0].v; - f4_volt[2][pos[0]][pos[1]][0].v += Op->f4_vi[2][pos[0]][pos[1]][0].v * ( f4_curr[1][pos[0]][pos[1]][0].v - f4_curr[1][pos[0]-shift[0]][pos[1]][0].v - f4_curr[0][pos[0]][pos[1]][0].v + f4_curr[0][pos[0]][pos[1]-shift[1]][0].v); + f4_volt[2][pos[0]][pos[1]][0].v *= + Op->f4_vv[2][pos[0]][pos[1]][0].v; + f4_volt[2][pos[0]][pos[1]][0].v += + Op->f4_vi[2][pos[0]][pos[1]][0].v * ( + f4_curr[1][pos[0] ][pos[1] ][0].v - + f4_curr[1][pos[0]-shift[0]][pos[1] ][0].v - + f4_curr[0][pos[0] ][pos[1] ][0].v + + f4_curr[0][pos[0] ][pos[1]-shift[1]][0].v + ); } ++pos[0]; } @@ -141,16 +183,37 @@ void Engine_sse::UpdateCurrents(unsigned int startX, unsigned int numX) for (pos[2]=0; pos[2]f4_ii[0][pos[0]][pos[1]][pos[2]].v; - f4_curr[0][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[0][pos[0]][pos[1]][pos[2]].v * ( f4_volt[2][pos[0]][pos[1]][pos[2]].v - f4_volt[2][pos[0]][pos[1]+1][pos[2]].v - f4_volt[1][pos[0]][pos[1]][pos[2]].v + f4_volt[1][pos[0]][pos[1]][pos[2]+1].v); + f4_curr[0][pos[0]][pos[1]][pos[2]].v *= + Op->f4_ii[0][pos[0]][pos[1]][pos[2]].v; + f4_curr[0][pos[0]][pos[1]][pos[2]].v += + Op->f4_iv[0][pos[0]][pos[1]][pos[2]].v * ( + f4_volt[2][pos[0]][pos[1] ][pos[2] ].v - + 
f4_volt[2][pos[0]][pos[1]+1][pos[2] ].v - + f4_volt[1][pos[0]][pos[1] ][pos[2] ].v + + f4_volt[1][pos[0]][pos[1] ][pos[2]+1].v + ); // y-pol - f4_curr[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[1][pos[0]][pos[1]][pos[2]].v; - f4_curr[1][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[1][pos[0]][pos[1]][pos[2]].v * ( f4_volt[0][pos[0]][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]+1].v - f4_volt[2][pos[0]][pos[1]][pos[2]].v + f4_volt[2][pos[0]+1][pos[1]][pos[2]].v); + f4_curr[1][pos[0]][pos[1]][pos[2]].v *= + Op->f4_ii[1][pos[0]][pos[1]][pos[2]].v; + f4_curr[1][pos[0]][pos[1]][pos[2]].v += + Op->f4_iv[1][pos[0]][pos[1]][pos[2]].v * ( + f4_volt[0][pos[0] ][pos[1]][pos[2] ].v - + f4_volt[0][pos[0] ][pos[1]][pos[2]+1].v - + f4_volt[2][pos[0] ][pos[1]][pos[2] ].v + + f4_volt[2][pos[0]+1][pos[1]][pos[2] ].v + ); // z-pol - f4_curr[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[2][pos[0]][pos[1]][pos[2]].v; - f4_curr[2][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[2][pos[0]][pos[1]][pos[2]].v * ( f4_volt[1][pos[0]][pos[1]][pos[2]].v - f4_volt[1][pos[0]+1][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]].v + f4_volt[0][pos[0]][pos[1]+1][pos[2]].v); + f4_curr[2][pos[0]][pos[1]][pos[2]].v *= + Op->f4_ii[2][pos[0]][pos[1]][pos[2]].v; + f4_curr[2][pos[0]][pos[1]][pos[2]].v += + Op->f4_iv[2][pos[0]][pos[1]][pos[2]].v * ( + f4_volt[1][pos[0] ][pos[1] ][pos[2]].v - + f4_volt[1][pos[0]+1][pos[1] ][pos[2]].v - + f4_volt[0][pos[0] ][pos[1] ][pos[2]].v + + f4_volt[0][pos[0] ][pos[1]+1][pos[2]].v + ); } // for pos[2] = numVectors-1 @@ -159,20 +222,41 @@ void Engine_sse::UpdateCurrents(unsigned int startX, unsigned int numX) temp.f[1] = f4_volt[1][pos[0]][pos[1]][0].f[2]; temp.f[2] = f4_volt[1][pos[0]][pos[1]][0].f[3]; temp.f[3] = 0; - f4_curr[0][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii[0][pos[0]][pos[1]][numVectors-1].v; - f4_curr[0][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv[0][pos[0]][pos[1]][numVectors-1].v * ( f4_volt[2][pos[0]][pos[1]][numVectors-1].v - 
f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v - f4_volt[1][pos[0]][pos[1]][numVectors-1].v + temp.v); + f4_curr[0][pos[0]][pos[1]][numVectors-1].v *= + Op->f4_ii[0][pos[0]][pos[1]][numVectors-1].v; + f4_curr[0][pos[0]][pos[1]][numVectors-1].v += + Op->f4_iv[0][pos[0]][pos[1]][numVectors-1].v * ( + f4_volt[2][pos[0]][pos[1] ][numVectors-1].v - + f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v - + f4_volt[1][pos[0]][pos[1] ][numVectors-1].v + + temp.v + ); // y-pol temp.f[0] = f4_volt[0][pos[0]][pos[1]][0].f[1]; temp.f[1] = f4_volt[0][pos[0]][pos[1]][0].f[2]; temp.f[2] = f4_volt[0][pos[0]][pos[1]][0].f[3]; temp.f[3] = 0; - f4_curr[1][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii[1][pos[0]][pos[1]][numVectors-1].v; - f4_curr[1][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv[1][pos[0]][pos[1]][numVectors-1].v * ( f4_volt[0][pos[0]][pos[1]][numVectors-1].v - temp.v - f4_volt[2][pos[0]][pos[1]][numVectors-1].v + f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v); + f4_curr[1][pos[0]][pos[1]][numVectors-1].v *= + Op->f4_ii[1][pos[0]][pos[1]][numVectors-1].v; + f4_curr[1][pos[0]][pos[1]][numVectors-1].v += + Op->f4_iv[1][pos[0]][pos[1]][numVectors-1].v * ( + f4_volt[0][pos[0] ][pos[1]][numVectors-1].v - + temp.v - + f4_volt[2][pos[0] ][pos[1]][numVectors-1].v + + f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v + ); // z-pol - f4_curr[2][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii[2][pos[0]][pos[1]][numVectors-1].v; - f4_curr[2][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv[2][pos[0]][pos[1]][numVectors-1].v * ( f4_volt[1][pos[0]][pos[1]][numVectors-1].v - f4_volt[1][pos[0]+1][pos[1]][numVectors-1].v - f4_volt[0][pos[0]][pos[1]][numVectors-1].v + f4_volt[0][pos[0]][pos[1]+1][numVectors-1].v); + f4_curr[2][pos[0]][pos[1]][numVectors-1].v *= + Op->f4_ii[2][pos[0]][pos[1]][numVectors-1].v; + f4_curr[2][pos[0]][pos[1]][numVectors-1].v += + Op->f4_iv[2][pos[0]][pos[1]][numVectors-1].v * ( + f4_volt[1][pos[0] ][pos[1] ][numVectors-1].v - + f4_volt[1][pos[0]+1][pos[1] ][numVectors-1].v - + 
f4_volt[0][pos[0] ][pos[1] ][numVectors-1].v + + f4_volt[0][pos[0] ][pos[1]+1][numVectors-1].v + ); } ++pos[0]; } diff --git a/FDTD/engine_sse_compressed.cpp b/FDTD/engine_sse_compressed.cpp index 8e83387..e82a752 100644 --- a/FDTD/engine_sse_compressed.cpp +++ b/FDTD/engine_sse_compressed.cpp @@ -55,47 +55,93 @@ void Engine_SSE_Compressed::UpdateVoltages(unsigned int startX, unsigned int num { index = Op->m_Op_index[pos[0]][pos[1]][pos[2]]; // x-polarization - f4_volt[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[0][index].v; - f4_volt[0][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[0][index].v * ( f4_curr[2][pos[0]][pos[1]][pos[2]].v - f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - f4_curr[1][pos[0]][pos[1]][pos[2]].v + f4_curr[1][pos[0]][pos[1]][pos[2]-1].v ); + f4_volt[0][pos[0]][pos[1]][pos[2]].v *= + Op->f4_vv_Compressed[0][index].v; + f4_volt[0][pos[0]][pos[1]][pos[2]].v += + Op->f4_vi_Compressed[0][index].v * ( + f4_curr[2][pos[0]][pos[1] ][pos[2] ].v - + f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2] ].v - + f4_curr[1][pos[0]][pos[1] ][pos[2] ].v + + f4_curr[1][pos[0]][pos[1] ][pos[2]-1].v + ); // y-polarization - f4_volt[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[1][index].v; - f4_volt[1][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[1][index].v * ( f4_curr[0][pos[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]-1].v - f4_curr[2][pos[0]][pos[1]][pos[2]].v + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]].v); + f4_volt[1][pos[0]][pos[1]][pos[2]].v *= + Op->f4_vv_Compressed[1][index].v; + f4_volt[1][pos[0]][pos[1]][pos[2]].v += + Op->f4_vi_Compressed[1][index].v * ( + f4_curr[0][pos[0] ][pos[1]][pos[2] ].v - + f4_curr[0][pos[0] ][pos[1]][pos[2]-1].v - + f4_curr[2][pos[0] ][pos[1]][pos[2] ].v + + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2] ].v + ); // z-polarization - f4_volt[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[2][index].v; - f4_volt[2][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[2][index].v * ( 
f4_curr[1][pos[0]][pos[1]][pos[2]].v - f4_curr[1][pos[0]-shift[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]].v + f4_curr[0][pos[0]][pos[1]-shift[1]][pos[2]].v); + f4_volt[2][pos[0]][pos[1]][pos[2]].v *= + Op->f4_vv_Compressed[2][index].v; + f4_volt[2][pos[0]][pos[1]][pos[2]].v += + Op->f4_vi_Compressed[2][index].v * ( + f4_curr[1][pos[0] ][pos[1]] [pos[2]].v - + f4_curr[1][pos[0]-shift[0]][pos[1]] [pos[2]].v - + f4_curr[0][pos[0] ][pos[1]] [pos[2]].v + + f4_curr[0][pos[0] ][pos[1]-shift[1]][pos[2]].v + ); } // for pos[2] = 0 // x-polarization index = Op->m_Op_index[pos[0]][pos[1]][0]; #ifdef __SSE2__ - temp.v = (__m128)_mm_slli_si128( (__m128i)f4_curr[1][pos[0]][pos[1]][numVectors-1].v, 4 ); + temp.v = (__m128)_mm_slli_si128( + (__m128i)f4_curr[1][pos[0]][pos[1]][numVectors-1].v, 4 + ); #else temp.f[0] = 0; temp.f[1] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[0]; temp.f[2] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[1]; temp.f[3] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[2]; #endif - f4_volt[0][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[0][index].v; - f4_volt[0][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[0][index].v * ( f4_curr[2][pos[0]][pos[1]][0].v - f4_curr[2][pos[0]][pos[1]-shift[1]][0].v - f4_curr[1][pos[0]][pos[1]][0].v + temp.v ); + f4_volt[0][pos[0]][pos[1]][0].v *= + Op->f4_vv_Compressed[0][index].v; + f4_volt[0][pos[0]][pos[1]][0].v += + Op->f4_vi_Compressed[0][index].v * ( + f4_curr[2][pos[0]][pos[1] ][0].v - + f4_curr[2][pos[0]][pos[1]-shift[1]][0].v - + f4_curr[1][pos[0]][pos[1] ][0].v + + temp.v + ); // y-polarization #ifdef __SSE2__ - temp.v = (__m128)_mm_slli_si128( (__m128i)f4_curr[0][pos[0]][pos[1]][numVectors-1].v, 4 ); + temp.v = (__m128)_mm_slli_si128( + (__m128i)f4_curr[0][pos[0]][pos[1]][numVectors-1].v, 4 + ); #else temp.f[0] = 0; temp.f[1] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[0]; temp.f[2] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[1]; temp.f[3] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[2]; #endif - 
f4_volt[1][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[1][index].v; - f4_volt[1][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[1][index].v * ( f4_curr[0][pos[0]][pos[1]][0].v - temp.v - f4_curr[2][pos[0]][pos[1]][0].v + f4_curr[2][pos[0]-shift[0]][pos[1]][0].v); + f4_volt[1][pos[0]][pos[1]][0].v *= + Op->f4_vv_Compressed[1][index].v; + f4_volt[1][pos[0]][pos[1]][0].v += + Op->f4_vi_Compressed[1][index].v * ( + f4_curr[0][pos[0] ][pos[1]][0].v - + temp.v - + f4_curr[2][pos[0] ][pos[1]][0].v + + f4_curr[2][pos[0]-shift[0]][pos[1]][0].v + ); // z-polarization - f4_volt[2][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[2][index].v; - f4_volt[2][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[2][index].v * ( f4_curr[1][pos[0]][pos[1]][0].v - f4_curr[1][pos[0]-shift[0]][pos[1]][0].v - f4_curr[0][pos[0]][pos[1]][0].v + f4_curr[0][pos[0]][pos[1]-shift[1]][0].v); + f4_volt[2][pos[0]][pos[1]][0].v *= + Op->f4_vv_Compressed[2][index].v; + f4_volt[2][pos[0]][pos[1]][0].v += + Op->f4_vi_Compressed[2][index].v * ( + f4_curr[1][pos[0] ][pos[1] ][0].v - + f4_curr[1][pos[0]-shift[0]][pos[1] ][0].v - + f4_curr[0][pos[0] ][pos[1] ][0].v + + f4_curr[0][pos[0] ][pos[1]-shift[1]][0].v + ); } ++pos[0]; } @@ -116,47 +162,93 @@ void Engine_SSE_Compressed::UpdateCurrents(unsigned int startX, unsigned int num { index = Op->m_Op_index[pos[0]][pos[1]][pos[2]]; // x-pol - f4_curr[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[0][index].v; - f4_curr[0][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[0][index].v * ( f4_volt[2][pos[0]][pos[1]][pos[2]].v - f4_volt[2][pos[0]][pos[1]+1][pos[2]].v - f4_volt[1][pos[0]][pos[1]][pos[2]].v + f4_volt[1][pos[0]][pos[1]][pos[2]+1].v); + f4_curr[0][pos[0]][pos[1]][pos[2]].v *= + Op->f4_ii_Compressed[0][index].v; + f4_curr[0][pos[0]][pos[1]][pos[2]].v += + Op->f4_iv_Compressed[0][index].v * ( + f4_volt[2][pos[0]][pos[1] ][pos[2] ].v - + f4_volt[2][pos[0]][pos[1]+1][pos[2] ].v - + f4_volt[1][pos[0]][pos[1] ][pos[2] ].v + + f4_volt[1][pos[0]][pos[1] 
][pos[2]+1].v + ); // y-pol - f4_curr[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[1][index].v; - f4_curr[1][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[1][index].v * ( f4_volt[0][pos[0]][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]+1].v - f4_volt[2][pos[0]][pos[1]][pos[2]].v + f4_volt[2][pos[0]+1][pos[1]][pos[2]].v); + f4_curr[1][pos[0]][pos[1]][pos[2]].v *= + Op->f4_ii_Compressed[1][index].v; + f4_curr[1][pos[0]][pos[1]][pos[2]].v += + Op->f4_iv_Compressed[1][index].v * ( + f4_volt[0][pos[0] ][pos[1]][pos[2] ].v - + f4_volt[0][pos[0] ][pos[1]][pos[2]+1].v - + f4_volt[2][pos[0] ][pos[1]][pos[2] ].v + + f4_volt[2][pos[0]+1][pos[1]][pos[2] ].v + ); // z-pol - f4_curr[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[2][index].v; - f4_curr[2][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[2][index].v * ( f4_volt[1][pos[0]][pos[1]][pos[2]].v - f4_volt[1][pos[0]+1][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]].v + f4_volt[0][pos[0]][pos[1]+1][pos[2]].v); + f4_curr[2][pos[0]][pos[1]][pos[2]].v *= + Op->f4_ii_Compressed[2][index].v; + f4_curr[2][pos[0]][pos[1]][pos[2]].v += + Op->f4_iv_Compressed[2][index].v * ( + f4_volt[1][pos[0] ][pos[1] ][pos[2]].v - + f4_volt[1][pos[0]+1][pos[1] ][pos[2]].v - + f4_volt[0][pos[0] ][pos[1] ][pos[2]].v + + f4_volt[0][pos[0] ][pos[1]+1][pos[2]].v + ); } index = Op->m_Op_index[pos[0]][pos[1]][numVectors-1]; // for pos[2] = numVectors-1 // x-pol #ifdef __SSE2__ - temp.v = (__m128)_mm_srli_si128( (__m128i)f4_volt[1][pos[0]][pos[1]][0].v, 4 ); + temp.v = (__m128)_mm_srli_si128( + (__m128i)f4_volt[1][pos[0]][pos[1]][0].v, 4 + ); #else temp.f[0] = f4_volt[1][pos[0]][pos[1]][0].f[1]; temp.f[1] = f4_volt[1][pos[0]][pos[1]][0].f[2]; temp.f[2] = f4_volt[1][pos[0]][pos[1]][0].f[3]; temp.f[3] = 0; #endif - f4_curr[0][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[0][index].v; - f4_curr[0][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[0][index].v * ( f4_volt[2][pos[0]][pos[1]][numVectors-1].v 
- f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v - f4_volt[1][pos[0]][pos[1]][numVectors-1].v + temp.v); + f4_curr[0][pos[0]][pos[1]][numVectors-1].v *= + Op->f4_ii_Compressed[0][index].v; + f4_curr[0][pos[0]][pos[1]][numVectors-1].v += + Op->f4_iv_Compressed[0][index].v * ( + f4_volt[2][pos[0]][pos[1] ][numVectors-1].v - + f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v - + f4_volt[1][pos[0]][pos[1] ][numVectors-1].v + + temp.v + ); // y-pol #ifdef __SSE2__ - temp.v = (__m128)_mm_srli_si128( (__m128i)f4_volt[0][pos[0]][pos[1]][0].v, 4 ); + temp.v = (__m128)_mm_srli_si128( + (__m128i)f4_volt[0][pos[0]][pos[1]][0].v, 4 + ); #else temp.f[0] = f4_volt[0][pos[0]][pos[1]][0].f[1]; temp.f[1] = f4_volt[0][pos[0]][pos[1]][0].f[2]; temp.f[2] = f4_volt[0][pos[0]][pos[1]][0].f[3]; temp.f[3] = 0; #endif - f4_curr[1][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[1][index].v; - f4_curr[1][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[1][index].v * ( f4_volt[0][pos[0]][pos[1]][numVectors-1].v - temp.v - f4_volt[2][pos[0]][pos[1]][numVectors-1].v + f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v); + f4_curr[1][pos[0]][pos[1]][numVectors-1].v *= + Op->f4_ii_Compressed[1][index].v; + f4_curr[1][pos[0]][pos[1]][numVectors-1].v += + Op->f4_iv_Compressed[1][index].v * ( + f4_volt[0][pos[0] ][pos[1]][numVectors-1].v - + temp.v - + f4_volt[2][pos[0] ][pos[1]][numVectors-1].v + + f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v + ); // z-pol - f4_curr[2][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[2][index].v; - f4_curr[2][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[2][index].v * ( f4_volt[1][pos[0]][pos[1]][numVectors-1].v - f4_volt[1][pos[0]+1][pos[1]][numVectors-1].v - f4_volt[0][pos[0]][pos[1]][numVectors-1].v + f4_volt[0][pos[0]][pos[1]+1][numVectors-1].v); + f4_curr[2][pos[0]][pos[1]][numVectors-1].v *= + Op->f4_ii_Compressed[2][index].v; + f4_curr[2][pos[0]][pos[1]][numVectors-1].v += + Op->f4_iv_Compressed[2][index].v * ( + f4_volt[1][pos[0] 
][pos[1] ][numVectors-1].v - + f4_volt[1][pos[0]+1][pos[1] ][numVectors-1].v - + f4_volt[0][pos[0] ][pos[1] ][numVectors-1].v + + f4_volt[0][pos[0] ][pos[1]+1][numVectors-1].v + ); } ++pos[0]; }