FDTD: reformat code of update equations.

The original update equations in the FDTD engine have extremely
long lines and are difficult to read and work with. This patch
inserts line breaks, it aligns all array indexes by x/y/z
coordinates to make it easy to visually compare.

Signed-off-by: Yifeng Li <tomli@tomli.me>
pull/129/head
Yifeng Li 2023-01-18 17:51:33 +00:00 committed by Thorsten Liebig
parent 5b8cf2f2ed
commit 71dde7ea49
3 changed files with 282 additions and 64 deletions

View File

@ -123,16 +123,37 @@ void Engine::UpdateVoltages(unsigned int startX, unsigned int numX)
shift[2]=pos[2]; shift[2]=pos[2];
//do the updates here //do the updates here
//for x //for x
volt[0][pos[0]][pos[1]][pos[2]] *= Op->vv[0][pos[0]][pos[1]][pos[2]]; volt[0][pos[0]][pos[1]][pos[2]] *=
volt[0][pos[0]][pos[1]][pos[2]] += Op->vi[0][pos[0]][pos[1]][pos[2]] * ( curr[2][pos[0]][pos[1]][pos[2]] - curr[2][pos[0]][pos[1]-shift[1]][pos[2]] - curr[1][pos[0]][pos[1]][pos[2]] + curr[1][pos[0]][pos[1]][pos[2]-shift[2]]); Op->vv[0][pos[0]][pos[1]][pos[2]];
volt[0][pos[0]][pos[1]][pos[2]] +=
Op->vi[0][pos[0]][pos[1]][pos[2]] * (
curr[2][pos[0]][pos[1] ][pos[2] ] -
curr[2][pos[0]][pos[1]-shift[1]][pos[2] ] -
curr[1][pos[0]][pos[1] ][pos[2] ] +
curr[1][pos[0]][pos[1] ][pos[2]-shift[2]]
);
//for y //for y
volt[1][pos[0]][pos[1]][pos[2]] *= Op->vv[1][pos[0]][pos[1]][pos[2]]; volt[1][pos[0]][pos[1]][pos[2]] *=
volt[1][pos[0]][pos[1]][pos[2]] += Op->vi[1][pos[0]][pos[1]][pos[2]] * ( curr[0][pos[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]-shift[2]] - curr[2][pos[0]][pos[1]][pos[2]] + curr[2][pos[0]-shift[0]][pos[1]][pos[2]]); Op->vv[1][pos[0]][pos[1]][pos[2]];
volt[1][pos[0]][pos[1]][pos[2]] +=
Op->vi[1][pos[0]][pos[1]][pos[2]] * (
curr[0][pos[0] ][pos[1]][pos[2] ] -
curr[0][pos[0] ][pos[1]][pos[2]-shift[2]] -
curr[2][pos[0] ][pos[1]][pos[2] ] +
curr[2][pos[0]-shift[0]][pos[1]][pos[2] ]
);
//for z //for z
volt[2][pos[0]][pos[1]][pos[2]] *= Op->vv[2][pos[0]][pos[1]][pos[2]]; volt[2][pos[0]][pos[1]][pos[2]] *=
volt[2][pos[0]][pos[1]][pos[2]] += Op->vi[2][pos[0]][pos[1]][pos[2]] * ( curr[1][pos[0]][pos[1]][pos[2]] - curr[1][pos[0]-shift[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]] + curr[0][pos[0]][pos[1]-shift[1]][pos[2]]); Op->vv[2][pos[0]][pos[1]][pos[2]];
volt[2][pos[0]][pos[1]][pos[2]] +=
Op->vi[2][pos[0]][pos[1]][pos[2]] * (
curr[1][pos[0] ][pos[1] ][pos[2]] -
curr[1][pos[0]-shift[0]][pos[1] ][pos[2]] -
curr[0][pos[0] ][pos[1] ][pos[2]] +
curr[0][pos[0] ][pos[1]-shift[1]][pos[2]]
);
} }
} }
++pos[0]; ++pos[0];
@ -151,16 +172,37 @@ void Engine::UpdateCurrents(unsigned int startX, unsigned int numX)
{ {
//do the updates here //do the updates here
//for x //for x
curr[0][pos[0]][pos[1]][pos[2]] *= Op->ii[0][pos[0]][pos[1]][pos[2]]; curr[0][pos[0]][pos[1]][pos[2]] *=
curr[0][pos[0]][pos[1]][pos[2]] += Op->iv[0][pos[0]][pos[1]][pos[2]] * ( volt[2][pos[0]][pos[1]][pos[2]] - volt[2][pos[0]][pos[1]+1][pos[2]] - volt[1][pos[0]][pos[1]][pos[2]] + volt[1][pos[0]][pos[1]][pos[2]+1]); Op->ii[0][pos[0]][pos[1]][pos[2]];
curr[0][pos[0]][pos[1]][pos[2]] +=
Op->iv[0][pos[0]][pos[1]][pos[2]] * (
volt[2][pos[0]][pos[1] ][pos[2] ] -
volt[2][pos[0]][pos[1]+1][pos[2] ] -
volt[1][pos[0]][pos[1] ][pos[2] ] +
volt[1][pos[0]][pos[1] ][pos[2]+1]
);
//for y //for y
curr[1][pos[0]][pos[1]][pos[2]] *= Op->ii[1][pos[0]][pos[1]][pos[2]]; curr[1][pos[0]][pos[1]][pos[2]] *=
curr[1][pos[0]][pos[1]][pos[2]] += Op->iv[1][pos[0]][pos[1]][pos[2]] * ( volt[0][pos[0]][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]+1] - volt[2][pos[0]][pos[1]][pos[2]] + volt[2][pos[0]+1][pos[1]][pos[2]]); Op->ii[1][pos[0]][pos[1]][pos[2]];
curr[1][pos[0]][pos[1]][pos[2]] +=
Op->iv[1][pos[0]][pos[1]][pos[2]] * (
volt[0][pos[0] ][pos[1]][pos[2] ] -
volt[0][pos[0] ][pos[1]][pos[2]+1] -
volt[2][pos[0] ][pos[1]][pos[2] ] +
volt[2][pos[0]+1][pos[1]][pos[2] ]
);
//for z //for z
curr[2][pos[0]][pos[1]][pos[2]] *= Op->ii[2][pos[0]][pos[1]][pos[2]]; curr[2][pos[0]][pos[1]][pos[2]] *=
curr[2][pos[0]][pos[1]][pos[2]] += Op->iv[2][pos[0]][pos[1]][pos[2]] * ( volt[1][pos[0]][pos[1]][pos[2]] - volt[1][pos[0]+1][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]] + volt[0][pos[0]][pos[1]+1][pos[2]]); Op->ii[2][pos[0]][pos[1]][pos[2]];
curr[2][pos[0]][pos[1]][pos[2]] +=
Op->iv[2][pos[0]][pos[1]][pos[2]] * (
volt[1][pos[0] ][pos[1] ][pos[2]] -
volt[1][pos[0]+1][pos[1] ][pos[2]] -
volt[0][pos[0] ][pos[1] ][pos[2]] +
volt[0][pos[0] ][pos[1]+1][pos[2]]
);
} }
} }
++pos[0]; ++pos[0];

View File

@ -91,16 +91,37 @@ void Engine_sse::UpdateVoltages(unsigned int startX, unsigned int numX)
for (pos[2]=1; pos[2]<numVectors; ++pos[2]) for (pos[2]=1; pos[2]<numVectors; ++pos[2])
{ {
// x-polarization // x-polarization
f4_volt[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[0][pos[0]][pos[1]][pos[2]].v; f4_volt[0][pos[0]][pos[1]][pos[2]].v *=
f4_volt[0][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[0][pos[0]][pos[1]][pos[2]].v * ( f4_curr[2][pos[0]][pos[1]][pos[2]].v - f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - f4_curr[1][pos[0]][pos[1]][pos[2]].v + f4_curr[1][pos[0]][pos[1]][pos[2]-1].v ); Op->f4_vv[0][pos[0]][pos[1]][pos[2]].v;
f4_volt[0][pos[0]][pos[1]][pos[2]].v +=
Op->f4_vi[0][pos[0]][pos[1]][pos[2]].v * (
f4_curr[2][pos[0]][pos[1] ][pos[2]].v -
f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v -
f4_curr[1][pos[0]][pos[1] ][pos[2]].v +
f4_curr[1][pos[0]][pos[1] ][pos[2]-1].v
);
// y-polarization // y-polarization
f4_volt[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[1][pos[0]][pos[1]][pos[2]].v; f4_volt[1][pos[0]][pos[1]][pos[2]].v *=
f4_volt[1][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[1][pos[0]][pos[1]][pos[2]].v * ( f4_curr[0][pos[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]-1].v - f4_curr[2][pos[0]][pos[1]][pos[2]].v + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]].v); Op->f4_vv[1][pos[0]][pos[1]][pos[2]].v;
f4_volt[1][pos[0]][pos[1]][pos[2]].v +=
Op->f4_vi[1][pos[0]][pos[1]][pos[2]].v * (
f4_curr[0][pos[0] ][pos[1]][pos[2] ].v -
f4_curr[0][pos[0] ][pos[1]][pos[2]-1].v -
f4_curr[2][pos[0] ][pos[1]][pos[2] ].v +
f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2] ].v
);
// z-polarization // z-polarization
f4_volt[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[2][pos[0]][pos[1]][pos[2]].v; f4_volt[2][pos[0]][pos[1]][pos[2]].v *=
f4_volt[2][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[2][pos[0]][pos[1]][pos[2]].v * ( f4_curr[1][pos[0]][pos[1]][pos[2]].v - f4_curr[1][pos[0]-shift[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]].v + f4_curr[0][pos[0]][pos[1]-shift[1]][pos[2]].v); Op->f4_vv[2][pos[0]][pos[1]][pos[2]].v;
f4_volt[2][pos[0]][pos[1]][pos[2]].v +=
Op->f4_vi[2][pos[0]][pos[1]][pos[2]].v * (
f4_curr[1][pos[0] ][pos[1] ][pos[2]].v -
f4_curr[1][pos[0]-shift[0]][pos[1] ][pos[2]].v -
f4_curr[0][pos[0] ][pos[1] ][pos[2]].v +
f4_curr[0][pos[0] ][pos[1]-shift[1]][pos[2]].v
);
} }
// for pos[2] = 0 // for pos[2] = 0
@ -109,20 +130,41 @@ void Engine_sse::UpdateVoltages(unsigned int startX, unsigned int numX)
temp.f[1] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[0]; temp.f[1] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[0];
temp.f[2] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[1]; temp.f[2] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[1];
temp.f[3] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[2]; temp.f[3] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[2];
f4_volt[0][pos[0]][pos[1]][0].v *= Op->f4_vv[0][pos[0]][pos[1]][0].v; f4_volt[0][pos[0]][pos[1]][0].v *=
f4_volt[0][pos[0]][pos[1]][0].v += Op->f4_vi[0][pos[0]][pos[1]][0].v * ( f4_curr[2][pos[0]][pos[1]][0].v - f4_curr[2][pos[0]][pos[1]-shift[1]][0].v - f4_curr[1][pos[0]][pos[1]][0].v + temp.v ); Op->f4_vv[0][pos[0]][pos[1]][0].v;
f4_volt[0][pos[0]][pos[1]][0].v +=
Op->f4_vi[0][pos[0]][pos[1]][0].v * (
f4_curr[2][pos[0]][pos[1] ][0].v -
f4_curr[2][pos[0]][pos[1]-shift[1]][0].v -
f4_curr[1][pos[0]][pos[1] ][0].v +
temp.v
);
// y-polarization // y-polarization
temp.f[0] = 0; temp.f[0] = 0;
temp.f[1] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[0]; temp.f[1] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[0];
temp.f[2] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[1]; temp.f[2] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[1];
temp.f[3] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[2]; temp.f[3] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[2];
f4_volt[1][pos[0]][pos[1]][0].v *= Op->f4_vv[1][pos[0]][pos[1]][0].v; f4_volt[1][pos[0]][pos[1]][0].v *=
f4_volt[1][pos[0]][pos[1]][0].v += Op->f4_vi[1][pos[0]][pos[1]][0].v * ( f4_curr[0][pos[0]][pos[1]][0].v - temp.v - f4_curr[2][pos[0]][pos[1]][0].v + f4_curr[2][pos[0]-shift[0]][pos[1]][0].v); Op->f4_vv[1][pos[0]][pos[1]][0].v;
f4_volt[1][pos[0]][pos[1]][0].v +=
Op->f4_vi[1][pos[0]][pos[1]][0].v * (
f4_curr[0][pos[0] ][pos[1]][0].v -
temp.v -
f4_curr[2][pos[0] ][pos[1]][0].v +
f4_curr[2][pos[0]-shift[0]][pos[1]][0].v
);
// z-polarization // z-polarization
f4_volt[2][pos[0]][pos[1]][0].v *= Op->f4_vv[2][pos[0]][pos[1]][0].v; f4_volt[2][pos[0]][pos[1]][0].v *=
f4_volt[2][pos[0]][pos[1]][0].v += Op->f4_vi[2][pos[0]][pos[1]][0].v * ( f4_curr[1][pos[0]][pos[1]][0].v - f4_curr[1][pos[0]-shift[0]][pos[1]][0].v - f4_curr[0][pos[0]][pos[1]][0].v + f4_curr[0][pos[0]][pos[1]-shift[1]][0].v); Op->f4_vv[2][pos[0]][pos[1]][0].v;
f4_volt[2][pos[0]][pos[1]][0].v +=
Op->f4_vi[2][pos[0]][pos[1]][0].v * (
f4_curr[1][pos[0] ][pos[1] ][0].v -
f4_curr[1][pos[0]-shift[0]][pos[1] ][0].v -
f4_curr[0][pos[0] ][pos[1] ][0].v +
f4_curr[0][pos[0] ][pos[1]-shift[1]][0].v
);
} }
++pos[0]; ++pos[0];
} }
@ -141,16 +183,37 @@ void Engine_sse::UpdateCurrents(unsigned int startX, unsigned int numX)
for (pos[2]=0; pos[2]<numVectors-1; ++pos[2]) for (pos[2]=0; pos[2]<numVectors-1; ++pos[2])
{ {
// x-pol // x-pol
f4_curr[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[0][pos[0]][pos[1]][pos[2]].v; f4_curr[0][pos[0]][pos[1]][pos[2]].v *=
f4_curr[0][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[0][pos[0]][pos[1]][pos[2]].v * ( f4_volt[2][pos[0]][pos[1]][pos[2]].v - f4_volt[2][pos[0]][pos[1]+1][pos[2]].v - f4_volt[1][pos[0]][pos[1]][pos[2]].v + f4_volt[1][pos[0]][pos[1]][pos[2]+1].v); Op->f4_ii[0][pos[0]][pos[1]][pos[2]].v;
f4_curr[0][pos[0]][pos[1]][pos[2]].v +=
Op->f4_iv[0][pos[0]][pos[1]][pos[2]].v * (
f4_volt[2][pos[0]][pos[1] ][pos[2] ].v -
f4_volt[2][pos[0]][pos[1]+1][pos[2] ].v -
f4_volt[1][pos[0]][pos[1] ][pos[2] ].v +
f4_volt[1][pos[0]][pos[1] ][pos[2]+1].v
);
// y-pol // y-pol
f4_curr[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[1][pos[0]][pos[1]][pos[2]].v; f4_curr[1][pos[0]][pos[1]][pos[2]].v *=
f4_curr[1][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[1][pos[0]][pos[1]][pos[2]].v * ( f4_volt[0][pos[0]][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]+1].v - f4_volt[2][pos[0]][pos[1]][pos[2]].v + f4_volt[2][pos[0]+1][pos[1]][pos[2]].v); Op->f4_ii[1][pos[0]][pos[1]][pos[2]].v;
f4_curr[1][pos[0]][pos[1]][pos[2]].v +=
Op->f4_iv[1][pos[0]][pos[1]][pos[2]].v * (
f4_volt[0][pos[0] ][pos[1]][pos[2] ].v -
f4_volt[0][pos[0] ][pos[1]][pos[2]+1].v -
f4_volt[2][pos[0] ][pos[1]][pos[2] ].v +
f4_volt[2][pos[0]+1][pos[1]][pos[2] ].v
);
// z-pol // z-pol
f4_curr[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[2][pos[0]][pos[1]][pos[2]].v; f4_curr[2][pos[0]][pos[1]][pos[2]].v *=
f4_curr[2][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[2][pos[0]][pos[1]][pos[2]].v * ( f4_volt[1][pos[0]][pos[1]][pos[2]].v - f4_volt[1][pos[0]+1][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]].v + f4_volt[0][pos[0]][pos[1]+1][pos[2]].v); Op->f4_ii[2][pos[0]][pos[1]][pos[2]].v;
f4_curr[2][pos[0]][pos[1]][pos[2]].v +=
Op->f4_iv[2][pos[0]][pos[1]][pos[2]].v * (
f4_volt[1][pos[0] ][pos[1] ][pos[2]].v -
f4_volt[1][pos[0]+1][pos[1] ][pos[2]].v -
f4_volt[0][pos[0] ][pos[1] ][pos[2]].v +
f4_volt[0][pos[0] ][pos[1]+1][pos[2]].v
);
} }
// for pos[2] = numVectors-1 // for pos[2] = numVectors-1
@ -159,20 +222,41 @@ void Engine_sse::UpdateCurrents(unsigned int startX, unsigned int numX)
temp.f[1] = f4_volt[1][pos[0]][pos[1]][0].f[2]; temp.f[1] = f4_volt[1][pos[0]][pos[1]][0].f[2];
temp.f[2] = f4_volt[1][pos[0]][pos[1]][0].f[3]; temp.f[2] = f4_volt[1][pos[0]][pos[1]][0].f[3];
temp.f[3] = 0; temp.f[3] = 0;
f4_curr[0][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii[0][pos[0]][pos[1]][numVectors-1].v; f4_curr[0][pos[0]][pos[1]][numVectors-1].v *=
f4_curr[0][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv[0][pos[0]][pos[1]][numVectors-1].v * ( f4_volt[2][pos[0]][pos[1]][numVectors-1].v - f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v - f4_volt[1][pos[0]][pos[1]][numVectors-1].v + temp.v); Op->f4_ii[0][pos[0]][pos[1]][numVectors-1].v;
f4_curr[0][pos[0]][pos[1]][numVectors-1].v +=
Op->f4_iv[0][pos[0]][pos[1]][numVectors-1].v * (
f4_volt[2][pos[0]][pos[1] ][numVectors-1].v -
f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v -
f4_volt[1][pos[0]][pos[1] ][numVectors-1].v +
temp.v
);
// y-pol // y-pol
temp.f[0] = f4_volt[0][pos[0]][pos[1]][0].f[1]; temp.f[0] = f4_volt[0][pos[0]][pos[1]][0].f[1];
temp.f[1] = f4_volt[0][pos[0]][pos[1]][0].f[2]; temp.f[1] = f4_volt[0][pos[0]][pos[1]][0].f[2];
temp.f[2] = f4_volt[0][pos[0]][pos[1]][0].f[3]; temp.f[2] = f4_volt[0][pos[0]][pos[1]][0].f[3];
temp.f[3] = 0; temp.f[3] = 0;
f4_curr[1][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii[1][pos[0]][pos[1]][numVectors-1].v; f4_curr[1][pos[0]][pos[1]][numVectors-1].v *=
f4_curr[1][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv[1][pos[0]][pos[1]][numVectors-1].v * ( f4_volt[0][pos[0]][pos[1]][numVectors-1].v - temp.v - f4_volt[2][pos[0]][pos[1]][numVectors-1].v + f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v); Op->f4_ii[1][pos[0]][pos[1]][numVectors-1].v;
f4_curr[1][pos[0]][pos[1]][numVectors-1].v +=
Op->f4_iv[1][pos[0]][pos[1]][numVectors-1].v * (
f4_volt[0][pos[0] ][pos[1]][numVectors-1].v -
temp.v -
f4_volt[2][pos[0] ][pos[1]][numVectors-1].v +
f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v
);
// z-pol // z-pol
f4_curr[2][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii[2][pos[0]][pos[1]][numVectors-1].v; f4_curr[2][pos[0]][pos[1]][numVectors-1].v *=
f4_curr[2][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv[2][pos[0]][pos[1]][numVectors-1].v * ( f4_volt[1][pos[0]][pos[1]][numVectors-1].v - f4_volt[1][pos[0]+1][pos[1]][numVectors-1].v - f4_volt[0][pos[0]][pos[1]][numVectors-1].v + f4_volt[0][pos[0]][pos[1]+1][numVectors-1].v); Op->f4_ii[2][pos[0]][pos[1]][numVectors-1].v;
f4_curr[2][pos[0]][pos[1]][numVectors-1].v +=
Op->f4_iv[2][pos[0]][pos[1]][numVectors-1].v * (
f4_volt[1][pos[0] ][pos[1] ][numVectors-1].v -
f4_volt[1][pos[0]+1][pos[1] ][numVectors-1].v -
f4_volt[0][pos[0] ][pos[1] ][numVectors-1].v +
f4_volt[0][pos[0] ][pos[1]+1][numVectors-1].v
);
} }
++pos[0]; ++pos[0];
} }

View File

@ -55,47 +55,93 @@ void Engine_SSE_Compressed::UpdateVoltages(unsigned int startX, unsigned int num
{ {
index = Op->m_Op_index[pos[0]][pos[1]][pos[2]]; index = Op->m_Op_index[pos[0]][pos[1]][pos[2]];
// x-polarization // x-polarization
f4_volt[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[0][index].v; f4_volt[0][pos[0]][pos[1]][pos[2]].v *=
f4_volt[0][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[0][index].v * ( f4_curr[2][pos[0]][pos[1]][pos[2]].v - f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - f4_curr[1][pos[0]][pos[1]][pos[2]].v + f4_curr[1][pos[0]][pos[1]][pos[2]-1].v ); Op->f4_vv_Compressed[0][index].v;
f4_volt[0][pos[0]][pos[1]][pos[2]].v +=
Op->f4_vi_Compressed[0][index].v * (
f4_curr[2][pos[0]][pos[1] ][pos[2] ].v -
f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2] ].v -
f4_curr[1][pos[0]][pos[1] ][pos[2] ].v +
f4_curr[1][pos[0]][pos[1] ][pos[2]-1].v
);
// y-polarization // y-polarization
f4_volt[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[1][index].v; f4_volt[1][pos[0]][pos[1]][pos[2]].v *=
f4_volt[1][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[1][index].v * ( f4_curr[0][pos[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]-1].v - f4_curr[2][pos[0]][pos[1]][pos[2]].v + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]].v); Op->f4_vv_Compressed[1][index].v;
f4_volt[1][pos[0]][pos[1]][pos[2]].v +=
Op->f4_vi_Compressed[1][index].v * (
f4_curr[0][pos[0] ][pos[1]][pos[2] ].v -
f4_curr[0][pos[0] ][pos[1]][pos[2]-1].v -
f4_curr[2][pos[0] ][pos[1]][pos[2] ].v +
f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2] ].v
);
// z-polarization // z-polarization
f4_volt[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[2][index].v; f4_volt[2][pos[0]][pos[1]][pos[2]].v *=
f4_volt[2][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[2][index].v * ( f4_curr[1][pos[0]][pos[1]][pos[2]].v - f4_curr[1][pos[0]-shift[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]].v + f4_curr[0][pos[0]][pos[1]-shift[1]][pos[2]].v); Op->f4_vv_Compressed[2][index].v;
f4_volt[2][pos[0]][pos[1]][pos[2]].v +=
Op->f4_vi_Compressed[2][index].v * (
f4_curr[1][pos[0] ][pos[1]] [pos[2]].v -
f4_curr[1][pos[0]-shift[0]][pos[1]] [pos[2]].v -
f4_curr[0][pos[0] ][pos[1]] [pos[2]].v +
f4_curr[0][pos[0] ][pos[1]-shift[1]][pos[2]].v
);
} }
// for pos[2] = 0 // for pos[2] = 0
// x-polarization // x-polarization
index = Op->m_Op_index[pos[0]][pos[1]][0]; index = Op->m_Op_index[pos[0]][pos[1]][0];
#ifdef __SSE2__ #ifdef __SSE2__
temp.v = (__m128)_mm_slli_si128( (__m128i)f4_curr[1][pos[0]][pos[1]][numVectors-1].v, 4 ); temp.v = (__m128)_mm_slli_si128(
(__m128i)f4_curr[1][pos[0]][pos[1]][numVectors-1].v, 4
);
#else #else
temp.f[0] = 0; temp.f[0] = 0;
temp.f[1] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[0]; temp.f[1] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[0];
temp.f[2] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[1]; temp.f[2] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[1];
temp.f[3] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[2]; temp.f[3] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[2];
#endif #endif
f4_volt[0][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[0][index].v; f4_volt[0][pos[0]][pos[1]][0].v *=
f4_volt[0][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[0][index].v * ( f4_curr[2][pos[0]][pos[1]][0].v - f4_curr[2][pos[0]][pos[1]-shift[1]][0].v - f4_curr[1][pos[0]][pos[1]][0].v + temp.v ); Op->f4_vv_Compressed[0][index].v;
f4_volt[0][pos[0]][pos[1]][0].v +=
Op->f4_vi_Compressed[0][index].v * (
f4_curr[2][pos[0]][pos[1] ][0].v -
f4_curr[2][pos[0]][pos[1]-shift[1]][0].v -
f4_curr[1][pos[0]][pos[1] ][0].v +
temp.v
);
// y-polarization // y-polarization
#ifdef __SSE2__ #ifdef __SSE2__
temp.v = (__m128)_mm_slli_si128( (__m128i)f4_curr[0][pos[0]][pos[1]][numVectors-1].v, 4 ); temp.v = (__m128)_mm_slli_si128(
(__m128i)f4_curr[0][pos[0]][pos[1]][numVectors-1].v, 4
);
#else #else
temp.f[0] = 0; temp.f[0] = 0;
temp.f[1] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[0]; temp.f[1] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[0];
temp.f[2] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[1]; temp.f[2] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[1];
temp.f[3] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[2]; temp.f[3] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[2];
#endif #endif
f4_volt[1][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[1][index].v; f4_volt[1][pos[0]][pos[1]][0].v *=
f4_volt[1][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[1][index].v * ( f4_curr[0][pos[0]][pos[1]][0].v - temp.v - f4_curr[2][pos[0]][pos[1]][0].v + f4_curr[2][pos[0]-shift[0]][pos[1]][0].v); Op->f4_vv_Compressed[1][index].v;
f4_volt[1][pos[0]][pos[1]][0].v +=
Op->f4_vi_Compressed[1][index].v * (
f4_curr[0][pos[0] ][pos[1]][0].v -
temp.v -
f4_curr[2][pos[0] ][pos[1]][0].v +
f4_curr[2][pos[0]-shift[0]][pos[1]][0].v
);
// z-polarization // z-polarization
f4_volt[2][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[2][index].v; f4_volt[2][pos[0]][pos[1]][0].v *=
f4_volt[2][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[2][index].v * ( f4_curr[1][pos[0]][pos[1]][0].v - f4_curr[1][pos[0]-shift[0]][pos[1]][0].v - f4_curr[0][pos[0]][pos[1]][0].v + f4_curr[0][pos[0]][pos[1]-shift[1]][0].v); Op->f4_vv_Compressed[2][index].v;
f4_volt[2][pos[0]][pos[1]][0].v +=
Op->f4_vi_Compressed[2][index].v * (
f4_curr[1][pos[0] ][pos[1] ][0].v -
f4_curr[1][pos[0]-shift[0]][pos[1] ][0].v -
f4_curr[0][pos[0] ][pos[1] ][0].v +
f4_curr[0][pos[0] ][pos[1]-shift[1]][0].v
);
} }
++pos[0]; ++pos[0];
} }
@ -116,47 +162,93 @@ void Engine_SSE_Compressed::UpdateCurrents(unsigned int startX, unsigned int num
{ {
index = Op->m_Op_index[pos[0]][pos[1]][pos[2]]; index = Op->m_Op_index[pos[0]][pos[1]][pos[2]];
// x-pol // x-pol
f4_curr[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[0][index].v; f4_curr[0][pos[0]][pos[1]][pos[2]].v *=
f4_curr[0][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[0][index].v * ( f4_volt[2][pos[0]][pos[1]][pos[2]].v - f4_volt[2][pos[0]][pos[1]+1][pos[2]].v - f4_volt[1][pos[0]][pos[1]][pos[2]].v + f4_volt[1][pos[0]][pos[1]][pos[2]+1].v); Op->f4_ii_Compressed[0][index].v;
f4_curr[0][pos[0]][pos[1]][pos[2]].v +=
Op->f4_iv_Compressed[0][index].v * (
f4_volt[2][pos[0]][pos[1] ][pos[2] ].v -
f4_volt[2][pos[0]][pos[1]+1][pos[2] ].v -
f4_volt[1][pos[0]][pos[1] ][pos[2] ].v +
f4_volt[1][pos[0]][pos[1] ][pos[2]+1].v
);
// y-pol // y-pol
f4_curr[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[1][index].v; f4_curr[1][pos[0]][pos[1]][pos[2]].v *=
f4_curr[1][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[1][index].v * ( f4_volt[0][pos[0]][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]+1].v - f4_volt[2][pos[0]][pos[1]][pos[2]].v + f4_volt[2][pos[0]+1][pos[1]][pos[2]].v); Op->f4_ii_Compressed[1][index].v;
f4_curr[1][pos[0]][pos[1]][pos[2]].v +=
Op->f4_iv_Compressed[1][index].v * (
f4_volt[0][pos[0] ][pos[1]][pos[2] ].v -
f4_volt[0][pos[0] ][pos[1]][pos[2]+1].v -
f4_volt[2][pos[0] ][pos[1]][pos[2] ].v +
f4_volt[2][pos[0]+1][pos[1]][pos[2] ].v
);
// z-pol // z-pol
f4_curr[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[2][index].v; f4_curr[2][pos[0]][pos[1]][pos[2]].v *=
f4_curr[2][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[2][index].v * ( f4_volt[1][pos[0]][pos[1]][pos[2]].v - f4_volt[1][pos[0]+1][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]].v + f4_volt[0][pos[0]][pos[1]+1][pos[2]].v); Op->f4_ii_Compressed[2][index].v;
f4_curr[2][pos[0]][pos[1]][pos[2]].v +=
Op->f4_iv_Compressed[2][index].v * (
f4_volt[1][pos[0] ][pos[1] ][pos[2]].v -
f4_volt[1][pos[0]+1][pos[1] ][pos[2]].v -
f4_volt[0][pos[0] ][pos[1] ][pos[2]].v +
f4_volt[0][pos[0] ][pos[1]+1][pos[2]].v
);
} }
index = Op->m_Op_index[pos[0]][pos[1]][numVectors-1]; index = Op->m_Op_index[pos[0]][pos[1]][numVectors-1];
// for pos[2] = numVectors-1 // for pos[2] = numVectors-1
// x-pol // x-pol
#ifdef __SSE2__ #ifdef __SSE2__
temp.v = (__m128)_mm_srli_si128( (__m128i)f4_volt[1][pos[0]][pos[1]][0].v, 4 ); temp.v = (__m128)_mm_srli_si128(
(__m128i)f4_volt[1][pos[0]][pos[1]][0].v, 4
);
#else #else
temp.f[0] = f4_volt[1][pos[0]][pos[1]][0].f[1]; temp.f[0] = f4_volt[1][pos[0]][pos[1]][0].f[1];
temp.f[1] = f4_volt[1][pos[0]][pos[1]][0].f[2]; temp.f[1] = f4_volt[1][pos[0]][pos[1]][0].f[2];
temp.f[2] = f4_volt[1][pos[0]][pos[1]][0].f[3]; temp.f[2] = f4_volt[1][pos[0]][pos[1]][0].f[3];
temp.f[3] = 0; temp.f[3] = 0;
#endif #endif
f4_curr[0][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[0][index].v; f4_curr[0][pos[0]][pos[1]][numVectors-1].v *=
f4_curr[0][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[0][index].v * ( f4_volt[2][pos[0]][pos[1]][numVectors-1].v - f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v - f4_volt[1][pos[0]][pos[1]][numVectors-1].v + temp.v); Op->f4_ii_Compressed[0][index].v;
f4_curr[0][pos[0]][pos[1]][numVectors-1].v +=
Op->f4_iv_Compressed[0][index].v * (
f4_volt[2][pos[0]][pos[1] ][numVectors-1].v -
f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v -
f4_volt[1][pos[0]][pos[1] ][numVectors-1].v +
temp.v
);
// y-pol // y-pol
#ifdef __SSE2__ #ifdef __SSE2__
temp.v = (__m128)_mm_srli_si128( (__m128i)f4_volt[0][pos[0]][pos[1]][0].v, 4 ); temp.v = (__m128)_mm_srli_si128(
(__m128i)f4_volt[0][pos[0]][pos[1]][0].v, 4
);
#else #else
temp.f[0] = f4_volt[0][pos[0]][pos[1]][0].f[1]; temp.f[0] = f4_volt[0][pos[0]][pos[1]][0].f[1];
temp.f[1] = f4_volt[0][pos[0]][pos[1]][0].f[2]; temp.f[1] = f4_volt[0][pos[0]][pos[1]][0].f[2];
temp.f[2] = f4_volt[0][pos[0]][pos[1]][0].f[3]; temp.f[2] = f4_volt[0][pos[0]][pos[1]][0].f[3];
temp.f[3] = 0; temp.f[3] = 0;
#endif #endif
f4_curr[1][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[1][index].v; f4_curr[1][pos[0]][pos[1]][numVectors-1].v *=
f4_curr[1][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[1][index].v * ( f4_volt[0][pos[0]][pos[1]][numVectors-1].v - temp.v - f4_volt[2][pos[0]][pos[1]][numVectors-1].v + f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v); Op->f4_ii_Compressed[1][index].v;
f4_curr[1][pos[0]][pos[1]][numVectors-1].v +=
Op->f4_iv_Compressed[1][index].v * (
f4_volt[0][pos[0] ][pos[1]][numVectors-1].v -
temp.v -
f4_volt[2][pos[0] ][pos[1]][numVectors-1].v +
f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v
);
// z-pol // z-pol
f4_curr[2][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[2][index].v; f4_curr[2][pos[0]][pos[1]][numVectors-1].v *=
f4_curr[2][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[2][index].v * ( f4_volt[1][pos[0]][pos[1]][numVectors-1].v - f4_volt[1][pos[0]+1][pos[1]][numVectors-1].v - f4_volt[0][pos[0]][pos[1]][numVectors-1].v + f4_volt[0][pos[0]][pos[1]+1][numVectors-1].v); Op->f4_ii_Compressed[2][index].v;
f4_curr[2][pos[0]][pos[1]][numVectors-1].v +=
Op->f4_iv_Compressed[2][index].v * (
f4_volt[1][pos[0] ][pos[1] ][numVectors-1].v -
f4_volt[1][pos[0]+1][pos[1] ][numVectors-1].v -
f4_volt[0][pos[0] ][pos[1] ][numVectors-1].v +
f4_volt[0][pos[0] ][pos[1]+1][numVectors-1].v
);
} }
++pos[0]; ++pos[0];
} }