FDTD: reformat code of update equations.
The original update equations in the FDTD engine have extremely long lines and are difficult to read and work with. This patch inserts line breaks, it aligns all array indexes by x/y/z coordinates to make it easy to visually compare. Signed-off-by: Yifeng Li <tomli@tomli.me>pull/129/head
parent
5b8cf2f2ed
commit
71dde7ea49
|
@ -123,16 +123,37 @@ void Engine::UpdateVoltages(unsigned int startX, unsigned int numX)
|
|||
shift[2]=pos[2];
|
||||
//do the updates here
|
||||
//for x
|
||||
volt[0][pos[0]][pos[1]][pos[2]] *= Op->vv[0][pos[0]][pos[1]][pos[2]];
|
||||
volt[0][pos[0]][pos[1]][pos[2]] += Op->vi[0][pos[0]][pos[1]][pos[2]] * ( curr[2][pos[0]][pos[1]][pos[2]] - curr[2][pos[0]][pos[1]-shift[1]][pos[2]] - curr[1][pos[0]][pos[1]][pos[2]] + curr[1][pos[0]][pos[1]][pos[2]-shift[2]]);
|
||||
volt[0][pos[0]][pos[1]][pos[2]] *=
|
||||
Op->vv[0][pos[0]][pos[1]][pos[2]];
|
||||
volt[0][pos[0]][pos[1]][pos[2]] +=
|
||||
Op->vi[0][pos[0]][pos[1]][pos[2]] * (
|
||||
curr[2][pos[0]][pos[1] ][pos[2] ] -
|
||||
curr[2][pos[0]][pos[1]-shift[1]][pos[2] ] -
|
||||
curr[1][pos[0]][pos[1] ][pos[2] ] +
|
||||
curr[1][pos[0]][pos[1] ][pos[2]-shift[2]]
|
||||
);
|
||||
|
||||
//for y
|
||||
volt[1][pos[0]][pos[1]][pos[2]] *= Op->vv[1][pos[0]][pos[1]][pos[2]];
|
||||
volt[1][pos[0]][pos[1]][pos[2]] += Op->vi[1][pos[0]][pos[1]][pos[2]] * ( curr[0][pos[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]-shift[2]] - curr[2][pos[0]][pos[1]][pos[2]] + curr[2][pos[0]-shift[0]][pos[1]][pos[2]]);
|
||||
volt[1][pos[0]][pos[1]][pos[2]] *=
|
||||
Op->vv[1][pos[0]][pos[1]][pos[2]];
|
||||
volt[1][pos[0]][pos[1]][pos[2]] +=
|
||||
Op->vi[1][pos[0]][pos[1]][pos[2]] * (
|
||||
curr[0][pos[0] ][pos[1]][pos[2] ] -
|
||||
curr[0][pos[0] ][pos[1]][pos[2]-shift[2]] -
|
||||
curr[2][pos[0] ][pos[1]][pos[2] ] +
|
||||
curr[2][pos[0]-shift[0]][pos[1]][pos[2] ]
|
||||
);
|
||||
|
||||
//for z
|
||||
volt[2][pos[0]][pos[1]][pos[2]] *= Op->vv[2][pos[0]][pos[1]][pos[2]];
|
||||
volt[2][pos[0]][pos[1]][pos[2]] += Op->vi[2][pos[0]][pos[1]][pos[2]] * ( curr[1][pos[0]][pos[1]][pos[2]] - curr[1][pos[0]-shift[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]] + curr[0][pos[0]][pos[1]-shift[1]][pos[2]]);
|
||||
volt[2][pos[0]][pos[1]][pos[2]] *=
|
||||
Op->vv[2][pos[0]][pos[1]][pos[2]];
|
||||
volt[2][pos[0]][pos[1]][pos[2]] +=
|
||||
Op->vi[2][pos[0]][pos[1]][pos[2]] * (
|
||||
curr[1][pos[0] ][pos[1] ][pos[2]] -
|
||||
curr[1][pos[0]-shift[0]][pos[1] ][pos[2]] -
|
||||
curr[0][pos[0] ][pos[1] ][pos[2]] +
|
||||
curr[0][pos[0] ][pos[1]-shift[1]][pos[2]]
|
||||
);
|
||||
}
|
||||
}
|
||||
++pos[0];
|
||||
|
@ -151,16 +172,37 @@ void Engine::UpdateCurrents(unsigned int startX, unsigned int numX)
|
|||
{
|
||||
//do the updates here
|
||||
//for x
|
||||
curr[0][pos[0]][pos[1]][pos[2]] *= Op->ii[0][pos[0]][pos[1]][pos[2]];
|
||||
curr[0][pos[0]][pos[1]][pos[2]] += Op->iv[0][pos[0]][pos[1]][pos[2]] * ( volt[2][pos[0]][pos[1]][pos[2]] - volt[2][pos[0]][pos[1]+1][pos[2]] - volt[1][pos[0]][pos[1]][pos[2]] + volt[1][pos[0]][pos[1]][pos[2]+1]);
|
||||
curr[0][pos[0]][pos[1]][pos[2]] *=
|
||||
Op->ii[0][pos[0]][pos[1]][pos[2]];
|
||||
curr[0][pos[0]][pos[1]][pos[2]] +=
|
||||
Op->iv[0][pos[0]][pos[1]][pos[2]] * (
|
||||
volt[2][pos[0]][pos[1] ][pos[2] ] -
|
||||
volt[2][pos[0]][pos[1]+1][pos[2] ] -
|
||||
volt[1][pos[0]][pos[1] ][pos[2] ] +
|
||||
volt[1][pos[0]][pos[1] ][pos[2]+1]
|
||||
);
|
||||
|
||||
//for y
|
||||
curr[1][pos[0]][pos[1]][pos[2]] *= Op->ii[1][pos[0]][pos[1]][pos[2]];
|
||||
curr[1][pos[0]][pos[1]][pos[2]] += Op->iv[1][pos[0]][pos[1]][pos[2]] * ( volt[0][pos[0]][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]+1] - volt[2][pos[0]][pos[1]][pos[2]] + volt[2][pos[0]+1][pos[1]][pos[2]]);
|
||||
curr[1][pos[0]][pos[1]][pos[2]] *=
|
||||
Op->ii[1][pos[0]][pos[1]][pos[2]];
|
||||
curr[1][pos[0]][pos[1]][pos[2]] +=
|
||||
Op->iv[1][pos[0]][pos[1]][pos[2]] * (
|
||||
volt[0][pos[0] ][pos[1]][pos[2] ] -
|
||||
volt[0][pos[0] ][pos[1]][pos[2]+1] -
|
||||
volt[2][pos[0] ][pos[1]][pos[2] ] +
|
||||
volt[2][pos[0]+1][pos[1]][pos[2] ]
|
||||
);
|
||||
|
||||
//for z
|
||||
curr[2][pos[0]][pos[1]][pos[2]] *= Op->ii[2][pos[0]][pos[1]][pos[2]];
|
||||
curr[2][pos[0]][pos[1]][pos[2]] += Op->iv[2][pos[0]][pos[1]][pos[2]] * ( volt[1][pos[0]][pos[1]][pos[2]] - volt[1][pos[0]+1][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]] + volt[0][pos[0]][pos[1]+1][pos[2]]);
|
||||
curr[2][pos[0]][pos[1]][pos[2]] *=
|
||||
Op->ii[2][pos[0]][pos[1]][pos[2]];
|
||||
curr[2][pos[0]][pos[1]][pos[2]] +=
|
||||
Op->iv[2][pos[0]][pos[1]][pos[2]] * (
|
||||
volt[1][pos[0] ][pos[1] ][pos[2]] -
|
||||
volt[1][pos[0]+1][pos[1] ][pos[2]] -
|
||||
volt[0][pos[0] ][pos[1] ][pos[2]] +
|
||||
volt[0][pos[0] ][pos[1]+1][pos[2]]
|
||||
);
|
||||
}
|
||||
}
|
||||
++pos[0];
|
||||
|
|
|
@ -91,16 +91,37 @@ void Engine_sse::UpdateVoltages(unsigned int startX, unsigned int numX)
|
|||
for (pos[2]=1; pos[2]<numVectors; ++pos[2])
|
||||
{
|
||||
// x-polarization
|
||||
f4_volt[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[0][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_volt[0][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[0][pos[0]][pos[1]][pos[2]].v * ( f4_curr[2][pos[0]][pos[1]][pos[2]].v - f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - f4_curr[1][pos[0]][pos[1]][pos[2]].v + f4_curr[1][pos[0]][pos[1]][pos[2]-1].v );
|
||||
f4_volt[0][pos[0]][pos[1]][pos[2]].v *=
|
||||
Op->f4_vv[0][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_volt[0][pos[0]][pos[1]][pos[2]].v +=
|
||||
Op->f4_vi[0][pos[0]][pos[1]][pos[2]].v * (
|
||||
f4_curr[2][pos[0]][pos[1] ][pos[2]].v -
|
||||
f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v -
|
||||
f4_curr[1][pos[0]][pos[1] ][pos[2]].v +
|
||||
f4_curr[1][pos[0]][pos[1] ][pos[2]-1].v
|
||||
);
|
||||
|
||||
// y-polarization
|
||||
f4_volt[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[1][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_volt[1][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[1][pos[0]][pos[1]][pos[2]].v * ( f4_curr[0][pos[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]-1].v - f4_curr[2][pos[0]][pos[1]][pos[2]].v + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]].v);
|
||||
f4_volt[1][pos[0]][pos[1]][pos[2]].v *=
|
||||
Op->f4_vv[1][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_volt[1][pos[0]][pos[1]][pos[2]].v +=
|
||||
Op->f4_vi[1][pos[0]][pos[1]][pos[2]].v * (
|
||||
f4_curr[0][pos[0] ][pos[1]][pos[2] ].v -
|
||||
f4_curr[0][pos[0] ][pos[1]][pos[2]-1].v -
|
||||
f4_curr[2][pos[0] ][pos[1]][pos[2] ].v +
|
||||
f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2] ].v
|
||||
);
|
||||
|
||||
// z-polarization
|
||||
f4_volt[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[2][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_volt[2][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[2][pos[0]][pos[1]][pos[2]].v * ( f4_curr[1][pos[0]][pos[1]][pos[2]].v - f4_curr[1][pos[0]-shift[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]].v + f4_curr[0][pos[0]][pos[1]-shift[1]][pos[2]].v);
|
||||
f4_volt[2][pos[0]][pos[1]][pos[2]].v *=
|
||||
Op->f4_vv[2][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_volt[2][pos[0]][pos[1]][pos[2]].v +=
|
||||
Op->f4_vi[2][pos[0]][pos[1]][pos[2]].v * (
|
||||
f4_curr[1][pos[0] ][pos[1] ][pos[2]].v -
|
||||
f4_curr[1][pos[0]-shift[0]][pos[1] ][pos[2]].v -
|
||||
f4_curr[0][pos[0] ][pos[1] ][pos[2]].v +
|
||||
f4_curr[0][pos[0] ][pos[1]-shift[1]][pos[2]].v
|
||||
);
|
||||
}
|
||||
|
||||
// for pos[2] = 0
|
||||
|
@ -109,20 +130,41 @@ void Engine_sse::UpdateVoltages(unsigned int startX, unsigned int numX)
|
|||
temp.f[1] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[0];
|
||||
temp.f[2] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[1];
|
||||
temp.f[3] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[2];
|
||||
f4_volt[0][pos[0]][pos[1]][0].v *= Op->f4_vv[0][pos[0]][pos[1]][0].v;
|
||||
f4_volt[0][pos[0]][pos[1]][0].v += Op->f4_vi[0][pos[0]][pos[1]][0].v * ( f4_curr[2][pos[0]][pos[1]][0].v - f4_curr[2][pos[0]][pos[1]-shift[1]][0].v - f4_curr[1][pos[0]][pos[1]][0].v + temp.v );
|
||||
f4_volt[0][pos[0]][pos[1]][0].v *=
|
||||
Op->f4_vv[0][pos[0]][pos[1]][0].v;
|
||||
f4_volt[0][pos[0]][pos[1]][0].v +=
|
||||
Op->f4_vi[0][pos[0]][pos[1]][0].v * (
|
||||
f4_curr[2][pos[0]][pos[1] ][0].v -
|
||||
f4_curr[2][pos[0]][pos[1]-shift[1]][0].v -
|
||||
f4_curr[1][pos[0]][pos[1] ][0].v +
|
||||
temp.v
|
||||
);
|
||||
|
||||
// y-polarization
|
||||
temp.f[0] = 0;
|
||||
temp.f[1] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[0];
|
||||
temp.f[2] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[1];
|
||||
temp.f[3] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[2];
|
||||
f4_volt[1][pos[0]][pos[1]][0].v *= Op->f4_vv[1][pos[0]][pos[1]][0].v;
|
||||
f4_volt[1][pos[0]][pos[1]][0].v += Op->f4_vi[1][pos[0]][pos[1]][0].v * ( f4_curr[0][pos[0]][pos[1]][0].v - temp.v - f4_curr[2][pos[0]][pos[1]][0].v + f4_curr[2][pos[0]-shift[0]][pos[1]][0].v);
|
||||
f4_volt[1][pos[0]][pos[1]][0].v *=
|
||||
Op->f4_vv[1][pos[0]][pos[1]][0].v;
|
||||
f4_volt[1][pos[0]][pos[1]][0].v +=
|
||||
Op->f4_vi[1][pos[0]][pos[1]][0].v * (
|
||||
f4_curr[0][pos[0] ][pos[1]][0].v -
|
||||
temp.v -
|
||||
f4_curr[2][pos[0] ][pos[1]][0].v +
|
||||
f4_curr[2][pos[0]-shift[0]][pos[1]][0].v
|
||||
);
|
||||
|
||||
// z-polarization
|
||||
f4_volt[2][pos[0]][pos[1]][0].v *= Op->f4_vv[2][pos[0]][pos[1]][0].v;
|
||||
f4_volt[2][pos[0]][pos[1]][0].v += Op->f4_vi[2][pos[0]][pos[1]][0].v * ( f4_curr[1][pos[0]][pos[1]][0].v - f4_curr[1][pos[0]-shift[0]][pos[1]][0].v - f4_curr[0][pos[0]][pos[1]][0].v + f4_curr[0][pos[0]][pos[1]-shift[1]][0].v);
|
||||
f4_volt[2][pos[0]][pos[1]][0].v *=
|
||||
Op->f4_vv[2][pos[0]][pos[1]][0].v;
|
||||
f4_volt[2][pos[0]][pos[1]][0].v +=
|
||||
Op->f4_vi[2][pos[0]][pos[1]][0].v * (
|
||||
f4_curr[1][pos[0] ][pos[1] ][0].v -
|
||||
f4_curr[1][pos[0]-shift[0]][pos[1] ][0].v -
|
||||
f4_curr[0][pos[0] ][pos[1] ][0].v +
|
||||
f4_curr[0][pos[0] ][pos[1]-shift[1]][0].v
|
||||
);
|
||||
}
|
||||
++pos[0];
|
||||
}
|
||||
|
@ -141,16 +183,37 @@ void Engine_sse::UpdateCurrents(unsigned int startX, unsigned int numX)
|
|||
for (pos[2]=0; pos[2]<numVectors-1; ++pos[2])
|
||||
{
|
||||
// x-pol
|
||||
f4_curr[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[0][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_curr[0][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[0][pos[0]][pos[1]][pos[2]].v * ( f4_volt[2][pos[0]][pos[1]][pos[2]].v - f4_volt[2][pos[0]][pos[1]+1][pos[2]].v - f4_volt[1][pos[0]][pos[1]][pos[2]].v + f4_volt[1][pos[0]][pos[1]][pos[2]+1].v);
|
||||
f4_curr[0][pos[0]][pos[1]][pos[2]].v *=
|
||||
Op->f4_ii[0][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_curr[0][pos[0]][pos[1]][pos[2]].v +=
|
||||
Op->f4_iv[0][pos[0]][pos[1]][pos[2]].v * (
|
||||
f4_volt[2][pos[0]][pos[1] ][pos[2] ].v -
|
||||
f4_volt[2][pos[0]][pos[1]+1][pos[2] ].v -
|
||||
f4_volt[1][pos[0]][pos[1] ][pos[2] ].v +
|
||||
f4_volt[1][pos[0]][pos[1] ][pos[2]+1].v
|
||||
);
|
||||
|
||||
// y-pol
|
||||
f4_curr[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[1][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_curr[1][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[1][pos[0]][pos[1]][pos[2]].v * ( f4_volt[0][pos[0]][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]+1].v - f4_volt[2][pos[0]][pos[1]][pos[2]].v + f4_volt[2][pos[0]+1][pos[1]][pos[2]].v);
|
||||
f4_curr[1][pos[0]][pos[1]][pos[2]].v *=
|
||||
Op->f4_ii[1][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_curr[1][pos[0]][pos[1]][pos[2]].v +=
|
||||
Op->f4_iv[1][pos[0]][pos[1]][pos[2]].v * (
|
||||
f4_volt[0][pos[0] ][pos[1]][pos[2] ].v -
|
||||
f4_volt[0][pos[0] ][pos[1]][pos[2]+1].v -
|
||||
f4_volt[2][pos[0] ][pos[1]][pos[2] ].v +
|
||||
f4_volt[2][pos[0]+1][pos[1]][pos[2] ].v
|
||||
);
|
||||
|
||||
// z-pol
|
||||
f4_curr[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[2][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_curr[2][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[2][pos[0]][pos[1]][pos[2]].v * ( f4_volt[1][pos[0]][pos[1]][pos[2]].v - f4_volt[1][pos[0]+1][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]].v + f4_volt[0][pos[0]][pos[1]+1][pos[2]].v);
|
||||
f4_curr[2][pos[0]][pos[1]][pos[2]].v *=
|
||||
Op->f4_ii[2][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_curr[2][pos[0]][pos[1]][pos[2]].v +=
|
||||
Op->f4_iv[2][pos[0]][pos[1]][pos[2]].v * (
|
||||
f4_volt[1][pos[0] ][pos[1] ][pos[2]].v -
|
||||
f4_volt[1][pos[0]+1][pos[1] ][pos[2]].v -
|
||||
f4_volt[0][pos[0] ][pos[1] ][pos[2]].v +
|
||||
f4_volt[0][pos[0] ][pos[1]+1][pos[2]].v
|
||||
);
|
||||
}
|
||||
|
||||
// for pos[2] = numVectors-1
|
||||
|
@ -159,20 +222,41 @@ void Engine_sse::UpdateCurrents(unsigned int startX, unsigned int numX)
|
|||
temp.f[1] = f4_volt[1][pos[0]][pos[1]][0].f[2];
|
||||
temp.f[2] = f4_volt[1][pos[0]][pos[1]][0].f[3];
|
||||
temp.f[3] = 0;
|
||||
f4_curr[0][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii[0][pos[0]][pos[1]][numVectors-1].v;
|
||||
f4_curr[0][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv[0][pos[0]][pos[1]][numVectors-1].v * ( f4_volt[2][pos[0]][pos[1]][numVectors-1].v - f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v - f4_volt[1][pos[0]][pos[1]][numVectors-1].v + temp.v);
|
||||
f4_curr[0][pos[0]][pos[1]][numVectors-1].v *=
|
||||
Op->f4_ii[0][pos[0]][pos[1]][numVectors-1].v;
|
||||
f4_curr[0][pos[0]][pos[1]][numVectors-1].v +=
|
||||
Op->f4_iv[0][pos[0]][pos[1]][numVectors-1].v * (
|
||||
f4_volt[2][pos[0]][pos[1] ][numVectors-1].v -
|
||||
f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v -
|
||||
f4_volt[1][pos[0]][pos[1] ][numVectors-1].v +
|
||||
temp.v
|
||||
);
|
||||
|
||||
// y-pol
|
||||
temp.f[0] = f4_volt[0][pos[0]][pos[1]][0].f[1];
|
||||
temp.f[1] = f4_volt[0][pos[0]][pos[1]][0].f[2];
|
||||
temp.f[2] = f4_volt[0][pos[0]][pos[1]][0].f[3];
|
||||
temp.f[3] = 0;
|
||||
f4_curr[1][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii[1][pos[0]][pos[1]][numVectors-1].v;
|
||||
f4_curr[1][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv[1][pos[0]][pos[1]][numVectors-1].v * ( f4_volt[0][pos[0]][pos[1]][numVectors-1].v - temp.v - f4_volt[2][pos[0]][pos[1]][numVectors-1].v + f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v);
|
||||
f4_curr[1][pos[0]][pos[1]][numVectors-1].v *=
|
||||
Op->f4_ii[1][pos[0]][pos[1]][numVectors-1].v;
|
||||
f4_curr[1][pos[0]][pos[1]][numVectors-1].v +=
|
||||
Op->f4_iv[1][pos[0]][pos[1]][numVectors-1].v * (
|
||||
f4_volt[0][pos[0] ][pos[1]][numVectors-1].v -
|
||||
temp.v -
|
||||
f4_volt[2][pos[0] ][pos[1]][numVectors-1].v +
|
||||
f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v
|
||||
);
|
||||
|
||||
// z-pol
|
||||
f4_curr[2][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii[2][pos[0]][pos[1]][numVectors-1].v;
|
||||
f4_curr[2][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv[2][pos[0]][pos[1]][numVectors-1].v * ( f4_volt[1][pos[0]][pos[1]][numVectors-1].v - f4_volt[1][pos[0]+1][pos[1]][numVectors-1].v - f4_volt[0][pos[0]][pos[1]][numVectors-1].v + f4_volt[0][pos[0]][pos[1]+1][numVectors-1].v);
|
||||
f4_curr[2][pos[0]][pos[1]][numVectors-1].v *=
|
||||
Op->f4_ii[2][pos[0]][pos[1]][numVectors-1].v;
|
||||
f4_curr[2][pos[0]][pos[1]][numVectors-1].v +=
|
||||
Op->f4_iv[2][pos[0]][pos[1]][numVectors-1].v * (
|
||||
f4_volt[1][pos[0] ][pos[1] ][numVectors-1].v -
|
||||
f4_volt[1][pos[0]+1][pos[1] ][numVectors-1].v -
|
||||
f4_volt[0][pos[0] ][pos[1] ][numVectors-1].v +
|
||||
f4_volt[0][pos[0] ][pos[1]+1][numVectors-1].v
|
||||
);
|
||||
}
|
||||
++pos[0];
|
||||
}
|
||||
|
|
|
@ -55,47 +55,93 @@ void Engine_SSE_Compressed::UpdateVoltages(unsigned int startX, unsigned int num
|
|||
{
|
||||
index = Op->m_Op_index[pos[0]][pos[1]][pos[2]];
|
||||
// x-polarization
|
||||
f4_volt[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[0][index].v;
|
||||
f4_volt[0][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[0][index].v * ( f4_curr[2][pos[0]][pos[1]][pos[2]].v - f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - f4_curr[1][pos[0]][pos[1]][pos[2]].v + f4_curr[1][pos[0]][pos[1]][pos[2]-1].v );
|
||||
f4_volt[0][pos[0]][pos[1]][pos[2]].v *=
|
||||
Op->f4_vv_Compressed[0][index].v;
|
||||
f4_volt[0][pos[0]][pos[1]][pos[2]].v +=
|
||||
Op->f4_vi_Compressed[0][index].v * (
|
||||
f4_curr[2][pos[0]][pos[1] ][pos[2] ].v -
|
||||
f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2] ].v -
|
||||
f4_curr[1][pos[0]][pos[1] ][pos[2] ].v +
|
||||
f4_curr[1][pos[0]][pos[1] ][pos[2]-1].v
|
||||
);
|
||||
|
||||
// y-polarization
|
||||
f4_volt[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[1][index].v;
|
||||
f4_volt[1][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[1][index].v * ( f4_curr[0][pos[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]-1].v - f4_curr[2][pos[0]][pos[1]][pos[2]].v + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]].v);
|
||||
f4_volt[1][pos[0]][pos[1]][pos[2]].v *=
|
||||
Op->f4_vv_Compressed[1][index].v;
|
||||
f4_volt[1][pos[0]][pos[1]][pos[2]].v +=
|
||||
Op->f4_vi_Compressed[1][index].v * (
|
||||
f4_curr[0][pos[0] ][pos[1]][pos[2] ].v -
|
||||
f4_curr[0][pos[0] ][pos[1]][pos[2]-1].v -
|
||||
f4_curr[2][pos[0] ][pos[1]][pos[2] ].v +
|
||||
f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2] ].v
|
||||
);
|
||||
|
||||
// z-polarization
|
||||
f4_volt[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[2][index].v;
|
||||
f4_volt[2][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[2][index].v * ( f4_curr[1][pos[0]][pos[1]][pos[2]].v - f4_curr[1][pos[0]-shift[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]].v + f4_curr[0][pos[0]][pos[1]-shift[1]][pos[2]].v);
|
||||
f4_volt[2][pos[0]][pos[1]][pos[2]].v *=
|
||||
Op->f4_vv_Compressed[2][index].v;
|
||||
f4_volt[2][pos[0]][pos[1]][pos[2]].v +=
|
||||
Op->f4_vi_Compressed[2][index].v * (
|
||||
f4_curr[1][pos[0] ][pos[1]] [pos[2]].v -
|
||||
f4_curr[1][pos[0]-shift[0]][pos[1]] [pos[2]].v -
|
||||
f4_curr[0][pos[0] ][pos[1]] [pos[2]].v +
|
||||
f4_curr[0][pos[0] ][pos[1]-shift[1]][pos[2]].v
|
||||
);
|
||||
}
|
||||
|
||||
// for pos[2] = 0
|
||||
// x-polarization
|
||||
index = Op->m_Op_index[pos[0]][pos[1]][0];
|
||||
#ifdef __SSE2__
|
||||
temp.v = (__m128)_mm_slli_si128( (__m128i)f4_curr[1][pos[0]][pos[1]][numVectors-1].v, 4 );
|
||||
temp.v = (__m128)_mm_slli_si128(
|
||||
(__m128i)f4_curr[1][pos[0]][pos[1]][numVectors-1].v, 4
|
||||
);
|
||||
#else
|
||||
temp.f[0] = 0;
|
||||
temp.f[1] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[0];
|
||||
temp.f[2] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[1];
|
||||
temp.f[3] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[2];
|
||||
#endif
|
||||
f4_volt[0][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[0][index].v;
|
||||
f4_volt[0][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[0][index].v * ( f4_curr[2][pos[0]][pos[1]][0].v - f4_curr[2][pos[0]][pos[1]-shift[1]][0].v - f4_curr[1][pos[0]][pos[1]][0].v + temp.v );
|
||||
f4_volt[0][pos[0]][pos[1]][0].v *=
|
||||
Op->f4_vv_Compressed[0][index].v;
|
||||
f4_volt[0][pos[0]][pos[1]][0].v +=
|
||||
Op->f4_vi_Compressed[0][index].v * (
|
||||
f4_curr[2][pos[0]][pos[1] ][0].v -
|
||||
f4_curr[2][pos[0]][pos[1]-shift[1]][0].v -
|
||||
f4_curr[1][pos[0]][pos[1] ][0].v +
|
||||
temp.v
|
||||
);
|
||||
|
||||
// y-polarization
|
||||
#ifdef __SSE2__
|
||||
temp.v = (__m128)_mm_slli_si128( (__m128i)f4_curr[0][pos[0]][pos[1]][numVectors-1].v, 4 );
|
||||
temp.v = (__m128)_mm_slli_si128(
|
||||
(__m128i)f4_curr[0][pos[0]][pos[1]][numVectors-1].v, 4
|
||||
);
|
||||
#else
|
||||
temp.f[0] = 0;
|
||||
temp.f[1] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[0];
|
||||
temp.f[2] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[1];
|
||||
temp.f[3] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[2];
|
||||
#endif
|
||||
f4_volt[1][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[1][index].v;
|
||||
f4_volt[1][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[1][index].v * ( f4_curr[0][pos[0]][pos[1]][0].v - temp.v - f4_curr[2][pos[0]][pos[1]][0].v + f4_curr[2][pos[0]-shift[0]][pos[1]][0].v);
|
||||
f4_volt[1][pos[0]][pos[1]][0].v *=
|
||||
Op->f4_vv_Compressed[1][index].v;
|
||||
f4_volt[1][pos[0]][pos[1]][0].v +=
|
||||
Op->f4_vi_Compressed[1][index].v * (
|
||||
f4_curr[0][pos[0] ][pos[1]][0].v -
|
||||
temp.v -
|
||||
f4_curr[2][pos[0] ][pos[1]][0].v +
|
||||
f4_curr[2][pos[0]-shift[0]][pos[1]][0].v
|
||||
);
|
||||
|
||||
// z-polarization
|
||||
f4_volt[2][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[2][index].v;
|
||||
f4_volt[2][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[2][index].v * ( f4_curr[1][pos[0]][pos[1]][0].v - f4_curr[1][pos[0]-shift[0]][pos[1]][0].v - f4_curr[0][pos[0]][pos[1]][0].v + f4_curr[0][pos[0]][pos[1]-shift[1]][0].v);
|
||||
f4_volt[2][pos[0]][pos[1]][0].v *=
|
||||
Op->f4_vv_Compressed[2][index].v;
|
||||
f4_volt[2][pos[0]][pos[1]][0].v +=
|
||||
Op->f4_vi_Compressed[2][index].v * (
|
||||
f4_curr[1][pos[0] ][pos[1] ][0].v -
|
||||
f4_curr[1][pos[0]-shift[0]][pos[1] ][0].v -
|
||||
f4_curr[0][pos[0] ][pos[1] ][0].v +
|
||||
f4_curr[0][pos[0] ][pos[1]-shift[1]][0].v
|
||||
);
|
||||
}
|
||||
++pos[0];
|
||||
}
|
||||
|
@ -116,47 +162,93 @@ void Engine_SSE_Compressed::UpdateCurrents(unsigned int startX, unsigned int num
|
|||
{
|
||||
index = Op->m_Op_index[pos[0]][pos[1]][pos[2]];
|
||||
// x-pol
|
||||
f4_curr[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[0][index].v;
|
||||
f4_curr[0][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[0][index].v * ( f4_volt[2][pos[0]][pos[1]][pos[2]].v - f4_volt[2][pos[0]][pos[1]+1][pos[2]].v - f4_volt[1][pos[0]][pos[1]][pos[2]].v + f4_volt[1][pos[0]][pos[1]][pos[2]+1].v);
|
||||
f4_curr[0][pos[0]][pos[1]][pos[2]].v *=
|
||||
Op->f4_ii_Compressed[0][index].v;
|
||||
f4_curr[0][pos[0]][pos[1]][pos[2]].v +=
|
||||
Op->f4_iv_Compressed[0][index].v * (
|
||||
f4_volt[2][pos[0]][pos[1] ][pos[2] ].v -
|
||||
f4_volt[2][pos[0]][pos[1]+1][pos[2] ].v -
|
||||
f4_volt[1][pos[0]][pos[1] ][pos[2] ].v +
|
||||
f4_volt[1][pos[0]][pos[1] ][pos[2]+1].v
|
||||
);
|
||||
|
||||
// y-pol
|
||||
f4_curr[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[1][index].v;
|
||||
f4_curr[1][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[1][index].v * ( f4_volt[0][pos[0]][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]+1].v - f4_volt[2][pos[0]][pos[1]][pos[2]].v + f4_volt[2][pos[0]+1][pos[1]][pos[2]].v);
|
||||
f4_curr[1][pos[0]][pos[1]][pos[2]].v *=
|
||||
Op->f4_ii_Compressed[1][index].v;
|
||||
f4_curr[1][pos[0]][pos[1]][pos[2]].v +=
|
||||
Op->f4_iv_Compressed[1][index].v * (
|
||||
f4_volt[0][pos[0] ][pos[1]][pos[2] ].v -
|
||||
f4_volt[0][pos[0] ][pos[1]][pos[2]+1].v -
|
||||
f4_volt[2][pos[0] ][pos[1]][pos[2] ].v +
|
||||
f4_volt[2][pos[0]+1][pos[1]][pos[2] ].v
|
||||
);
|
||||
|
||||
// z-pol
|
||||
f4_curr[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[2][index].v;
|
||||
f4_curr[2][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[2][index].v * ( f4_volt[1][pos[0]][pos[1]][pos[2]].v - f4_volt[1][pos[0]+1][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]].v + f4_volt[0][pos[0]][pos[1]+1][pos[2]].v);
|
||||
f4_curr[2][pos[0]][pos[1]][pos[2]].v *=
|
||||
Op->f4_ii_Compressed[2][index].v;
|
||||
f4_curr[2][pos[0]][pos[1]][pos[2]].v +=
|
||||
Op->f4_iv_Compressed[2][index].v * (
|
||||
f4_volt[1][pos[0] ][pos[1] ][pos[2]].v -
|
||||
f4_volt[1][pos[0]+1][pos[1] ][pos[2]].v -
|
||||
f4_volt[0][pos[0] ][pos[1] ][pos[2]].v +
|
||||
f4_volt[0][pos[0] ][pos[1]+1][pos[2]].v
|
||||
);
|
||||
}
|
||||
|
||||
index = Op->m_Op_index[pos[0]][pos[1]][numVectors-1];
|
||||
// for pos[2] = numVectors-1
|
||||
// x-pol
|
||||
#ifdef __SSE2__
|
||||
temp.v = (__m128)_mm_srli_si128( (__m128i)f4_volt[1][pos[0]][pos[1]][0].v, 4 );
|
||||
temp.v = (__m128)_mm_srli_si128(
|
||||
(__m128i)f4_volt[1][pos[0]][pos[1]][0].v, 4
|
||||
);
|
||||
#else
|
||||
temp.f[0] = f4_volt[1][pos[0]][pos[1]][0].f[1];
|
||||
temp.f[1] = f4_volt[1][pos[0]][pos[1]][0].f[2];
|
||||
temp.f[2] = f4_volt[1][pos[0]][pos[1]][0].f[3];
|
||||
temp.f[3] = 0;
|
||||
#endif
|
||||
f4_curr[0][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[0][index].v;
|
||||
f4_curr[0][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[0][index].v * ( f4_volt[2][pos[0]][pos[1]][numVectors-1].v - f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v - f4_volt[1][pos[0]][pos[1]][numVectors-1].v + temp.v);
|
||||
f4_curr[0][pos[0]][pos[1]][numVectors-1].v *=
|
||||
Op->f4_ii_Compressed[0][index].v;
|
||||
f4_curr[0][pos[0]][pos[1]][numVectors-1].v +=
|
||||
Op->f4_iv_Compressed[0][index].v * (
|
||||
f4_volt[2][pos[0]][pos[1] ][numVectors-1].v -
|
||||
f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v -
|
||||
f4_volt[1][pos[0]][pos[1] ][numVectors-1].v +
|
||||
temp.v
|
||||
);
|
||||
|
||||
// y-pol
|
||||
#ifdef __SSE2__
|
||||
temp.v = (__m128)_mm_srli_si128( (__m128i)f4_volt[0][pos[0]][pos[1]][0].v, 4 );
|
||||
temp.v = (__m128)_mm_srli_si128(
|
||||
(__m128i)f4_volt[0][pos[0]][pos[1]][0].v, 4
|
||||
);
|
||||
#else
|
||||
temp.f[0] = f4_volt[0][pos[0]][pos[1]][0].f[1];
|
||||
temp.f[1] = f4_volt[0][pos[0]][pos[1]][0].f[2];
|
||||
temp.f[2] = f4_volt[0][pos[0]][pos[1]][0].f[3];
|
||||
temp.f[3] = 0;
|
||||
#endif
|
||||
f4_curr[1][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[1][index].v;
|
||||
f4_curr[1][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[1][index].v * ( f4_volt[0][pos[0]][pos[1]][numVectors-1].v - temp.v - f4_volt[2][pos[0]][pos[1]][numVectors-1].v + f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v);
|
||||
f4_curr[1][pos[0]][pos[1]][numVectors-1].v *=
|
||||
Op->f4_ii_Compressed[1][index].v;
|
||||
f4_curr[1][pos[0]][pos[1]][numVectors-1].v +=
|
||||
Op->f4_iv_Compressed[1][index].v * (
|
||||
f4_volt[0][pos[0] ][pos[1]][numVectors-1].v -
|
||||
temp.v -
|
||||
f4_volt[2][pos[0] ][pos[1]][numVectors-1].v +
|
||||
f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v
|
||||
);
|
||||
|
||||
// z-pol
|
||||
f4_curr[2][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[2][index].v;
|
||||
f4_curr[2][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[2][index].v * ( f4_volt[1][pos[0]][pos[1]][numVectors-1].v - f4_volt[1][pos[0]+1][pos[1]][numVectors-1].v - f4_volt[0][pos[0]][pos[1]][numVectors-1].v + f4_volt[0][pos[0]][pos[1]+1][numVectors-1].v);
|
||||
f4_curr[2][pos[0]][pos[1]][numVectors-1].v *=
|
||||
Op->f4_ii_Compressed[2][index].v;
|
||||
f4_curr[2][pos[0]][pos[1]][numVectors-1].v +=
|
||||
Op->f4_iv_Compressed[2][index].v * (
|
||||
f4_volt[1][pos[0] ][pos[1] ][numVectors-1].v -
|
||||
f4_volt[1][pos[0]+1][pos[1] ][numVectors-1].v -
|
||||
f4_volt[0][pos[0] ][pos[1] ][numVectors-1].v +
|
||||
f4_volt[0][pos[0] ][pos[1]+1][numVectors-1].v
|
||||
);
|
||||
}
|
||||
++pos[0];
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue