|
|
|
@ -33,6 +33,7 @@ Engine_sse::Engine_sse(const Operator_sse* op) : Engine(op)
|
|
|
|
|
{
|
|
|
|
|
numLines[n] = Op->GetNumberOfLines(n);
|
|
|
|
|
}
|
|
|
|
|
numVectors = ceil((double)numLines[2]/4.0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Engine_sse::~Engine_sse()
|
|
|
|
@ -63,34 +64,49 @@ void Engine_sse::UpdateVoltages()
|
|
|
|
|
bool shift[2];
|
|
|
|
|
f4vector temp;
|
|
|
|
|
|
|
|
|
|
unsigned int maxZ = numVectors;
|
|
|
|
|
|
|
|
|
|
for (pos[0]=0;pos[0]<numLines[0];++pos[0])
|
|
|
|
|
{
|
|
|
|
|
shift[0]=pos[0];
|
|
|
|
|
for (pos[1]=0;pos[1]<numLines[1];++pos[1])
|
|
|
|
|
{
|
|
|
|
|
shift[1]=pos[1];
|
|
|
|
|
for (pos[2]=0;pos[2]<ceil(numLines[2]/4);++pos[2])
|
|
|
|
|
for (pos[2]=1;pos[2]<maxZ;++pos[2])
|
|
|
|
|
{
|
|
|
|
|
// x-polarization
|
|
|
|
|
temp.f[0] = f4_curr[1][pos[0]][pos[1]][pos[2]-(bool)pos[2]].f[3];
|
|
|
|
|
temp.f[1] = f4_curr[1][pos[0]][pos[1]][pos[2]].f[0];
|
|
|
|
|
temp.f[2] = f4_curr[1][pos[0]][pos[1]][pos[2]].f[1];
|
|
|
|
|
temp.f[3] = f4_curr[1][pos[0]][pos[1]][pos[2]].f[2];
|
|
|
|
|
f4_volt[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[0][pos[0]][pos[1]][pos[2]].v;
|
|
|
|
|
f4_volt[0][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[0][pos[0]][pos[1]][pos[2]].v * ( f4_curr[2][pos[0]][pos[1]][pos[2]].v - f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - f4_curr[1][pos[0]][pos[1]][pos[2]].v + temp.v );
|
|
|
|
|
f4_volt[0][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[0][pos[0]][pos[1]][pos[2]].v * ( f4_curr[2][pos[0]][pos[1]][pos[2]].v - f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - f4_curr[1][pos[0]][pos[1]][pos[2]].v + f4_curr[1][pos[0]][pos[1]][pos[2]-1].v );
|
|
|
|
|
|
|
|
|
|
// y-polarization
|
|
|
|
|
temp.f[0] = f4_curr[0][pos[0]][pos[1]][pos[2]-(bool)pos[2]].f[3];
|
|
|
|
|
temp.f[1] = f4_curr[0][pos[0]][pos[1]][pos[2]].f[0];
|
|
|
|
|
temp.f[2] = f4_curr[0][pos[0]][pos[1]][pos[2]].f[1];
|
|
|
|
|
temp.f[3] = f4_curr[0][pos[0]][pos[1]][pos[2]].f[2];
|
|
|
|
|
f4_volt[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[1][pos[0]][pos[1]][pos[2]].v;
|
|
|
|
|
f4_volt[1][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[1][pos[0]][pos[1]][pos[2]].v * ( f4_curr[0][pos[0]][pos[1]][pos[2]].v - temp.v - f4_curr[2][pos[0]][pos[1]][pos[2]].v + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]].v);
|
|
|
|
|
f4_volt[1][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[1][pos[0]][pos[1]][pos[2]].v * ( f4_curr[0][pos[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]-1].v - f4_curr[2][pos[0]][pos[1]][pos[2]].v + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]].v);
|
|
|
|
|
|
|
|
|
|
// z-polarization
|
|
|
|
|
f4_volt[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[2][pos[0]][pos[1]][pos[2]].v;
|
|
|
|
|
f4_volt[2][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[2][pos[0]][pos[1]][pos[2]].v * ( f4_curr[1][pos[0]][pos[1]][pos[2]].v - f4_curr[1][pos[0]-shift[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]].v + f4_curr[0][pos[0]][pos[1]-shift[1]][pos[2]].v);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// for pos[2] = 0
|
|
|
|
|
// x-polarization
|
|
|
|
|
temp.f[0] = 0;
|
|
|
|
|
temp.f[1] = f4_curr[1][pos[0]][pos[1]][maxZ-1].f[0];
|
|
|
|
|
temp.f[2] = f4_curr[1][pos[0]][pos[1]][maxZ-1].f[1];
|
|
|
|
|
temp.f[3] = f4_curr[1][pos[0]][pos[1]][maxZ-1].f[2];
|
|
|
|
|
f4_volt[0][pos[0]][pos[1]][0].v *= Op->f4_vv[0][pos[0]][pos[1]][0].v;
|
|
|
|
|
f4_volt[0][pos[0]][pos[1]][0].v += Op->f4_vi[0][pos[0]][pos[1]][0].v * ( f4_curr[2][pos[0]][pos[1]][0].v - f4_curr[2][pos[0]][pos[1]-shift[1]][0].v - f4_curr[1][pos[0]][pos[1]][0].v + temp.v );
|
|
|
|
|
|
|
|
|
|
// y-polarization
|
|
|
|
|
temp.f[0] = 0;
|
|
|
|
|
temp.f[1] = f4_curr[0][pos[0]][pos[1]][maxZ-1].f[0];
|
|
|
|
|
temp.f[2] = f4_curr[0][pos[0]][pos[1]][maxZ-1].f[1];
|
|
|
|
|
temp.f[3] = f4_curr[0][pos[0]][pos[1]][maxZ-1].f[2];
|
|
|
|
|
f4_volt[1][pos[0]][pos[1]][0].v *= Op->f4_vv[1][pos[0]][pos[1]][0].v;
|
|
|
|
|
f4_volt[1][pos[0]][pos[1]][0].v += Op->f4_vi[1][pos[0]][pos[1]][0].v * ( f4_curr[0][pos[0]][pos[1]][0].v - temp.v - f4_curr[2][pos[0]][pos[1]][0].v + f4_curr[2][pos[0]-shift[0]][pos[1]][0].v);
|
|
|
|
|
|
|
|
|
|
// z-polarization
|
|
|
|
|
f4_volt[2][pos[0]][pos[1]][0].v *= Op->f4_vv[2][pos[0]][pos[1]][0].v;
|
|
|
|
|
f4_volt[2][pos[0]][pos[1]][0].v += Op->f4_vi[2][pos[0]][pos[1]][0].v * ( f4_curr[1][pos[0]][pos[1]][0].v - f4_curr[1][pos[0]-shift[0]][pos[1]][0].v - f4_curr[0][pos[0]][pos[1]][0].v + f4_curr[0][pos[0]][pos[1]-shift[1]][0].v);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@ -100,32 +116,47 @@ void Engine_sse::UpdateCurrents()
|
|
|
|
|
unsigned int pos[5];
|
|
|
|
|
f4vector temp;
|
|
|
|
|
|
|
|
|
|
unsigned int maxZ = numVectors;
|
|
|
|
|
|
|
|
|
|
for (pos[0]=0;pos[0]<numLines[0]-1;++pos[0])
|
|
|
|
|
{
|
|
|
|
|
for (pos[1]=0;pos[1]<numLines[1]-1;++pos[1])
|
|
|
|
|
{
|
|
|
|
|
for (pos[2]=0;pos[2]<ceil(numLines[2]/4);++pos[2]) // FIXME is this correct?
|
|
|
|
|
for (pos[2]=0;pos[2]<maxZ-1;++pos[2])
|
|
|
|
|
{
|
|
|
|
|
// x-pol
|
|
|
|
|
temp.f[0] = f4_volt[1][pos[0]][pos[1]][pos[2]].f[1];
|
|
|
|
|
temp.f[1] = f4_volt[1][pos[0]][pos[1]][pos[2]].f[2];
|
|
|
|
|
temp.f[2] = f4_volt[1][pos[0]][pos[1]][pos[2]].f[3];
|
|
|
|
|
temp.f[3] = f4_volt[1][pos[0]][pos[1]][pos[2]+1].f[0]; // FIXME outside sim area
|
|
|
|
|
f4_curr[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[0][pos[0]][pos[1]][pos[2]].v;
|
|
|
|
|
f4_curr[0][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[0][pos[0]][pos[1]][pos[2]].v * ( f4_volt[2][pos[0]][pos[1]][pos[2]].v - f4_volt[2][pos[0]][pos[1]+1][pos[2]].v - f4_volt[1][pos[0]][pos[1]][pos[2]].v + temp.v);
|
|
|
|
|
f4_curr[0][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[0][pos[0]][pos[1]][pos[2]].v * ( f4_volt[2][pos[0]][pos[1]][pos[2]].v - f4_volt[2][pos[0]][pos[1]+1][pos[2]].v - f4_volt[1][pos[0]][pos[1]][pos[2]].v + f4_volt[1][pos[0]][pos[1]][pos[2]+1].v);
|
|
|
|
|
|
|
|
|
|
// y-pol
|
|
|
|
|
temp.f[0] = f4_volt[0][pos[0]][pos[1]][pos[2]].f[1];
|
|
|
|
|
temp.f[1] = f4_volt[0][pos[0]][pos[1]][pos[2]].f[2];
|
|
|
|
|
temp.f[2] = f4_volt[0][pos[0]][pos[1]][pos[2]].f[3];
|
|
|
|
|
temp.f[3] = f4_volt[0][pos[0]][pos[1]][pos[2]+1].f[0]; // FIXME outside sim area
|
|
|
|
|
f4_curr[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[1][pos[0]][pos[1]][pos[2]].v;
|
|
|
|
|
f4_curr[1][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[1][pos[0]][pos[1]][pos[2]].v * ( f4_volt[0][pos[0]][pos[1]][pos[2]].v - temp.v - f4_volt[2][pos[0]][pos[1]][pos[2]].v + f4_volt[2][pos[0]+1][pos[1]][pos[2]].v);
|
|
|
|
|
f4_curr[1][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[1][pos[0]][pos[1]][pos[2]].v * ( f4_volt[0][pos[0]][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]+1].v - f4_volt[2][pos[0]][pos[1]][pos[2]].v + f4_volt[2][pos[0]+1][pos[1]][pos[2]].v);
|
|
|
|
|
|
|
|
|
|
// z-pol
|
|
|
|
|
f4_curr[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[2][pos[0]][pos[1]][pos[2]].v;
|
|
|
|
|
f4_curr[2][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[2][pos[0]][pos[1]][pos[2]].v * ( f4_volt[1][pos[0]][pos[1]][pos[2]].v - f4_volt[1][pos[0]+1][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]].v + f4_volt[0][pos[0]][pos[1]+1][pos[2]].v);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// for pos[2] = maxZ-1
|
|
|
|
|
// x-pol
|
|
|
|
|
temp.f[0] = f4_volt[1][pos[0]][pos[1]][0].f[1];
|
|
|
|
|
temp.f[1] = f4_volt[1][pos[0]][pos[1]][0].f[2];
|
|
|
|
|
temp.f[2] = f4_volt[1][pos[0]][pos[1]][0].f[3];
|
|
|
|
|
temp.f[3] = 0;
|
|
|
|
|
f4_curr[0][pos[0]][pos[1]][maxZ-1].v *= Op->f4_ii[0][pos[0]][pos[1]][maxZ-1].v;
|
|
|
|
|
f4_curr[0][pos[0]][pos[1]][maxZ-1].v += Op->f4_iv[0][pos[0]][pos[1]][maxZ-1].v * ( f4_volt[2][pos[0]][pos[1]][maxZ-1].v - f4_volt[2][pos[0]][pos[1]+1][maxZ-1].v - f4_volt[1][pos[0]][pos[1]][maxZ-1].v + temp.v);
|
|
|
|
|
|
|
|
|
|
// y-pol
|
|
|
|
|
temp.f[0] = f4_volt[0][pos[0]][pos[1]][0].f[1];
|
|
|
|
|
temp.f[1] = f4_volt[0][pos[0]][pos[1]][0].f[2];
|
|
|
|
|
temp.f[2] = f4_volt[0][pos[0]][pos[1]][0].f[3];
|
|
|
|
|
temp.f[3] = 0;
|
|
|
|
|
f4_curr[1][pos[0]][pos[1]][maxZ-1].v *= Op->f4_ii[1][pos[0]][pos[1]][maxZ-1].v;
|
|
|
|
|
f4_curr[1][pos[0]][pos[1]][maxZ-1].v += Op->f4_iv[1][pos[0]][pos[1]][maxZ-1].v * ( f4_volt[0][pos[0]][pos[1]][maxZ-1].v - temp.v - f4_volt[2][pos[0]][pos[1]][maxZ-1].v + f4_volt[2][pos[0]+1][pos[1]][maxZ-1].v);
|
|
|
|
|
|
|
|
|
|
// z-pol
|
|
|
|
|
f4_curr[2][pos[0]][pos[1]][maxZ-1].v *= Op->f4_ii[2][pos[0]][pos[1]][maxZ-1].v;
|
|
|
|
|
f4_curr[2][pos[0]][pos[1]][maxZ-1].v += Op->f4_iv[2][pos[0]][pos[1]][maxZ-1].v * ( f4_volt[1][pos[0]][pos[1]][maxZ-1].v - f4_volt[1][pos[0]+1][pos[1]][maxZ-1].v - f4_volt[0][pos[0]][pos[1]][maxZ-1].v + f4_volt[0][pos[0]][pos[1]+1][maxZ-1].v);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|