upml extension: use inline engine access methods

todo: further speed optimization for this extension...

Signed-off-by: Thorsten Liebig <Thorsten.Liebig@gmx.de>
pull/1/head
Thorsten Liebig 2010-10-05 11:50:30 +02:00
parent 6c00a844d2
commit ae281d3dea
1 changed files with 302 additions and 55 deletions

View File

@ -53,7 +53,67 @@ void Engine_Ext_UPML::DoPreVoltageUpdates()
unsigned int pos[3]; unsigned int pos[3];
unsigned int loc_pos[3]; unsigned int loc_pos[3];
switch (m_Eng->GetType())
{
case Engine::BASIC:
{
for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0])
{
pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0];
for (loc_pos[1]=0;loc_pos[1]<m_Op_UPML->m_numLines[1];++loc_pos[1])
{
pos[1] = loc_pos[1] + m_Op_UPML->m_StartPos[1];
for (loc_pos[2]=0;loc_pos[2]<m_Op_UPML->m_numLines[2];++loc_pos[2])
{
pos[2] = loc_pos[2] + m_Op_UPML->m_StartPos[2];
volt[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->vv[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * m_Eng->Engine::GetVolt(0,pos)
- m_Op_UPML->vvfo[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]];
m_Eng->Engine::SetVolt(0,pos, volt_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
volt[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->vv[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * m_Eng->Engine::GetVolt(1,pos)
- m_Op_UPML->vvfo[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]];
m_Eng->Engine::SetVolt(1,pos, volt_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
volt[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->vv[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * m_Eng->Engine::GetVolt(2,pos)
- m_Op_UPML->vvfo[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]];
m_Eng->Engine::SetVolt(2,pos, volt_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
}
}
}
break;
}
case Engine::SSE:
{
Engine_sse* eng_sse = (Engine_sse*) m_Eng;
for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0])
{
pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0];
for (loc_pos[1]=0;loc_pos[1]<m_Op_UPML->m_numLines[1];++loc_pos[1])
{
pos[1] = loc_pos[1] + m_Op_UPML->m_StartPos[1];
for (loc_pos[2]=0;loc_pos[2]<m_Op_UPML->m_numLines[2];++loc_pos[2])
{
pos[2] = loc_pos[2] + m_Op_UPML->m_StartPos[2];
volt[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->vv[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * eng_sse->Engine_sse::GetVolt(0,pos)
- m_Op_UPML->vvfo[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]];
eng_sse->Engine_sse::SetVolt(0,pos, volt_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
volt[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->vv[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * eng_sse->Engine_sse::GetVolt(1,pos)
- m_Op_UPML->vvfo[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]];
eng_sse->Engine_sse::SetVolt(1,pos, volt_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
volt[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->vv[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * eng_sse->Engine_sse::GetVolt(2,pos)
- m_Op_UPML->vvfo[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]];
eng_sse->Engine_sse::SetVolt(2,pos, volt_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
}
}
}
break;
}
default:
{
for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0]) for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0])
{ {
pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0]; pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0];
@ -77,6 +137,10 @@ void Engine_Ext_UPML::DoPreVoltageUpdates()
} }
} }
} }
break;
}
}
} }
void Engine_Ext_UPML::DoPostVoltageUpdates() void Engine_Ext_UPML::DoPostVoltageUpdates()
@ -87,6 +151,61 @@ void Engine_Ext_UPML::DoPostVoltageUpdates()
unsigned int pos[3]; unsigned int pos[3];
unsigned int loc_pos[3]; unsigned int loc_pos[3];
switch (m_Eng->GetType())
{
case Engine::BASIC:
{
for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0])
{
pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0];
for (loc_pos[1]=0;loc_pos[1]<m_Op_UPML->m_numLines[1];++loc_pos[1])
{
pos[1] = loc_pos[1] + m_Op_UPML->m_StartPos[1];
for (loc_pos[2]=0;loc_pos[2]<m_Op_UPML->m_numLines[2];++loc_pos[2])
{
pos[2] = loc_pos[2] + m_Op_UPML->m_StartPos[2];
volt_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Eng->Engine::GetVolt(0,pos);
m_Eng->Engine::SetVolt(0,pos, volt[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] + m_Op_UPML->vvfn[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
volt_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Eng->Engine::GetVolt(1,pos);
m_Eng->Engine::SetVolt(1,pos, volt[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] + m_Op_UPML->vvfn[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
volt_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Eng->Engine::GetVolt(2,pos);
m_Eng->Engine::SetVolt(2,pos, volt[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] + m_Op_UPML->vvfn[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
}
}
}
break;
}
case Engine::SSE:
{
Engine_sse* eng_sse = (Engine_sse*) m_Eng;
for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0])
{
pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0];
for (loc_pos[1]=0;loc_pos[1]<m_Op_UPML->m_numLines[1];++loc_pos[1])
{
pos[1] = loc_pos[1] + m_Op_UPML->m_StartPos[1];
for (loc_pos[2]=0;loc_pos[2]<m_Op_UPML->m_numLines[2];++loc_pos[2])
{
pos[2] = loc_pos[2] + m_Op_UPML->m_StartPos[2];
volt_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] = eng_sse->Engine_sse::GetVolt(0,pos);
eng_sse->Engine_sse::SetVolt(0,pos, volt[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] + m_Op_UPML->vvfn[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
volt_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] = eng_sse->Engine_sse::GetVolt(1,pos);
eng_sse->Engine_sse::SetVolt(1,pos, volt[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] + m_Op_UPML->vvfn[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
volt_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] = eng_sse->Engine_sse::GetVolt(2,pos);
eng_sse->Engine_sse::SetVolt(2,pos, volt[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] + m_Op_UPML->vvfn[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
}
}
}
break;
}
default:
{
for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0]) for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0])
{ {
pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0]; pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0];
@ -107,6 +226,10 @@ void Engine_Ext_UPML::DoPostVoltageUpdates()
} }
} }
} }
break;
}
}
} }
void Engine_Ext_UPML::DoPreCurrentUpdates() void Engine_Ext_UPML::DoPreCurrentUpdates()
@ -117,6 +240,69 @@ void Engine_Ext_UPML::DoPreCurrentUpdates()
unsigned int pos[3]; unsigned int pos[3];
unsigned int loc_pos[3]; unsigned int loc_pos[3];
switch (m_Eng->GetType())
{
case Engine::BASIC:
{
for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0])
{
pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0];
for (loc_pos[1]=0;loc_pos[1]<m_Op_UPML->m_numLines[1];++loc_pos[1])
{
pos[1] = loc_pos[1] + m_Op_UPML->m_StartPos[1];
for (loc_pos[2]=0;loc_pos[2]<m_Op_UPML->m_numLines[2];++loc_pos[2])
{
pos[2] = loc_pos[2] + m_Op_UPML->m_StartPos[2];
curr[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->ii[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * m_Eng->Engine::GetCurr(0,pos)
- m_Op_UPML->iifo[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * curr_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]];
m_Eng->Engine::SetCurr(0,pos, curr_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
curr[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->ii[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * m_Eng->Engine::GetCurr(1,pos)
- m_Op_UPML->iifo[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * curr_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]];
m_Eng->Engine::SetCurr(1,pos, curr_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
curr[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->ii[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * m_Eng->Engine::GetCurr(2,pos)
- m_Op_UPML->iifo[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * curr_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]];
m_Eng->Engine::SetCurr(2,pos, curr_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
}
}
}
break;
}
case Engine::SSE:
{
Engine_sse* eng_sse = (Engine_sse*) m_Eng;
for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0])
{
pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0];
for (loc_pos[1]=0;loc_pos[1]<m_Op_UPML->m_numLines[1];++loc_pos[1])
{
pos[1] = loc_pos[1] + m_Op_UPML->m_StartPos[1];
for (loc_pos[2]=0;loc_pos[2]<m_Op_UPML->m_numLines[2];++loc_pos[2])
{
pos[2] = loc_pos[2] + m_Op_UPML->m_StartPos[2];
curr[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->ii[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * eng_sse->Engine_sse::GetCurr(0,pos)
- m_Op_UPML->iifo[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * curr_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]];
eng_sse->Engine_sse::SetCurr(0,pos, curr_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
curr[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->ii[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * eng_sse->Engine_sse::GetCurr(1,pos)
- m_Op_UPML->iifo[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * curr_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]];
eng_sse->Engine_sse::SetCurr(1,pos, curr_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
curr[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->ii[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * eng_sse->Engine_sse::GetCurr(2,pos)
- m_Op_UPML->iifo[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * curr_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]];
eng_sse->Engine_sse::SetCurr(2,pos, curr_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
}
}
}
break;
}
default:
{
for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0]) for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0])
{ {
pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0]; pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0];
@ -140,6 +326,9 @@ void Engine_Ext_UPML::DoPreCurrentUpdates()
} }
} }
} }
break;
}
}
} }
void Engine_Ext_UPML::DoPostCurrentUpdates() void Engine_Ext_UPML::DoPostCurrentUpdates()
@ -150,6 +339,61 @@ void Engine_Ext_UPML::DoPostCurrentUpdates()
unsigned int pos[3]; unsigned int pos[3];
unsigned int loc_pos[3]; unsigned int loc_pos[3];
switch (m_Eng->GetType())
{
case Engine::BASIC:
{
for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0])
{
pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0];
for (loc_pos[1]=0;loc_pos[1]<m_Op_UPML->m_numLines[1];++loc_pos[1])
{
pos[1] = loc_pos[1] + m_Op_UPML->m_StartPos[1];
for (loc_pos[2]=0;loc_pos[2]<m_Op_UPML->m_numLines[2];++loc_pos[2])
{
pos[2] = loc_pos[2] + m_Op_UPML->m_StartPos[2];
curr_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Eng->Engine::GetCurr(0,pos);
m_Eng->Engine::SetCurr(0,pos, curr[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] + m_Op_UPML->iifn[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * curr_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
curr_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Eng->Engine::GetCurr(1,pos);
m_Eng->Engine::SetCurr(1,pos, curr[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] + m_Op_UPML->iifn[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * curr_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
curr_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Eng->Engine::GetCurr(2,pos);
m_Eng->Engine::SetCurr(2,pos, curr[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] + m_Op_UPML->iifn[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * curr_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
}
}
}
break;
}
case Engine::SSE:
{
Engine_sse* eng_sse = (Engine_sse*) m_Eng;
for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0])
{
pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0];
for (loc_pos[1]=0;loc_pos[1]<m_Op_UPML->m_numLines[1];++loc_pos[1])
{
pos[1] = loc_pos[1] + m_Op_UPML->m_StartPos[1];
for (loc_pos[2]=0;loc_pos[2]<m_Op_UPML->m_numLines[2];++loc_pos[2])
{
pos[2] = loc_pos[2] + m_Op_UPML->m_StartPos[2];
curr_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] = eng_sse->Engine_sse::GetCurr(0,pos);
eng_sse->Engine_sse::SetCurr(0,pos, curr[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] + m_Op_UPML->iifn[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * curr_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
curr_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] = eng_sse->Engine_sse::GetCurr(1,pos);
eng_sse->Engine_sse::SetCurr(1,pos, curr[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] + m_Op_UPML->iifn[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * curr_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
curr_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] = eng_sse->Engine_sse::GetCurr(2,pos);
eng_sse->Engine_sse::SetCurr(2,pos, curr[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] + m_Op_UPML->iifn[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * curr_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]]);
}
}
}
break;
}
default:
{
for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0]) for (loc_pos[0]=0;loc_pos[0]<m_Op_UPML->m_numLines[0];++loc_pos[0])
{ {
pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0]; pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0];
@ -170,4 +414,7 @@ void Engine_Ext_UPML::DoPostCurrentUpdates()
} }
} }
} }
break;
}
}
} }