diff --git a/FDTD/engine_ext_upml.cpp b/FDTD/engine_ext_upml.cpp index f3e8548..b99afe4 100644 --- a/FDTD/engine_ext_upml.cpp +++ b/FDTD/engine_ext_upml.cpp @@ -20,6 +20,7 @@ #include "engine.h" #include "engine_sse.h" #include "tools/array_ops.h" +#include "tools/useful.h" Engine_Ext_UPML::Engine_Ext_UPML(Operator_Ext_UPML* op_ext) : Engine_Extension(op_ext) { @@ -32,6 +33,8 @@ Engine_Ext_UPML::Engine_Ext_UPML(Operator_Ext_UPML* op_ext) : Engine_Extension(o volt_flux = Create_N_3DArray(m_Op_UPML->m_numLines); curr = Create_N_3DArray(m_Op_UPML->m_numLines); curr_flux = Create_N_3DArray(m_Op_UPML->m_numLines); + + SetNumberOfThreads(1); } Engine_Ext_UPML::~Engine_Ext_UPML() @@ -46,19 +49,35 @@ Engine_Ext_UPML::~Engine_Ext_UPML() curr_flux=NULL; } -void Engine_Ext_UPML::DoPreVoltageUpdates() +void Engine_Ext_UPML::SetNumberOfThreads(int nrThread) +{ + Engine_Extension::SetNumberOfThreads(nrThread); + + m_numX = AssignJobs2Threads(m_Op_UPML->m_numLines[0],m_NrThreads,false); + m_start.resize(m_NrThreads,0); + m_start.at(0)=0; + for (size_t n=1;n=m_NrThreads) + return; + unsigned int pos[3]; unsigned int loc_pos[3]; switch (m_Eng->GetType()) { case Engine::BASIC: { - for (loc_pos[0]=0;loc_pos[0]m_numLines[0];++loc_pos[0]) + for (unsigned int lineX=0;lineXm_StartPos[0]; for (loc_pos[1]=0;loc_pos[1]m_numLines[1];++loc_pos[1]) { @@ -85,37 +104,39 @@ void Engine_Ext_UPML::DoPreVoltageUpdates() } case Engine::SSE: { - Engine_sse* eng_sse = (Engine_sse*) m_Eng; - for (loc_pos[0]=0;loc_pos[0]m_numLines[0];++loc_pos[0]) - { - pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0]; - for (loc_pos[1]=0;loc_pos[1]m_numLines[1];++loc_pos[1]) + Engine_sse* eng_sse = (Engine_sse*) m_Eng; + for (unsigned int lineX=0;lineXm_StartPos[1]; - for (loc_pos[2]=0;loc_pos[2]m_numLines[2];++loc_pos[2]) + loc_pos[0]=lineX+m_start.at(threadID); + pos[0] = loc_pos[0] + m_Op_UPML->m_StartPos[0]; + for (loc_pos[1]=0;loc_pos[1]m_numLines[1];++loc_pos[1]) { - pos[2] = loc_pos[2] + m_Op_UPML->m_StartPos[2]; + 
pos[1] = loc_pos[1] + m_Op_UPML->m_StartPos[1]; + for (loc_pos[2]=0;loc_pos[2]m_numLines[2];++loc_pos[2]) + { + pos[2] = loc_pos[2] + m_Op_UPML->m_StartPos[2]; - volt[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->vv[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * eng_sse->Engine_sse::GetVolt(0,pos) - - m_Op_UPML->vvfo[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]]; - eng_sse->Engine_sse::SetVolt(0,pos, volt_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]]); + volt[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->vv[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * eng_sse->Engine_sse::GetVolt(0,pos) + - m_Op_UPML->vvfo[0][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]]; + eng_sse->Engine_sse::SetVolt(0,pos, volt_flux[0][loc_pos[0]][loc_pos[1]][loc_pos[2]]); - volt[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->vv[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * eng_sse->Engine_sse::GetVolt(1,pos) - - m_Op_UPML->vvfo[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]]; - eng_sse->Engine_sse::SetVolt(1,pos, volt_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]]); + volt[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->vv[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * eng_sse->Engine_sse::GetVolt(1,pos) + - m_Op_UPML->vvfo[1][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]]; + eng_sse->Engine_sse::SetVolt(1,pos, volt_flux[1][loc_pos[0]][loc_pos[1]][loc_pos[2]]); - volt[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->vv[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * eng_sse->Engine_sse::GetVolt(2,pos) - - m_Op_UPML->vvfo[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]]; - eng_sse->Engine_sse::SetVolt(2,pos, volt_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]]); + volt[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] = m_Op_UPML->vv[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * eng_sse->Engine_sse::GetVolt(2,pos) + - 
m_Op_UPML->vvfo[2][loc_pos[0]][loc_pos[1]][loc_pos[2]] * volt_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]]; + eng_sse->Engine_sse::SetVolt(2,pos, volt_flux[2][loc_pos[0]][loc_pos[1]][loc_pos[2]]); + } } } - } - break; + break; } default: { - for (loc_pos[0]=0;loc_pos[0]m_numLines[0];++loc_pos[0]) + for (unsigned int lineX=0;lineXm_StartPos[0]; for (loc_pos[1]=0;loc_pos[1]m_numLines[1];++loc_pos[1]) { @@ -143,10 +164,12 @@ void Engine_Ext_UPML::DoPreVoltageUpdates() } -void Engine_Ext_UPML::DoPostVoltageUpdates() +void Engine_Ext_UPML::DoPostVoltageUpdates(int threadID) { if (m_Eng==NULL) return; + if (threadID>=m_NrThreads) + return; unsigned int pos[3]; unsigned int loc_pos[3]; @@ -155,8 +178,9 @@ void Engine_Ext_UPML::DoPostVoltageUpdates() { case Engine::BASIC: { - for (loc_pos[0]=0;loc_pos[0]m_numLines[0];++loc_pos[0]) + for (unsigned int lineX=0;lineXm_StartPos[0]; for (loc_pos[1]=0;loc_pos[1]m_numLines[1];++loc_pos[1]) { @@ -181,8 +205,9 @@ void Engine_Ext_UPML::DoPostVoltageUpdates() case Engine::SSE: { Engine_sse* eng_sse = (Engine_sse*) m_Eng; - for (loc_pos[0]=0;loc_pos[0]m_numLines[0];++loc_pos[0]) + for (unsigned int lineX=0;lineXm_StartPos[0]; for (loc_pos[1]=0;loc_pos[1]m_numLines[1];++loc_pos[1]) { @@ -206,8 +231,9 @@ void Engine_Ext_UPML::DoPostVoltageUpdates() } default: { - for (loc_pos[0]=0;loc_pos[0]m_numLines[0];++loc_pos[0]) + for (unsigned int lineX=0;lineXm_StartPos[0]; for (loc_pos[1]=0;loc_pos[1]m_numLines[1];++loc_pos[1]) { @@ -232,10 +258,12 @@ void Engine_Ext_UPML::DoPostVoltageUpdates() } -void Engine_Ext_UPML::DoPreCurrentUpdates() +void Engine_Ext_UPML::DoPreCurrentUpdates(int threadID) { if (m_Eng==NULL) return; + if (threadID>=m_NrThreads) + return; unsigned int pos[3]; unsigned int loc_pos[3]; @@ -245,8 +273,9 @@ void Engine_Ext_UPML::DoPreCurrentUpdates() { case Engine::BASIC: { - for (loc_pos[0]=0;loc_pos[0]m_numLines[0];++loc_pos[0]) + for (unsigned int lineX=0;lineXm_StartPos[0]; for 
(loc_pos[1]=0;loc_pos[1]m_numLines[1];++loc_pos[1]) { @@ -274,8 +303,9 @@ void Engine_Ext_UPML::DoPreCurrentUpdates() case Engine::SSE: { Engine_sse* eng_sse = (Engine_sse*) m_Eng; - for (loc_pos[0]=0;loc_pos[0]m_numLines[0];++loc_pos[0]) + for (unsigned int lineX=0;lineXm_StartPos[0]; for (loc_pos[1]=0;loc_pos[1]m_numLines[1];++loc_pos[1]) { @@ -303,8 +333,9 @@ void Engine_Ext_UPML::DoPreCurrentUpdates() } default: { - for (loc_pos[0]=0;loc_pos[0]m_numLines[0];++loc_pos[0]) + for (unsigned int lineX=0;lineXm_StartPos[0]; for (loc_pos[1]=0;loc_pos[1]m_numLines[1];++loc_pos[1]) { @@ -331,10 +362,12 @@ void Engine_Ext_UPML::DoPreCurrentUpdates() } } -void Engine_Ext_UPML::DoPostCurrentUpdates() +void Engine_Ext_UPML::DoPostCurrentUpdates(int threadID) { if (m_Eng==NULL) return; + if (threadID>=m_NrThreads) + return; unsigned int pos[3]; unsigned int loc_pos[3]; @@ -343,8 +376,9 @@ void Engine_Ext_UPML::DoPostCurrentUpdates() { case Engine::BASIC: { - for (loc_pos[0]=0;loc_pos[0]m_numLines[0];++loc_pos[0]) + for (unsigned int lineX=0;lineXm_StartPos[0]; for (loc_pos[1]=0;loc_pos[1]m_numLines[1];++loc_pos[1]) { @@ -369,8 +403,9 @@ void Engine_Ext_UPML::DoPostCurrentUpdates() case Engine::SSE: { Engine_sse* eng_sse = (Engine_sse*) m_Eng; - for (loc_pos[0]=0;loc_pos[0]m_numLines[0];++loc_pos[0]) + for (unsigned int lineX=0;lineXm_StartPos[0]; for (loc_pos[1]=0;loc_pos[1]m_numLines[1];++loc_pos[1]) { @@ -394,8 +429,9 @@ void Engine_Ext_UPML::DoPostCurrentUpdates() } default: { - for (loc_pos[0]=0;loc_pos[0]m_numLines[0];++loc_pos[0]) + for (unsigned int lineX=0;lineXm_StartPos[0]; for (loc_pos[1]=0;loc_pos[1]m_numLines[1];++loc_pos[1]) { diff --git a/FDTD/engine_ext_upml.h b/FDTD/engine_ext_upml.h index ea0a278..5cdc0f5 100644 --- a/FDTD/engine_ext_upml.h +++ b/FDTD/engine_ext_upml.h @@ -30,15 +30,24 @@ public: Engine_Ext_UPML(Operator_Ext_UPML* op_ext); virtual ~Engine_Ext_UPML(); - virtual void DoPreVoltageUpdates(); - virtual void DoPostVoltageUpdates(); + virtual void 
SetNumberOfThreads(int nrThread); - virtual void DoPreCurrentUpdates(); - virtual void DoPostCurrentUpdates(); + virtual void DoPreVoltageUpdates() {Engine_Ext_UPML::DoPreVoltageUpdates(0);}; + virtual void DoPreVoltageUpdates(int threadID); + virtual void DoPostVoltageUpdates() {Engine_Ext_UPML::DoPostVoltageUpdates(0);}; + virtual void DoPostVoltageUpdates(int threadID); + + virtual void DoPreCurrentUpdates() {Engine_Ext_UPML::DoPreCurrentUpdates(0);}; + virtual void DoPreCurrentUpdates(int threadID); + virtual void DoPostCurrentUpdates() {Engine_Ext_UPML::DoPostCurrentUpdates(0);}; + virtual void DoPostCurrentUpdates(int threadID); protected: Operator_Ext_UPML* m_Op_UPML; + vector m_start; + vector m_numX; + FDTD_FLOAT**** volt; FDTD_FLOAT**** curr; FDTD_FLOAT**** volt_flux; diff --git a/FDTD/engine_extension.cpp b/FDTD/engine_extension.cpp index 216a170..242d28c 100644 --- a/FDTD/engine_extension.cpp +++ b/FDTD/engine_extension.cpp @@ -24,12 +24,62 @@ Engine_Extension::Engine_Extension(Operator_Extension* op_ext) m_Op_ext = op_ext; m_Eng = NULL; m_Priority = 0; + m_NrThreads = 1; } Engine_Extension::~Engine_Extension() { } +void Engine_Extension::SetNumberOfThreads(int nrThread) +{ + if (nrThread<1) + return; + m_NrThreads=nrThread; +} + +void Engine_Extension::DoPreVoltageUpdates(int threadID) +{ + //if this method gets called the derived extension obviously doesn't support multithreading, calling non-MT method... + if (threadID==0) + DoPreVoltageUpdates(); +} + +void Engine_Extension::DoPostVoltageUpdates(int threadID) +{ + //if this method gets called the derived extension obviously doesn't support multithreading, calling non-MT method... + if (threadID==0) + DoPostVoltageUpdates(); +} + +void Engine_Extension::Apply2Voltages(int threadID) +{ + //if this method gets called the derived extension obviously doesn't support multithreading, calling non-MT method...
+ if (threadID==0) + Apply2Voltages(); +} + +void Engine_Extension::DoPreCurrentUpdates(int threadID) +{ + //if this method gets called the derived extension obviously doesn't support multithreading, calling non-MT method... + if (threadID==0) + DoPreCurrentUpdates(); +} + +void Engine_Extension::DoPostCurrentUpdates(int threadID) +{ + //if this method gets called the derived extension obviously doesn't support multithreading, calling non-MT method... + if (threadID==0) + DoPostCurrentUpdates(); +} + +void Engine_Extension::Apply2Current(int threadID) +{ + //if this method gets called the derived extension obviously doesn't support multithreading, calling non-MT method... + if (threadID==0) + Apply2Current(); +} + bool Engine_Extension::operator< (const Engine_Extension& other) { return (GetPriority()CalcStartStopLines( m_numThreads, m_Start_Lines, m_Stop_Lines ); cout << "Multithreaded engine using " << m_numThreads << " threads. Utilization: ("; - m_barrier_VoltUpdate = new boost::barrier(m_numThreads); // numThread workers - m_barrier_VoltExcite = new boost::barrier(m_numThreads); // numThread workers - m_barrier_CurrUpdate = new boost::barrier(m_numThreads); // numThread workers - m_barrier_CurrExcite = new boost::barrier(m_numThreads); // numThread workers - - m_barrier_PreVolt = new boost::barrier(m_numThreads); // numThread workers - m_barrier_PostVolt = new boost::barrier(m_numThreads); // numThread workers - m_barrier_PreCurr = new boost::barrier(m_numThreads); // numThread workers - m_barrier_PostCurr = new boost::barrier(m_numThreads); // numThread workers + m_IterateBarrier = new boost::barrier(m_numThreads); // numThread workers m_startBarrier = new boost::barrier(m_numThreads+1); // numThread workers + 1 controller m_stopBarrier = new boost::barrier(m_numThreads+1); // numThread workers + 1 controller @@ -131,6 +116,9 @@ void Engine_Multithread::Init() boost::thread *t = new boost::thread( NS_Engine_Multithread::thread(this,start,stop,stop_h,n) );
m_thread_group.add_thread( t ); } + + for (size_t n=0;nSetNumberOfThreads(m_numThreads); } void Engine_Multithread::Reset() @@ -146,14 +134,7 @@ void Engine_Multithread::Reset() m_stopThreads = true; m_stopBarrier->wait(); // wait for the threads to finish m_thread_group.join_all(); // wait for termination - delete m_barrier_VoltUpdate; m_barrier_VoltUpdate = 0; - delete m_barrier_VoltExcite; m_barrier_VoltExcite = 0; - delete m_barrier_PreVolt; m_barrier_PreVolt = 0; - delete m_barrier_PostVolt; m_barrier_PostVolt = 0; - delete m_barrier_CurrUpdate; m_barrier_CurrUpdate = 0; - delete m_barrier_CurrExcite; m_barrier_CurrExcite = 0; - delete m_barrier_PreCurr; m_barrier_PreCurr = 0; - delete m_barrier_PostCurr; m_barrier_PostCurr = 0; + delete m_IterateBarrier; m_IterateBarrier = 0; delete m_startBarrier; m_startBarrier = 0; delete m_stopBarrier; m_stopBarrier = 0; } @@ -174,6 +155,67 @@ bool Engine_Multithread::IterateTS(unsigned int iterTS) return true; } +void Engine_Multithread::DoPreVoltageUpdates(int threadID) +{ + //execute extensions in reverse order -> highest priority gets access to the voltages last + for (int n=m_Eng_exts.size()-1;n>=0;--n) + { + m_Eng_exts.at(n)->DoPreVoltageUpdates(threadID); + m_IterateBarrier->wait(); + } + +} + +void Engine_Multithread::DoPostVoltageUpdates(int threadID) +{ + //execute extensions in normal order -> highest priority gets access to the voltages first + for (size_t n=0;nDoPostVoltageUpdates(threadID); + m_IterateBarrier->wait(); + } +} + +void Engine_Multithread::Apply2Voltages(int threadID) +{ + //execute extensions in normal order -> highest priority gets access to the voltages first + for (size_t n=0;nApply2Voltages(threadID); + m_IterateBarrier->wait(); + } +} + +void Engine_Multithread::DoPreCurrentUpdates(int threadID) +{ + //execute extensions in reverse order -> highest priority gets access to the currents last + for (int n=m_Eng_exts.size()-1;n>=0;--n) + { + m_Eng_exts.at(n)->DoPreCurrentUpdates(threadID); + 
m_IterateBarrier->wait(); + } +} + +void Engine_Multithread::DoPostCurrentUpdates(int threadID) +{ + //execute extensions in normal order -> highest priority gets access to the currents first + for (size_t n=0;nDoPostCurrentUpdates(threadID); + m_IterateBarrier->wait(); + } +} + +void Engine_Multithread::Apply2Current(int threadID) +{ + //execute extensions in normal order -> highest priority gets access to the currents first + for (size_t n=0;nApply2Current(threadID); + m_IterateBarrier->wait(); + } +} + // // ************************************************************************************************************************* // @@ -205,10 +247,7 @@ void thread::operator()() for (unsigned int iter=0;iterm_iterTS;++iter) { // pre voltage stuff... - if (m_threadID==0) - m_enginePtr->DoPreVoltageUpdates(); - - m_enginePtr->m_barrier_PreVolt->wait(); + m_enginePtr->DoPreVoltageUpdates(m_threadID); //voltage updates m_enginePtr->UpdateVoltages(m_start,m_stop-m_start+1); @@ -217,56 +256,46 @@ void thread::operator()() DEBUG_TIME( m_enginePtr->m_timer_list[boost::this_thread::get_id()].push_back( timer1.elapsed() ); ) //cout << "Thread " << boost::this_thread::get_id() << " m_barrier1 waiting..." << endl; - m_enginePtr->m_barrier_VoltUpdate->wait(); + m_enginePtr->m_IterateBarrier->wait(); // record time DEBUG_TIME( m_enginePtr->m_timer_list[boost::this_thread::get_id()].push_back( timer1.elapsed() ); ) //post voltage stuff... 
- if (m_threadID==0) - { - m_enginePtr->DoPostVoltageUpdates(); - m_enginePtr->Apply2Voltages(); - } - m_enginePtr->m_barrier_PostVolt->wait(); + m_enginePtr->DoPostVoltageUpdates(m_threadID); + m_enginePtr->Apply2Voltages(m_threadID); // voltage excitation (E-field excite) by the first thread if (m_threadID==0) m_enginePtr->ApplyVoltageExcite(); - m_enginePtr->m_barrier_VoltExcite->wait(); + m_enginePtr->m_IterateBarrier->wait(); // voltage excitation finished // record time DEBUG_TIME( m_enginePtr->m_timer_list[boost::this_thread::get_id()].push_back( timer1.elapsed() ); ) //pre current stuff - if (m_threadID==0) - m_enginePtr->DoPreCurrentUpdates(); - m_enginePtr->m_barrier_PreCurr->wait(); + m_enginePtr->DoPreCurrentUpdates(m_threadID); //current updates m_enginePtr->UpdateCurrents(m_start,m_stop_h-m_start+1); // record time DEBUG_TIME( m_enginePtr->m_timer_list[boost::this_thread::get_id()].push_back( timer1.elapsed() ); ) - m_enginePtr->m_barrier_CurrUpdate->wait(); + m_enginePtr->m_IterateBarrier->wait(); // record time DEBUG_TIME( m_enginePtr->m_timer_list[boost::this_thread::get_id()].push_back( timer1.elapsed() ); ) //post current stuff - if (m_threadID==0) - { - m_enginePtr->DoPostCurrentUpdates(); - m_enginePtr->Apply2Current(); - } - m_enginePtr->m_barrier_PostCurr->wait(); + m_enginePtr->DoPostCurrentUpdates(m_threadID); + m_enginePtr->Apply2Current(m_threadID); // current excitation (H-field excite) by the first thread if (m_threadID==0) m_enginePtr->ApplyCurrentExcite(); - m_enginePtr->m_barrier_CurrExcite->wait(); + m_enginePtr->m_IterateBarrier->wait(); // current excitation finished if (m_threadID == 0) diff --git a/FDTD/engine_multithread.h b/FDTD/engine_multithread.h index 6831ee4..9662f4d 100644 --- a/FDTD/engine_multithread.h +++ b/FDTD/engine_multithread.h @@ -80,13 +80,20 @@ public: //! 
Iterate \a iterTS number of timesteps virtual bool IterateTS(unsigned int iterTS); + virtual void DoPreVoltageUpdates(int threadID); + virtual void DoPostVoltageUpdates(int threadID); + virtual void Apply2Voltages(int threadID); + + virtual void DoPreCurrentUpdates(int threadID); + virtual void DoPostCurrentUpdates(int threadID); + virtual void Apply2Current(int threadID); + protected: Engine_Multithread(const Operator_Multithread* op); const Operator_Multithread* m_Op_MT; boost::thread_group m_thread_group; boost::barrier *m_startBarrier, *m_stopBarrier; - boost::barrier *m_barrier_VoltUpdate, *m_barrier_VoltExcite, *m_barrier_PreVolt, *m_barrier_PostVolt; - boost::barrier *m_barrier_CurrUpdate, *m_barrier_CurrExcite, *m_barrier_PreCurr, *m_barrier_PostCurr; + boost::barrier *m_IterateBarrier; volatile unsigned int m_iterTS; unsigned int m_numThreads; //!< number of worker threads volatile bool m_stopThreads;