From 37ff221c18fcbeddf62944a9d72381dbe75e57cb Mon Sep 17 00:00:00 2001 From: Sebastian Held Date: Thu, 1 Apr 2010 16:11:55 +0200 Subject: [PATCH] multithreaded engine works but it's slow... --- FDTD/engine_multithread.cpp | 159 +++++++++++++++++++++--------------- FDTD/engine_multithread.h | 90 ++++++++++---------- 2 files changed, 139 insertions(+), 110 deletions(-) diff --git a/FDTD/engine_multithread.cpp b/FDTD/engine_multithread.cpp index 9e20b67..42ab92a 100644 --- a/FDTD/engine_multithread.cpp +++ b/FDTD/engine_multithread.cpp @@ -32,10 +32,9 @@ #include "boost/date_time/gregorian/gregorian.hpp" #include - //! \brief construct an Engine_Multithread instance //! it's the responsibility of the caller to free the returned pointer -Engine_Multithread* Engine_Multithread::createEngine(Operator* op, unsigned int numThreads) +Engine_Multithread* Engine_Multithread::createEngine(const Operator* op, unsigned int numThreads) { Engine_Multithread* e = new Engine_Multithread(op); e->setNumThreads( numThreads ); @@ -43,28 +42,29 @@ Engine_Multithread* Engine_Multithread::createEngine(Operator* op, unsigned int return e; } -Engine_Multithread::Engine_Multithread(Operator* op) : Engine(op) +Engine_Multithread::Engine_Multithread(const Operator* op) : Engine(op) { } Engine_Multithread::~Engine_Multithread() { #ifdef ENABLE_DEBUG_TIME - cout << "Engine_Multithread::~Engine_Multithread()" << endl; + NS_Engine_Multithread::DBG().cout() << "Engine_Multithread::~Engine_Multithread()" << endl; std::map >::iterator it; for (it=m_timer_list.begin(); it!=m_timer_list.end(); it++) { - std::cout << "*** DEBUG Thread: " << it->first << std::endl; + NS_Engine_Multithread::DBG().cout() << "*** DEBUG Thread: " << it->first << std::endl; std::vector::iterator it2; for (it2=it->second.begin(); it2second.end();) { - std::cout << "after voltage update, before barrier1: " << fixed << setprecision(6) << *(it2++) << std::endl; - std::cout << "after barrier1, before barrier2: " << fixed << setprecision(6) << *(it2++) << std::endl; - std::cout << "after barrier2, before current update: " << fixed << setprecision(6) << *(it2++) << std::endl; - std::cout << "after current update, before barrier3: " << fixed << setprecision(6) << *(it2++) << std::endl; - std::cout << "after barrier3: " << fixed << setprecision(6) << *(it2++) << std::endl; + NS_Engine_Multithread::DBG().cout() << "after voltage update, before barrier1: " << fixed << setprecision(6) << *(it2++) << std::endl; + NS_Engine_Multithread::DBG().cout() << "after barrier1, before barrier2: " << fixed << setprecision(6) << *(it2++) << std::endl; + NS_Engine_Multithread::DBG().cout() << "after barrier2, before current update: " << fixed << setprecision(6) << *(it2++) << std::endl; + NS_Engine_Multithread::DBG().cout() << "after current update, before barrier3: " << fixed << setprecision(6) << *(it2++) << std::endl; + NS_Engine_Multithread::DBG().cout() << "after barrier3: " << fixed << setprecision(6) << *(it2++) << std::endl; } } #endif + Reset(); } void Engine_Multithread::setNumThreads( unsigned int numThreads ) @@ -74,35 +74,55 @@ void Engine_Multithread::setNumThreads( unsigned int numThreads ) void Engine_Multithread::Init() { - Engine::Init(); - - m_numTS_times_threads = 0; + Engine::Init(); // gets cleaned up by Engine::~Engine() // initialize threads + m_stopThreads = false; if (m_numThreads == 0) m_numThreads = boost::thread::hardware_concurrency(); - std::cout << "using " << m_numThreads << " threads" << std::endl; + cout << "using " << m_numThreads << " threads" << std::endl; m_barrier1 = new boost::barrier(m_numThreads+1); // numThread workers + 1 excitation thread m_barrier2 = new boost::barrier(m_numThreads+1); // numThread workers + 1 excitation thread m_barrier3 = new boost::barrier(m_numThreads); // numThread workers m_startBarrier = new boost::barrier(m_numThreads+1); // numThread workers + 1 controller m_stopBarrier = new boost::barrier(m_numThreads+1); // numThread workers + 1 controller + unsigned int linesPerThread = round((float)Op->numLines[0] / (float)m_numThreads); for (unsigned int n=0; nnumLines[0]+m_numThreads-1) / m_numThreads; unsigned int start = n * linesPerThread; - unsigned int stop = min( (n+1) * linesPerThread - 1, Op->numLines[0]-1 ); - unsigned int stop_h = (n!=m_numThreads-1)?stop:stop-1; - std::cout << "###DEBUG## Thread " << n << ": start=" << start << " stop=" << stop << " stop_h=" << stop_h << std::endl; - boost::thread *t = new boost::thread( thread(this,start,stop,stop_h) ); + unsigned int stop = (n+1) * linesPerThread - 1; + unsigned int stop_h = stop; + if (n == m_numThreads-1) { + // last thread + stop = Op->numLines[0]-1; + stop_h = stop-1; + } + //NS_Engine_Multithread::DBG().cout() << "###DEBUG## Thread " << n << ": start=" << start << " stop=" << stop << " stop_h=" << stop_h << std::endl; + boost::thread *t = new boost::thread( NS_Engine_Multithread::thread(this,start,stop,stop_h,n) ); m_thread_group.add_thread( t ); } - boost::thread *t = new boost::thread( thread_e_excitation(this) ); + boost::thread *t = new boost::thread( NS_Engine_Multithread::thread_e_excitation(this) ); m_thread_group.add_thread( t ); } void Engine_Multithread::Reset() { + if (!m_stopThreads) { + // prevent multiple invocations + + // stop the threads + //NS_Engine_Multithread::DBG().cout() << "stopping all threads" << endl; + m_iterTS = 1; + m_startBarrier->wait(); // start the threads + m_stopThreads = true; + m_stopBarrier->wait(); // wait for the threads to finish + m_thread_group.join_all(); // wait for termination + delete m_barrier1; m_barrier1 = 0; + delete m_barrier2; m_barrier2 = 0; + delete m_barrier3; m_barrier3 = 0; + delete m_startBarrier; m_startBarrier = 0; + delete m_stopBarrier; m_stopBarrier = 0; + } Engine::Reset(); } @@ -110,43 +130,41 @@ void Engine_Multithread::Reset() bool Engine_Multithread::IterateTS(unsigned int iterTS) { m_iterTS = iterTS; + //cout << "bool Engine_Multithread::IterateTS(): starting threads ..."; m_startBarrier->wait(); // start the threads + //cout << "... threads started"; m_stopBarrier->wait(); // wait for the threads to finish time steps - numTS = m_numTS_times_threads / m_numThreads; return true; } +// +// ************************************************************************************************************************* +// +namespace NS_Engine_Multithread { - - - - - -thread::thread( Engine_Multithread* ptr, unsigned int start, unsigned int stop, unsigned int stop_h ) +thread::thread( Engine_Multithread* ptr, unsigned int start, unsigned int stop, unsigned int stop_h, unsigned int threadID ) { m_enginePtr = ptr; m_start = start; m_stop = stop; m_stop_h = stop_h; - Op = m_enginePtr->Op; - m_stopThread = false; -// volt = m_enginePtr->volt; -// curr = m_enginePtr->curr; + m_threadID = threadID; } void thread::operator()() { //std::cout << "thread::operator() Parameters: " << m_start << " " << m_stop << std::endl; + //DBG().cout() << "Thread " << m_threadID << " (" << boost::this_thread::get_id() << ") started." << endl; unsigned int pos[3]; bool shift[3]; - while (!m_stopThread) { + while (!m_enginePtr->m_stopThreads) { // wait for start - //cout << "Thread " << boost::this_thread::get_id() << " waiting..." << endl; + //DBG().cout() << "Thread " << m_threadID << " (" << boost::this_thread::get_id() << ") waiting..." << endl; m_enginePtr->m_startBarrier->wait(); //cout << "Thread " << boost::this_thread::get_id() << " waiting... started." << endl; @@ -158,24 +176,24 @@ void thread::operator()() for (pos[0]=m_start;pos[0]<=m_stop;++pos[0]) { shift[0]=pos[0]; - for (pos[1]=0;pos[1]numLines[1];++pos[1]) + for (pos[1]=0;pos[1]Op->numLines[1];++pos[1]) { shift[1]=pos[1]; - for (pos[2]=0;pos[2]numLines[2];++pos[2]) + for (pos[2]=0;pos[2]Op->numLines[2];++pos[2]) { shift[2]=pos[2]; //do the updates here //for x - m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] *= Op->vv[0][pos[0]][pos[1]][pos[2]]; - m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] += Op->vi[0][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] - m_enginePtr->curr[2][pos[0]][pos[1]-shift[1]][pos[2]] - m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] + m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]-shift[2]]); + m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] *= m_enginePtr->Op->vv[0][pos[0]][pos[1]][pos[2]]; + m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] += m_enginePtr->Op->vi[0][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] - m_enginePtr->curr[2][pos[0]][pos[1]-shift[1]][pos[2]] - m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] + m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]-shift[2]]); //for y - m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] *= Op->vv[1][pos[0]][pos[1]][pos[2]]; - m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] += Op->vi[1][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] - m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]-shift[2]] - m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] + m_enginePtr->curr[2][pos[0]-shift[0]][pos[1]][pos[2]]); + m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] *= m_enginePtr->Op->vv[1][pos[0]][pos[1]][pos[2]]; + m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] += m_enginePtr->Op->vi[1][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] - m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]-shift[2]] - m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] + m_enginePtr->curr[2][pos[0]-shift[0]][pos[1]][pos[2]]); //for x - m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] *= Op->vv[2][pos[0]][pos[1]][pos[2]]; - m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] += Op->vi[2][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] - m_enginePtr->curr[1][pos[0]-shift[0]][pos[1]][pos[2]] - m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] + m_enginePtr->curr[0][pos[0]][pos[1]-shift[1]][pos[2]]); + m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] *= m_enginePtr->Op->vv[2][pos[0]][pos[1]][pos[2]]; + m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] += m_enginePtr->Op->vi[2][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] - m_enginePtr->curr[1][pos[0]-shift[0]][pos[1]][pos[2]] - m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] + m_enginePtr->curr[0][pos[0]][pos[1]-shift[1]][pos[2]]); } } } @@ -200,22 +218,22 @@ void thread::operator()() //current updates for (pos[0]=m_start;pos[0]<=m_stop_h;++pos[0]) { - for (pos[1]=0;pos[1]numLines[1]-1;++pos[1]) + for (pos[1]=0;pos[1]Op->numLines[1]-1;++pos[1]) { - for (pos[2]=0;pos[2]numLines[2]-1;++pos[2]) + for (pos[2]=0;pos[2]Op->numLines[2]-1;++pos[2]) { //do the updates here //for x - m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] *= Op->ii[0][pos[0]][pos[1]][pos[2]]; - m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] += Op->iv[0][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] - m_enginePtr->volt[2][pos[0]][pos[1]+1][pos[2]] - m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] + m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]+1]); + m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] *= m_enginePtr->Op->ii[0][pos[0]][pos[1]][pos[2]]; + m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] += m_enginePtr->Op->iv[0][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] - m_enginePtr->volt[2][pos[0]][pos[1]+1][pos[2]] - m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] + m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]+1]); //for y - m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] *= Op->ii[1][pos[0]][pos[1]][pos[2]]; - m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] += Op->iv[1][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] - m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]+1] - m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] + m_enginePtr->volt[2][pos[0]+1][pos[1]][pos[2]]); + m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] *= m_enginePtr->Op->ii[1][pos[0]][pos[1]][pos[2]]; + m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] += m_enginePtr->Op->iv[1][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] - m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]+1] - m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] + m_enginePtr->volt[2][pos[0]+1][pos[1]][pos[2]]); //for x - m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] *= Op->ii[2][pos[0]][pos[1]][pos[2]]; - m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] += Op->iv[2][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] - m_enginePtr->volt[1][pos[0]+1][pos[1]][pos[2]] - m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] + m_enginePtr->volt[0][pos[0]][pos[1]+1][pos[2]]); + m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] *= m_enginePtr->Op->ii[2][pos[0]][pos[1]][pos[2]]; + m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] += m_enginePtr->Op->iv[2][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] - m_enginePtr->volt[1][pos[0]+1][pos[1]][pos[2]] - m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] + m_enginePtr->volt[0][pos[0]][pos[1]+1][pos[2]]); } } } @@ -230,45 +248,54 @@ void thread::operator()() //soft current excitation here (H-field excite) - ++m_enginePtr->m_numTS_times_threads; + if (m_threadID == 0) + ++m_enginePtr->numTS; // only the first thread increments numTS } m_enginePtr->m_stopBarrier->wait(); } + + //DBG().cout() << "Thread " << m_threadID << " (" << boost::this_thread::get_id() << ") finished." << endl; } +} // namespace + +// +// ************************************************************************************************************************* +// +namespace NS_Engine_Multithread { thread_e_excitation::thread_e_excitation( Engine_Multithread* ptr ) { m_enginePtr = ptr; - Op = m_enginePtr->Op; - m_stopThread = false; -// volt = m_enginePtr->volt; -// curr = m_enginePtr->curr; } void thread_e_excitation::operator()() { //std::cout << "thread_e_excitation::operator()" << std::endl; + //DBG().cout() << "Thread e_excitation (" << boost::this_thread::get_id() << ") started." << endl; - while (!m_stopThread) { + int exc_pos; + const unsigned int E_Exc_Count = m_enginePtr->Op->E_Exc_Count; - // waiting on thread + while (!m_enginePtr->m_stopThreads) + { + // waiting on NS_Engine_Multithread::thread m_enginePtr->m_barrier1->wait(); - int exc_pos; - unsigned int numTS = m_enginePtr->m_numTS_times_threads / m_enginePtr->m_numThreads; - - //soft voltage excitation here (E-field excite) - for (unsigned int n=0;nE_Exc_Count;++n) + // soft voltage excitation here (E-field excite) + for (unsigned int n=0;nE_Exc_delay[n]; - exc_pos*= (exc_pos>0 && exc_pos<=(int)Op->ExciteLength); - // if (n==0) cerr << numTS << " => " << Op->ExciteSignal[exc_pos] << endl; - m_enginePtr->volt[Op->E_Exc_dir[n]][Op->E_Exc_index[0][n]][Op->E_Exc_index[1][n]][Op->E_Exc_index[2][n]] += Op->E_Exc_amp[n]*Op->ExciteSignal[exc_pos]; + exc_pos = (int)m_enginePtr->numTS - (int)m_enginePtr->Op->E_Exc_delay[n]; + exc_pos*= (exc_pos>0 && exc_pos<=(int)m_enginePtr->Op->ExciteLength); + m_enginePtr->volt[m_enginePtr->Op->E_Exc_dir[n]][m_enginePtr->Op->E_Exc_index[0][n]][m_enginePtr->Op->E_Exc_index[1][n]][m_enginePtr->Op->E_Exc_index[2][n]] += m_enginePtr->Op->E_Exc_amp[n]*m_enginePtr->Op->ExciteSignal[exc_pos]; } - // continueing thread + // continue NS_Engine_Multithread::thread m_enginePtr->m_barrier2->wait(); } + + //DBG().cout() << "Thread e_excitation (" << boost::this_thread::get_id() << ") finished." << endl; } + +} // namespace diff --git a/FDTD/engine_multithread.h b/FDTD/engine_multithread.h index 12c9a8c..ef3623d 100644 --- a/FDTD/engine_multithread.h +++ b/FDTD/engine_multithread.h @@ -26,21 +26,54 @@ #include #include -//debug -class Timer { -public: - Timer() {gettimeofday(&t1,NULL);} - double elapsed() {gettimeofday(&t2,NULL); return (t2.tv_sec-t1.tv_sec) + (t2.tv_usec-t1.tv_usec)*1e-6;} -protected: - timeval t1,t2; -}; +class Engine_Multithread; + +namespace NS_Engine_Multithread { + + class DBG { // debug + public: + DBG() {} + ~DBG() { std::cout << os.str();} + std::ostringstream& cout() {return os;} + protected: + std::ostringstream os; + }; + + class Timer { //debug + public: + Timer() {gettimeofday(&t1,NULL);} + double elapsed() {gettimeofday(&t2,NULL); return (t2.tv_sec-t1.tv_sec) + (t2.tv_usec-t1.tv_usec)*1e-6;} + protected: + timeval t1,t2; + }; + + class thread { + public: + thread( Engine_Multithread* ptr, unsigned int start, unsigned int stop, unsigned int stop_h, unsigned int threadID ); + void operator()(); + + protected: + unsigned int m_start, m_stop, m_stop_h, m_threadID; + Engine_Multithread *m_enginePtr; + }; + + class thread_e_excitation { + public: + thread_e_excitation( Engine_Multithread* ptr); + void operator()(); + + protected: + Engine_Multithread *m_enginePtr; + }; +} // namespace + class Engine_Multithread : public Engine { - friend class thread; - friend class thread_e_excitation; + friend class NS_Engine_Multithread::thread; + friend class NS_Engine_Multithread::thread_e_excitation; public: - static Engine_Multithread* createEngine(Operator* op, unsigned int numThreads = 0); + static Engine_Multithread* createEngine(const Operator* op, unsigned int numThreads = 0); virtual ~Engine_Multithread(); virtual void setNumThreads( unsigned int numThreads ); @@ -50,48 +83,17 @@ public: //!Iterate a number of timesteps virtual bool IterateTS(unsigned int iterTS); - virtual unsigned int GetNumberOfTimesteps() {return m_numTS_times_threads / m_numThreads;} - protected: - Engine_Multithread(Operator* op); + Engine_Multithread(const Operator* op); boost::thread_group m_thread_group; boost::barrier *m_barrier1, *m_barrier2, *m_barrier3, *m_startBarrier, *m_stopBarrier; volatile unsigned int m_iterTS; - volatile unsigned int m_numTS_times_threads; //!< numTS times the number of worker threads unsigned int m_numThreads; //!< number of worker threads + volatile bool m_stopThreads; #ifdef ENABLE_DEBUG_TIME std::map > m_timer_list; #endif }; - - -class thread { -public: - thread( Engine_Multithread* ptr, unsigned int start, unsigned int stop, unsigned int stop_h ); - void operator()(); - -protected: - unsigned int m_start, m_stop, m_stop_h; - volatile bool m_stopThread; - Engine_Multithread *m_enginePtr; - Operator *Op; -// FDTD_FLOAT**** volt; -// FDTD_FLOAT**** curr; -}; - -class thread_e_excitation { -public: - thread_e_excitation( Engine_Multithread* ptr); - void operator()(); - -protected: - volatile bool m_stopThread; - Engine_Multithread *m_enginePtr; - Operator *Op; -// FDTD_FLOAT**** volt; -// FDTD_FLOAT**** curr; -}; - #endif // ENGINE_MULTITHREAD_H