From 6673aefd70c01dec3be12f27717f9a10563c817b Mon Sep 17 00:00:00 2001 From: Thorsten Liebig Date: Fri, 6 Jan 2023 20:01:07 +0100 Subject: [PATCH] engine: try to find optimal number of engine threads Signed-off-by: Thorsten Liebig --- FDTD/engine.h | 2 + FDTD/engine_multithread.cpp | 129 +++++++++++++++++++++------- FDTD/engine_multithread.h | 7 +- FDTD/operator_cylinder.cpp | 2 +- FDTD/operator_cylindermultigrid.cpp | 2 +- FDTD/operator_multithread.cpp | 5 +- FDTD/operator_multithread.h | 1 + openems.cpp | 8 ++ openems.h | 2 +- 9 files changed, 122 insertions(+), 36 deletions(-) diff --git a/FDTD/engine.h b/FDTD/engine.h index 72d17e1..3377f17 100644 --- a/FDTD/engine.h +++ b/FDTD/engine.h @@ -47,6 +47,8 @@ public: virtual unsigned int GetNumberOfTimesteps() {return numTS;} + virtual void NextInterval(float curr_speed) {}; + //this access functions muss be overloaded by any new engine using a different storage model inline virtual FDTD_FLOAT GetVolt( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { return volt[n][x][y][z]; } inline virtual FDTD_FLOAT GetVolt( unsigned int n, const unsigned int pos[3] ) const { return volt[n][pos[0]][pos[1]][pos[2]]; } diff --git a/FDTD/engine_multithread.cpp b/FDTD/engine_multithread.cpp index 7ebe0d7..b67b654 100644 --- a/FDTD/engine_multithread.cpp +++ b/FDTD/engine_multithread.cpp @@ -55,6 +55,12 @@ Engine_Multithread::Engine_Multithread(const Operator_Multithread* op) : ENGINE_ m_IterateBarrier = 0; m_startBarrier = 0; m_stopBarrier = 0; + m_thread_group = 0; + m_max_numThreads = boost::thread::hardware_concurrency(); + m_numThreads = 0; + m_last_speed = 0; + m_opt_speed = false; + m_stopThreads = true; #ifdef ENABLE_DEBUG_TIME m_MPI_Barrier = 0; @@ -92,27 +98,91 @@ void Engine_Multithread::setNumThreads( unsigned int numThreads ) void Engine_Multithread::Init() { m_stopThreads = true; + m_opt_speed = false; ENGINE_MULTITHREAD_BASE::Init(); // initialize threads m_stopThreads = false; if (m_numThreads == 0) - m_numThreads = boost::thread::hardware_concurrency(); + { + m_opt_speed = true; + m_numThreads = 1; + } + else if (m_numThreads > m_max_numThreads) + m_numThreads = m_max_numThreads; + +#ifdef MPI_SUPPORT + m_MPI_Barrier = 0; +#endif + this->changeNumThreads(m_numThreads); +} + +void Engine_Multithread::Reset() +{ + if (!m_stopThreads) // prevent multiple invocations + { + ClearExtensions(); //prevent extensions from interfering with thread reset... + + // stop the threads + //NS_Engine_Multithread::DBG().cout() << "stopping all threads" << endl; + m_iterTS = 1; + m_startBarrier->wait(); // start the threads + m_stopThreads = true; + m_stopBarrier->wait(); // wait for the threads to finish + m_thread_group->join_all(); // wait for termination + delete m_IterateBarrier; + m_IterateBarrier = 0; + delete m_startBarrier; + m_startBarrier = 0; + delete m_stopBarrier; + m_stopBarrier = 0; + delete m_thread_group; + m_thread_group = 0; + } + + ENGINE_MULTITHREAD_BASE::Reset(); +} + +void Engine_Multithread::changeNumThreads(unsigned int numThreads) +{ + if (m_thread_group!=0) + { + m_thread_group->interrupt_all(); + //m_stopThreads = true; + //m_startBarrier->wait(); // start the threads + m_thread_group->join_all(); // wait for termination + + delete m_thread_group; + m_thread_group = 0; + //m_stopThreads = false; + } + + m_numThreads = numThreads; + + if (g_settings.GetVerboseLevel()>0) + cout << "Multithreaded engine using " << m_numThreads << " threads. Utilization: ("; vector m_Start_Lines; vector m_Stop_Lines; m_Op_MT->CalcStartStopLines( m_numThreads, m_Start_Lines, m_Stop_Lines ); - if (g_settings.GetVerboseLevel()>0) - cout << "Multithreaded engine using " << m_numThreads << " threads. Utilization: ("; + if (m_IterateBarrier!=0) + delete m_IterateBarrier; + // make sure all threads are waiting m_IterateBarrier = new boost::barrier(m_numThreads); // numThread workers + if (m_startBarrier!=0) + delete m_startBarrier; m_startBarrier = new boost::barrier(m_numThreads+1); // numThread workers + 1 controller + + if (m_stopBarrier!=0) + delete m_stopBarrier; m_stopBarrier = new boost::barrier(m_numThreads+1); // numThread workers + 1 controller #ifdef MPI_SUPPORT m_MPI_Barrier = 0; #endif + m_thread_group = new boost::thread_group(); for (unsigned int n=0; nwait(); // start the threads - m_stopThreads = true; - m_stopBarrier->wait(); // wait for the threads to finish - m_thread_group.join_all(); // wait for termination - delete m_IterateBarrier; - m_IterateBarrier = 0; - delete m_startBarrier; - m_startBarrier = 0; - delete m_stopBarrier; - m_stopBarrier = 0; - } - - ENGINE_MULTITHREAD_BASE::Reset(); -} - bool Engine_Multithread::IterateTS(unsigned int iterTS) { m_iterTS = iterTS; - //cout << "bool Engine_Multithread::IterateTS(): starting threads ..."; + //cerr << "bool Engine_Multithread::IterateTS(): starting threads ..."; m_startBarrier->wait(); // start the threads - //cout << "... threads started"; + //cerr << "... threads started" << endl; m_stopBarrier->wait(); // wait for the threads to finish time steps + return true; } +void Engine_Multithread::NextInterval(float curr_speed) +{ + ENGINE_MULTITHREAD_BASE::NextInterval(curr_speed); + if (!m_opt_speed) return; + if (curr_speedchangeNumThreads(m_numThreads-1); + cout << "Multithreaded Engine: Best performance found using " << m_numThreads << " threads." << std::endl; + m_opt_speed = false; + } + else if (m_numThreadschangeNumThreads(m_numThreads+1); + } +} + void Engine_Multithread::DoPreVoltageUpdates(int threadID) { //execute extensions in reverse order -> highest priority gets access to the voltages last @@ -269,6 +333,11 @@ void thread::operator()() m_enginePtr->m_startBarrier->wait(); //cout << "Thread " << boost::this_thread::get_id() << " waiting... started." << endl; + if (m_enginePtr->m_stopThreads) + { + return; + } + DEBUG_TIME( Timer timer1 ); for (unsigned int iter=0; iterm_iterTS; ++iter) diff --git a/FDTD/engine_multithread.h b/FDTD/engine_multithread.h index dce3b48..1912e84 100644 --- a/FDTD/engine_multithread.h +++ b/FDTD/engine_multithread.h @@ -90,6 +90,7 @@ public: virtual void setNumThreads( unsigned int numThreads ); virtual void Init(); virtual void Reset(); + virtual void NextInterval(float curr_speed); //! Iterate \a iterTS number of timesteps virtual bool IterateTS(unsigned int iterTS); @@ -104,13 +105,17 @@ public: protected: Engine_Multithread(const Operator_Multithread* op); + void changeNumThreads(unsigned int numThreads); const Operator_Multithread* m_Op_MT; - boost::thread_group m_thread_group; + boost::thread_group *m_thread_group; boost::barrier *m_startBarrier, *m_stopBarrier; boost::barrier *m_IterateBarrier; volatile unsigned int m_iterTS; unsigned int m_numThreads; //!< number of worker threads + unsigned int m_max_numThreads; //!< max. number of worker threads volatile bool m_stopThreads; + bool m_opt_speed; + float m_last_speed; #ifdef MPI_SUPPORT /*! Workaround needed for subgridding scheme... (see Engine_CylinderMultiGrid) diff --git a/FDTD/operator_cylinder.cpp b/FDTD/operator_cylinder.cpp index 342415f..5bf772a 100644 --- a/FDTD/operator_cylinder.cpp +++ b/FDTD/operator_cylinder.cpp @@ -46,7 +46,7 @@ Operator_Cylinder::~Operator_Cylinder() Engine* Operator_Cylinder::CreateEngine() { //! create a special cylindrical-engine - m_Engine = Engine_Cylinder::New(this, m_numThreads); + m_Engine = Engine_Cylinder::New(this, m_orig_numThreads); return m_Engine; } diff --git a/FDTD/operator_cylindermultigrid.cpp b/FDTD/operator_cylindermultigrid.cpp index 88f9f82..87f8817 100644 --- a/FDTD/operator_cylindermultigrid.cpp +++ b/FDTD/operator_cylindermultigrid.cpp @@ -51,7 +51,7 @@ Operator_CylinderMultiGrid* Operator_CylinderMultiGrid::New(vector Split Engine* Operator_CylinderMultiGrid::CreateEngine() { - m_Engine = Engine_CylinderMultiGrid::New(this,m_numThreads); + m_Engine = Engine_CylinderMultiGrid::New(this, m_orig_numThreads); return m_Engine; } diff --git a/FDTD/operator_multithread.cpp b/FDTD/operator_multithread.cpp index 9a602a6..21cd290 100644 --- a/FDTD/operator_multithread.cpp +++ b/FDTD/operator_multithread.cpp @@ -36,11 +36,12 @@ Operator_Multithread::~Operator_Multithread() void Operator_Multithread::setNumThreads( unsigned int numThreads ) { m_numThreads = numThreads; + m_orig_numThreads = numThreads; } Engine* Operator_Multithread::CreateEngine() { - m_Engine = Engine_Multithread::New(this,m_numThreads); + m_Engine = Engine_Multithread::New(this, m_orig_numThreads); return m_Engine; } @@ -106,7 +107,7 @@ void Operator_Multithread::CalcStartStopLines(unsigned int &numThreads, vector boost::thread::hardware_concurrency())) m_numThreads = boost::thread::hardware_concurrency(); vector m_Start_Lines; diff --git a/FDTD/operator_multithread.h b/FDTD/operator_multithread.h index 65f4748..623691b 100644 --- a/FDTD/operator_multithread.h +++ b/FDTD/operator_multithread.h @@ -64,6 +64,7 @@ protected: boost::thread_group m_thread_group; unsigned int m_numThreads; // number of worker threads + unsigned int m_orig_numThreads; //! Calculate the start/stop lines for the multithreading operator and engine. /*! diff --git a/openems.cpp b/openems.cpp index 5f70f98..affb25c 100644 --- a/openems.cpp +++ b/openems.cpp @@ -245,6 +245,13 @@ bool openEMS::parseCommandLineArgument( const char *argv ) return false; } +void openEMS::SetNumberOfThreads(int val) +{ + if ((val<0) || (val>boost::thread::hardware_concurrency())) + val = boost::thread::hardware_concurrency(); + m_engine_numThreads = val; +} + string openEMS::GetExtLibsInfo(string prefix) { stringstream str; @@ -1203,6 +1210,7 @@ void openEMS::RunFDTD() if (m_DumpStats) DumpRunStatistics(__OPENEMS_RUN_STAT_FILE__, t_run, currTS, speed, currE); + FDTD_Eng->NextInterval(speed); } } if ((change>endCrit) && (FDTD_Op->GetExcitationSignal()->GetExciteType()==0)) diff --git a/openems.h b/openems.h index 17dee96..2ff3aee 100644 --- a/openems.h +++ b/openems.h @@ -76,7 +76,7 @@ public: void SetTimeStepFactor(double val) {m_TS_fac=val;} void SetMaxTime(double val) {m_maxTime=val;} - void SetNumberOfThreads(unsigned int val) {m_engine_numThreads = val;} + void SetNumberOfThreads(int val); void DebugMaterial() {DebugMat=true;} void DebugOperator() {DebugOp=true;}