multithreaded engine works

but it's slow...
pull/1/head
Sebastian Held 2010-04-01 16:11:55 +02:00
parent b9dea98687
commit 37ff221c18
2 changed files with 139 additions and 110 deletions

View File

@ -32,10 +32,9 @@
#include "boost/date_time/gregorian/gregorian.hpp"
#include <iomanip>
//! \brief construct an Engine_Multithread instance
//! it's the responsibility of the caller to free the returned pointer
Engine_Multithread* Engine_Multithread::createEngine(Operator* op, unsigned int numThreads)
Engine_Multithread* Engine_Multithread::createEngine(const Operator* op, unsigned int numThreads)
{
Engine_Multithread* e = new Engine_Multithread(op);
e->setNumThreads( numThreads );
@ -43,28 +42,29 @@ Engine_Multithread* Engine_Multithread::createEngine(Operator* op, unsigned int
return e;
}
Engine_Multithread::Engine_Multithread(Operator* op) : Engine(op)
Engine_Multithread::Engine_Multithread(const Operator* op) : Engine(op)
{
}
Engine_Multithread::~Engine_Multithread()
{
#ifdef ENABLE_DEBUG_TIME
cout << "Engine_Multithread::~Engine_Multithread()" << endl;
NS_Engine_Multithread::DBG().cout() << "Engine_Multithread::~Engine_Multithread()" << endl;
std::map<boost::thread::id, std::vector<double> >::iterator it;
for (it=m_timer_list.begin(); it!=m_timer_list.end(); it++) {
std::cout << "*** DEBUG Thread: " << it->first << std::endl;
NS_Engine_Multithread::DBG().cout() << "*** DEBUG Thread: " << it->first << std::endl;
std::vector<double>::iterator it2;
for (it2=it->second.begin(); it2<it->second.end();) {
std::cout << "after voltage update, before barrier1: " << fixed << setprecision(6) << *(it2++) << std::endl;
std::cout << "after barrier1, before barrier2: " << fixed << setprecision(6) << *(it2++) << std::endl;
std::cout << "after barrier2, before current update: " << fixed << setprecision(6) << *(it2++) << std::endl;
std::cout << "after current update, before barrier3: " << fixed << setprecision(6) << *(it2++) << std::endl;
std::cout << "after barrier3: " << fixed << setprecision(6) << *(it2++) << std::endl;
NS_Engine_Multithread::DBG().cout() << "after voltage update, before barrier1: " << fixed << setprecision(6) << *(it2++) << std::endl;
NS_Engine_Multithread::DBG().cout() << "after barrier1, before barrier2: " << fixed << setprecision(6) << *(it2++) << std::endl;
NS_Engine_Multithread::DBG().cout() << "after barrier2, before current update: " << fixed << setprecision(6) << *(it2++) << std::endl;
NS_Engine_Multithread::DBG().cout() << "after current update, before barrier3: " << fixed << setprecision(6) << *(it2++) << std::endl;
NS_Engine_Multithread::DBG().cout() << "after barrier3: " << fixed << setprecision(6) << *(it2++) << std::endl;
}
}
#endif
Reset();
}
void Engine_Multithread::setNumThreads( unsigned int numThreads )
@ -74,35 +74,55 @@ void Engine_Multithread::setNumThreads( unsigned int numThreads )
void Engine_Multithread::Init()
{
Engine::Init();
m_numTS_times_threads = 0;
Engine::Init(); // gets cleaned up by Engine::~Engine()
// initialize threads
m_stopThreads = false;
if (m_numThreads == 0)
m_numThreads = boost::thread::hardware_concurrency();
std::cout << "using " << m_numThreads << " threads" << std::endl;
cout << "using " << m_numThreads << " threads" << std::endl;
m_barrier1 = new boost::barrier(m_numThreads+1); // numThread workers + 1 excitation thread
m_barrier2 = new boost::barrier(m_numThreads+1); // numThread workers + 1 excitation thread
m_barrier3 = new boost::barrier(m_numThreads); // numThread workers
m_startBarrier = new boost::barrier(m_numThreads+1); // numThread workers + 1 controller
m_stopBarrier = new boost::barrier(m_numThreads+1); // numThread workers + 1 controller
unsigned int linesPerThread = round((float)Op->numLines[0] / (float)m_numThreads);
for (unsigned int n=0; n<m_numThreads; n++) {
unsigned int linesPerThread = (Op->numLines[0]+m_numThreads-1) / m_numThreads;
unsigned int start = n * linesPerThread;
unsigned int stop = min( (n+1) * linesPerThread - 1, Op->numLines[0]-1 );
unsigned int stop_h = (n!=m_numThreads-1)?stop:stop-1;
std::cout << "###DEBUG## Thread " << n << ": start=" << start << " stop=" << stop << " stop_h=" << stop_h << std::endl;
boost::thread *t = new boost::thread( thread(this,start,stop,stop_h) );
unsigned int stop = (n+1) * linesPerThread - 1;
unsigned int stop_h = stop;
if (n == m_numThreads-1) {
// last thread
stop = Op->numLines[0]-1;
stop_h = stop-1;
}
//NS_Engine_Multithread::DBG().cout() << "###DEBUG## Thread " << n << ": start=" << start << " stop=" << stop << " stop_h=" << stop_h << std::endl;
boost::thread *t = new boost::thread( NS_Engine_Multithread::thread(this,start,stop,stop_h,n) );
m_thread_group.add_thread( t );
}
boost::thread *t = new boost::thread( thread_e_excitation(this) );
boost::thread *t = new boost::thread( NS_Engine_Multithread::thread_e_excitation(this) );
m_thread_group.add_thread( t );
}
void Engine_Multithread::Reset()
{
if (!m_stopThreads) {
// prevent multiple invocations
// stop the threads
//NS_Engine_Multithread::DBG().cout() << "stopping all threads" << endl;
m_iterTS = 1;
m_startBarrier->wait(); // start the threads
m_stopThreads = true;
m_stopBarrier->wait(); // wait for the threads to finish
m_thread_group.join_all(); // wait for termination
delete m_barrier1; m_barrier1 = 0;
delete m_barrier2; m_barrier2 = 0;
delete m_barrier3; m_barrier3 = 0;
delete m_startBarrier; m_startBarrier = 0;
delete m_stopBarrier; m_stopBarrier = 0;
}
Engine::Reset();
}
@ -110,43 +130,41 @@ void Engine_Multithread::Reset()
bool Engine_Multithread::IterateTS(unsigned int iterTS)
{
m_iterTS = iterTS;
//cout << "bool Engine_Multithread::IterateTS(): starting threads ...";
m_startBarrier->wait(); // start the threads
//cout << "... threads started";
m_stopBarrier->wait(); // wait for the threads to finish <iterTS> time steps
numTS = m_numTS_times_threads / m_numThreads;
return true;
}
//
// *************************************************************************************************************************
//
namespace NS_Engine_Multithread {
thread::thread( Engine_Multithread* ptr, unsigned int start, unsigned int stop, unsigned int stop_h )
thread::thread( Engine_Multithread* ptr, unsigned int start, unsigned int stop, unsigned int stop_h, unsigned int threadID )
{
m_enginePtr = ptr;
m_start = start;
m_stop = stop;
m_stop_h = stop_h;
Op = m_enginePtr->Op;
m_stopThread = false;
// volt = m_enginePtr->volt;
// curr = m_enginePtr->curr;
m_threadID = threadID;
}
void thread::operator()()
{
//std::cout << "thread::operator() Parameters: " << m_start << " " << m_stop << std::endl;
//DBG().cout() << "Thread " << m_threadID << " (" << boost::this_thread::get_id() << ") started." << endl;
unsigned int pos[3];
bool shift[3];
while (!m_stopThread) {
while (!m_enginePtr->m_stopThreads) {
// wait for start
//cout << "Thread " << boost::this_thread::get_id() << " waiting..." << endl;
//DBG().cout() << "Thread " << m_threadID << " (" << boost::this_thread::get_id() << ") waiting..." << endl;
m_enginePtr->m_startBarrier->wait();
//cout << "Thread " << boost::this_thread::get_id() << " waiting... started." << endl;
@ -158,24 +176,24 @@ void thread::operator()()
for (pos[0]=m_start;pos[0]<=m_stop;++pos[0])
{
shift[0]=pos[0];
for (pos[1]=0;pos[1]<Op->numLines[1];++pos[1])
for (pos[1]=0;pos[1]<m_enginePtr->Op->numLines[1];++pos[1])
{
shift[1]=pos[1];
for (pos[2]=0;pos[2]<Op->numLines[2];++pos[2])
for (pos[2]=0;pos[2]<m_enginePtr->Op->numLines[2];++pos[2])
{
shift[2]=pos[2];
//do the updates here
//for x
m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] *= Op->vv[0][pos[0]][pos[1]][pos[2]];
m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] += Op->vi[0][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] - m_enginePtr->curr[2][pos[0]][pos[1]-shift[1]][pos[2]] - m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] + m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]-shift[2]]);
m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] *= m_enginePtr->Op->vv[0][pos[0]][pos[1]][pos[2]];
m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] += m_enginePtr->Op->vi[0][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] - m_enginePtr->curr[2][pos[0]][pos[1]-shift[1]][pos[2]] - m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] + m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]-shift[2]]);
//for y
m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] *= Op->vv[1][pos[0]][pos[1]][pos[2]];
m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] += Op->vi[1][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] - m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]-shift[2]] - m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] + m_enginePtr->curr[2][pos[0]-shift[0]][pos[1]][pos[2]]);
m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] *= m_enginePtr->Op->vv[1][pos[0]][pos[1]][pos[2]];
m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] += m_enginePtr->Op->vi[1][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] - m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]-shift[2]] - m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] + m_enginePtr->curr[2][pos[0]-shift[0]][pos[1]][pos[2]]);
//for x
m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] *= Op->vv[2][pos[0]][pos[1]][pos[2]];
m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] += Op->vi[2][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] - m_enginePtr->curr[1][pos[0]-shift[0]][pos[1]][pos[2]] - m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] + m_enginePtr->curr[0][pos[0]][pos[1]-shift[1]][pos[2]]);
m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] *= m_enginePtr->Op->vv[2][pos[0]][pos[1]][pos[2]];
m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] += m_enginePtr->Op->vi[2][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] - m_enginePtr->curr[1][pos[0]-shift[0]][pos[1]][pos[2]] - m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] + m_enginePtr->curr[0][pos[0]][pos[1]-shift[1]][pos[2]]);
}
}
}
@ -200,22 +218,22 @@ void thread::operator()()
//current updates
for (pos[0]=m_start;pos[0]<=m_stop_h;++pos[0])
{
for (pos[1]=0;pos[1]<Op->numLines[1]-1;++pos[1])
for (pos[1]=0;pos[1]<m_enginePtr->Op->numLines[1]-1;++pos[1])
{
for (pos[2]=0;pos[2]<Op->numLines[2]-1;++pos[2])
for (pos[2]=0;pos[2]<m_enginePtr->Op->numLines[2]-1;++pos[2])
{
//do the updates here
//for x
m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] *= Op->ii[0][pos[0]][pos[1]][pos[2]];
m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] += Op->iv[0][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] - m_enginePtr->volt[2][pos[0]][pos[1]+1][pos[2]] - m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] + m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]+1]);
m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] *= m_enginePtr->Op->ii[0][pos[0]][pos[1]][pos[2]];
m_enginePtr->curr[0][pos[0]][pos[1]][pos[2]] += m_enginePtr->Op->iv[0][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] - m_enginePtr->volt[2][pos[0]][pos[1]+1][pos[2]] - m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] + m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]+1]);
//for y
m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] *= Op->ii[1][pos[0]][pos[1]][pos[2]];
m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] += Op->iv[1][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] - m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]+1] - m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] + m_enginePtr->volt[2][pos[0]+1][pos[1]][pos[2]]);
m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] *= m_enginePtr->Op->ii[1][pos[0]][pos[1]][pos[2]];
m_enginePtr->curr[1][pos[0]][pos[1]][pos[2]] += m_enginePtr->Op->iv[1][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] - m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]+1] - m_enginePtr->volt[2][pos[0]][pos[1]][pos[2]] + m_enginePtr->volt[2][pos[0]+1][pos[1]][pos[2]]);
//for x
m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] *= Op->ii[2][pos[0]][pos[1]][pos[2]];
m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] += Op->iv[2][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] - m_enginePtr->volt[1][pos[0]+1][pos[1]][pos[2]] - m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] + m_enginePtr->volt[0][pos[0]][pos[1]+1][pos[2]]);
m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] *= m_enginePtr->Op->ii[2][pos[0]][pos[1]][pos[2]];
m_enginePtr->curr[2][pos[0]][pos[1]][pos[2]] += m_enginePtr->Op->iv[2][pos[0]][pos[1]][pos[2]] * ( m_enginePtr->volt[1][pos[0]][pos[1]][pos[2]] - m_enginePtr->volt[1][pos[0]+1][pos[1]][pos[2]] - m_enginePtr->volt[0][pos[0]][pos[1]][pos[2]] + m_enginePtr->volt[0][pos[0]][pos[1]+1][pos[2]]);
}
}
}
@ -230,45 +248,54 @@ void thread::operator()()
//soft current excitation here (H-field excite)
++m_enginePtr->m_numTS_times_threads;
if (m_threadID == 0)
++m_enginePtr->numTS; // only the first thread increments numTS
}
m_enginePtr->m_stopBarrier->wait();
}
//DBG().cout() << "Thread " << m_threadID << " (" << boost::this_thread::get_id() << ") finished." << endl;
}
} // namespace
//
// *************************************************************************************************************************
//
namespace NS_Engine_Multithread {
thread_e_excitation::thread_e_excitation( Engine_Multithread* ptr )
{
m_enginePtr = ptr;
Op = m_enginePtr->Op;
m_stopThread = false;
// volt = m_enginePtr->volt;
// curr = m_enginePtr->curr;
}
void thread_e_excitation::operator()()
{
//std::cout << "thread_e_excitation::operator()" << std::endl;
while (!m_stopThread) {
// waiting on thread
m_enginePtr->m_barrier1->wait();
//DBG().cout() << "Thread e_excitation (" << boost::this_thread::get_id() << ") started." << endl;
int exc_pos;
unsigned int numTS = m_enginePtr->m_numTS_times_threads / m_enginePtr->m_numThreads;
const unsigned int E_Exc_Count = m_enginePtr->Op->E_Exc_Count;
while (!m_enginePtr->m_stopThreads)
{
// waiting on NS_Engine_Multithread::thread
m_enginePtr->m_barrier1->wait();
// soft voltage excitation here (E-field excite)
for (unsigned int n=0;n<Op->E_Exc_Count;++n)
for (unsigned int n=0;n<E_Exc_Count;++n)
{
exc_pos = (int)numTS - (int)Op->E_Exc_delay[n];
exc_pos*= (exc_pos>0 && exc_pos<=(int)Op->ExciteLength);
// if (n==0) cerr << numTS << " => " << Op->ExciteSignal[exc_pos] << endl;
m_enginePtr->volt[Op->E_Exc_dir[n]][Op->E_Exc_index[0][n]][Op->E_Exc_index[1][n]][Op->E_Exc_index[2][n]] += Op->E_Exc_amp[n]*Op->ExciteSignal[exc_pos];
exc_pos = (int)m_enginePtr->numTS - (int)m_enginePtr->Op->E_Exc_delay[n];
exc_pos*= (exc_pos>0 && exc_pos<=(int)m_enginePtr->Op->ExciteLength);
m_enginePtr->volt[m_enginePtr->Op->E_Exc_dir[n]][m_enginePtr->Op->E_Exc_index[0][n]][m_enginePtr->Op->E_Exc_index[1][n]][m_enginePtr->Op->E_Exc_index[2][n]] += m_enginePtr->Op->E_Exc_amp[n]*m_enginePtr->Op->ExciteSignal[exc_pos];
}
// continueing thread
// continue NS_Engine_Multithread::thread
m_enginePtr->m_barrier2->wait();
}
//DBG().cout() << "Thread e_excitation (" << boost::this_thread::get_id() << ") finished." << endl;
}
} // namespace

View File

@ -26,8 +26,20 @@
#include <boost/fusion/container/list/list_fwd.hpp>
#include <boost/fusion/include/list_fwd.hpp>
//debug
class Timer {
class Engine_Multithread;
namespace NS_Engine_Multithread {
class DBG { // debug
public:
DBG() {}
~DBG() { std::cout << os.str();}
std::ostringstream& cout() {return os;}
protected:
std::ostringstream os;
};
class Timer { //debug
public:
Timer() {gettimeofday(&t1,NULL);}
double elapsed() {gettimeofday(&t2,NULL); return (t2.tv_sec-t1.tv_sec) + (t2.tv_usec-t1.tv_usec)*1e-6;}
@ -35,12 +47,33 @@ protected:
timeval t1,t2;
};
class thread {
public:
thread( Engine_Multithread* ptr, unsigned int start, unsigned int stop, unsigned int stop_h, unsigned int threadID );
void operator()();
protected:
unsigned int m_start, m_stop, m_stop_h, m_threadID;
Engine_Multithread *m_enginePtr;
};
class thread_e_excitation {
public:
thread_e_excitation( Engine_Multithread* ptr);
void operator()();
protected:
Engine_Multithread *m_enginePtr;
};
} // namespace
class Engine_Multithread : public Engine
{
friend class thread;
friend class thread_e_excitation;
friend class NS_Engine_Multithread::thread;
friend class NS_Engine_Multithread::thread_e_excitation;
public:
static Engine_Multithread* createEngine(Operator* op, unsigned int numThreads = 0);
static Engine_Multithread* createEngine(const Operator* op, unsigned int numThreads = 0);
virtual ~Engine_Multithread();
virtual void setNumThreads( unsigned int numThreads );
@ -50,48 +83,17 @@ public:
//!Iterate a number of timesteps
virtual bool IterateTS(unsigned int iterTS);
virtual unsigned int GetNumberOfTimesteps() {return m_numTS_times_threads / m_numThreads;}
protected:
Engine_Multithread(Operator* op);
Engine_Multithread(const Operator* op);
boost::thread_group m_thread_group;
boost::barrier *m_barrier1, *m_barrier2, *m_barrier3, *m_startBarrier, *m_stopBarrier;
volatile unsigned int m_iterTS;
volatile unsigned int m_numTS_times_threads; //!< numTS times the number of worker threads
unsigned int m_numThreads; //!< number of worker threads
volatile bool m_stopThreads;
#ifdef ENABLE_DEBUG_TIME
std::map<boost::thread::id, std::vector<double> > m_timer_list;
#endif
};
class thread {
public:
thread( Engine_Multithread* ptr, unsigned int start, unsigned int stop, unsigned int stop_h );
void operator()();
protected:
unsigned int m_start, m_stop, m_stop_h;
volatile bool m_stopThread;
Engine_Multithread *m_enginePtr;
Operator *Op;
// FDTD_FLOAT**** volt;
// FDTD_FLOAT**** curr;
};
class thread_e_excitation {
public:
thread_e_excitation( Engine_Multithread* ptr);
void operator()();
protected:
volatile bool m_stopThread;
Engine_Multithread *m_enginePtr;
Operator *Op;
// FDTD_FLOAT**** volt;
// FDTD_FLOAT**** curr;
};
#endif // ENGINE_MULTITHREAD_H