diff --git a/FDTD/operator_sse_compressed.cpp b/FDTD/operator_sse_compressed.cpp index f89de23..ce1da9b 100644 --- a/FDTD/operator_sse_compressed.cpp +++ b/FDTD/operator_sse_compressed.cpp @@ -20,7 +20,8 @@ #include "engine_sse.h" #include "tools/array_ops.h" -#include +#include +#include Operator_SSE_Compressed* Operator_SSE_Compressed::New() { @@ -34,19 +35,6 @@ Operator_SSE_Compressed::Operator_SSE_Compressed() : Operator_sse() { m_Op_index = NULL; m_Use_Compression = false; - m_Max_Compression = false; - - m_Compression_Threshold = 0.8; - - for (int n=0;n<3;++n) - { - f4_vv_Compressed[n]=NULL; - f4_vi_Compressed[n]=NULL; - f4_ii_Compressed[n]=NULL; - f4_iv_Compressed[n]=NULL; - } - - m_max_fifo = 0; } Operator_SSE_Compressed::~Operator_SSE_Compressed() @@ -56,7 +44,7 @@ Operator_SSE_Compressed::~Operator_SSE_Compressed() Engine* Operator_SSE_Compressed::CreateEngine() const { - if (m_Use_Compression==false) + if (!m_Use_Compression) { //! create a default sse-engine Engine_sse* eng = Engine_sse::New(this); @@ -78,13 +66,6 @@ void Operator_SSE_Compressed::Init() { Operator_sse::Init(); m_Op_index = NULL; - for (int n=0;n<3;++n) - { - f4_vv_Compressed[n]=NULL; - f4_vi_Compressed[n]=NULL; - f4_ii_Compressed[n]=NULL; - f4_iv_Compressed[n]=NULL; - } } void Operator_SSE_Compressed::Reset() @@ -106,17 +87,13 @@ void Operator_SSE_Compressed::Reset() m_Op_index = NULL; } - for (int n=0;n<3;++n) - { - Delete1DArray_v4sf(f4_vv_Compressed[n]); - f4_vv_Compressed[n]=NULL; - Delete1DArray_v4sf(f4_vi_Compressed[n]); - f4_vi_Compressed[n]=NULL; - Delete1DArray_v4sf(f4_ii_Compressed[n]); - f4_ii_Compressed[n]=NULL; - Delete1DArray_v4sf(f4_iv_Compressed[n]); - f4_iv_Compressed[n]=NULL; - } + for (int n=0; n<3; n++) + { + f4_vv_Compressed[n].clear(); + f4_vi_Compressed[n].clear(); + f4_iv_Compressed[n].clear(); + f4_ii_Compressed[n].clear(); + } } void Operator_SSE_Compressed::InitOperator() @@ -143,10 +120,8 @@ void Operator_SSE_Compressed::ShowStat() const { Operator_sse::ShowStat(); - string yes_no[2] = {"no","yes"}; - cout << "SSE compression enabled\t: " << yes_no[m_Use_Compression] << endl; - cout << "Unique SSE operator\t: " << m_Op_Count << endl; - cout << "Optimal compression\t: " << yes_no[m_Max_Compression] << endl; + cout << "SSE compression enabled\t: " << (m_Use_Compression?"yes":"no") << endl; + cout << "Unique SSE operators\t: " << f4_vv_Compressed->size() << endl; cout << "-----------------------------------" << endl; } @@ -176,14 +151,6 @@ bool Operator_SSE_Compressed::CompareOperators(unsigned int pos1[3], unsigned in if (!equal( f4_vi[n][pos1[0]][pos1[1]][pos1[2]], f4_vi[n][pos2[0]][pos2[1]][pos2[2]] )) return false; if (!equal( f4_iv[n][pos1[0]][pos1[1]][pos1[2]], f4_iv[n][pos2[0]][pos2[1]][pos2[2]] )) return false; if (!equal( f4_ii[n][pos1[0]][pos1[1]][pos1[2]], f4_ii[n][pos2[0]][pos2[1]][pos2[2]] )) return false; - -// for (int m=0;m<4;++m) -// { -// if (f4_vv[n][pos1[0]][pos1[1]][pos1[2]].f[m] != f4_vv[n][pos2[0]][pos2[1]][pos2[2]].f[m]) return false; -// if (f4_vi[n][pos1[0]][pos1[1]][pos1[2]].f[m] != f4_vi[n][pos2[0]][pos2[1]][pos2[2]].f[m]) return false; -// if (f4_iv[n][pos1[0]][pos1[1]][pos1[2]].f[m] != f4_iv[n][pos2[0]][pos2[1]][pos2[2]].f[m]) return false; -// if (f4_ii[n][pos1[0]][pos1[1]][pos1[2]].f[m] != f4_ii[n][pos2[0]][pos2[1]][pos2[2]].f[m]) return false; -// } } return true; } @@ -192,85 +159,47 @@ bool Operator_SSE_Compressed::CompressOperator() { cout << "Compressing the FDTD operator... this may take a while..." << endl; - if (m_max_fifo==0) - { - m_max_fifo = numVectors*numLines[1] + 1; -// cerr << m_max_fifo << endl; - } + map lookUpMap; - m_Max_Compression = true; - - list fifo; - vector index_list[3]; - - bool found; unsigned int pos[3]; - unsigned int index_pos[3]; - list::iterator it; for (pos[0]=0;pos[0]m_max_fifo) - { - fifo.pop_back(); - m_Max_Compression = false; - } + f4vector vv[3] = { f4_vv[0][pos[0]][pos[1]][pos[2]], f4_vv[1][pos[0]][pos[1]][pos[2]], f4_vv[2][pos[0]][pos[1]][pos[2]] }; + f4vector vi[3] = { f4_vi[0][pos[0]][pos[1]][pos[2]], f4_vi[1][pos[0]][pos[1]][pos[2]], f4_vi[2][pos[0]][pos[1]][pos[2]] }; + f4vector iv[3] = { f4_iv[0][pos[0]][pos[1]][pos[2]], f4_iv[1][pos[0]][pos[1]][pos[2]], f4_iv[2][pos[0]][pos[1]][pos[2]] }; + f4vector ii[3] = { f4_ii[0][pos[0]][pos[1]][pos[2]], f4_ii[1][pos[0]][pos[1]][pos[2]], f4_ii[2][pos[0]][pos[1]][pos[2]] }; + SSE_coeff c( vv, vi, iv, ii ); + + map::iterator it; + it = lookUpMap.find(c); + if (it == lookUpMap.end()) + { + // not found -> insert + unsigned int index = f4_vv_Compressed[0].size(); + for (int n=0; n<3; n++) + { + f4_vv_Compressed[n].push_back( vv[n] ); + f4_vi_Compressed[n].push_back( vi[n] ); + f4_iv_Compressed[n].push_back( iv[n] ); + f4_ii_Compressed[n].push_back( ii[n] ); + } + lookUpMap[c] = index; + m_Op_index[pos[0]][pos[1]][pos[2]] = index; + } + else + { + // this operator is already in the list + unsigned int index = (*it).second; + m_Op_index[pos[0]][pos[1]][pos[2]] = index; + } } } } - m_Op_Count = index_list[0].size(); - if ( ((double)m_Op_Count/(double)GetNumberCells()) > m_Compression_Threshold) - { - cerr << "Operator_SSE_Compressed::CompressOperator: Warning: Compression unsuccessful, ratio is " << (double)m_Op_Count/(double)GetNumberCells() << "! Fallback to conventional sse-engine..." << endl; - return false; - } -// cerr << "total found: " << index_list[0].size() << endl; - - for (int n=0;n<3;++n) - { - f4_vv_Compressed[n] = Create1DArray_v4sf(m_Op_Count); - f4_vi_Compressed[n] = Create1DArray_v4sf(m_Op_Count); - f4_ii_Compressed[n] = Create1DArray_v4sf(m_Op_Count); - f4_iv_Compressed[n] = Create1DArray_v4sf(m_Op_Count); - - for (unsigned int m=0;m other.m_vv[n].f[c]) return false; + if (m_vv[n].f[c] < other.m_vv[n].f[c]) return true; + if (m_vi[n].f[c] > other.m_vi[n].f[c]) return false; + if (m_vi[n].f[c] < other.m_vi[n].f[c]) return true; + if (m_iv[n].f[c] > other.m_iv[n].f[c]) return false; + if (m_iv[n].f[c] < other.m_iv[n].f[c]) return true; + if (m_ii[n].f[c] > other.m_ii[n].f[c]) return false; + if (m_ii[n].f[c] < other.m_ii[n].f[c]) return true; + } + } + return false; +} + +void SSE_coeff::print( ostream& stream ) const +{ + stream << "SSE_coeff: (" << endl; + for (int n=0; n<3; n++) + { + stream << "n=" << n << ":" << endl; + stream << "vv="; + for (int c=0; c<4; c++) + stream << m_vv[n].f[c] << " "; + stream << endl << "vi="; + for (int c=0; c<4; c++) + stream << m_vi[n].f[c] << " "; + stream << endl << "iv="; + for (int c=0; c<4; c++) + stream << m_iv[n].f[c] << " "; + stream << endl << "ii="; + for (int c=0; c<4; c++) + stream << m_ii[n].f[c] << " "; + stream << endl; + } + stream << ")" << endl; +} diff --git a/FDTD/operator_sse_compressed.h b/FDTD/operator_sse_compressed.h index 30465f5..30a1f3d 100644 --- a/FDTD/operator_sse_compressed.h +++ b/FDTD/operator_sse_compressed.h @@ -20,6 +20,21 @@ #include "operator_sse.h" +class SSE_coeff +{ +public: + SSE_coeff( f4vector vv[3], f4vector vi[3], f4vector iv[3], f4vector ii[3] ); + bool operator==( const SSE_coeff& ) const; + bool operator!=( const SSE_coeff& ) const; + bool operator<( const SSE_coeff& ) const; + void print( ostream& stream ) const; +protected: + f4vector m_vv[3]; + f4vector m_vi[3]; + f4vector m_iv[3]; + f4vector m_ii[3]; +}; + class Operator_SSE_Compressed : public Operator_sse { public: @@ -31,44 +46,35 @@ public: virtual int CalcECOperator(); - inline virtual FDTD_FLOAT& GetVV( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { if (m_Use_Compression) return f4_vv_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetVV(n,x,y,z);} - inline virtual FDTD_FLOAT& GetVI( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { if (m_Use_Compression) return f4_vi_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetVI(n,x,y,z);} + inline virtual FDTD_FLOAT& GetVV( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_vv_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetVV(n,x,y,z);} + inline virtual FDTD_FLOAT& GetVI( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_vi_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetVI(n,x,y,z);} - inline virtual FDTD_FLOAT& GetII( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { if (m_Use_Compression) return f4_ii_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetII(n,x,y,z);} - inline virtual FDTD_FLOAT& GetIV( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { if (m_Use_Compression) return f4_iv_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetIV(n,x,y,z);} + inline virtual FDTD_FLOAT& GetII( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_ii_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetII(n,x,y,z);} + inline virtual FDTD_FLOAT& GetIV( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_iv_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetIV(n,x,y,z);} virtual void ShowStat() const; - void SetCompressionThreshold(double val) {m_Compression_Threshold = val;} - void SetCacheSize(unsigned int size) {m_max_fifo = size;} bool CompressOperator(); protected: Operator_SSE_Compressed(); bool m_Use_Compression; - bool m_Max_Compression; //reached maximal compression? - unsigned int m_Op_Count; - - //! Compression ratio threshold. If this ratio is exceeded, compression will be disabled. \sa SetCompressionThreshold() - double m_Compression_Threshold; //default is 0.8 virtual void Init(); virtual void Reset(); virtual void InitOperator(); - //! Compression cache size, default is 1000 \sa SetCacheSize() - unsigned int m_max_fifo; - bool CompareOperators(unsigned int pos1[3], unsigned int pos2[3]); // engine needs access public: unsigned int*** m_Op_index; - f4vector* f4_vv_Compressed[3]; //calc new voltage from old voltage - f4vector* f4_vi_Compressed[3]; //calc new voltage from old current - f4vector* f4_iv_Compressed[3]; //calc new current from old current - f4vector* f4_ii_Compressed[3]; //calc new current from old voltage + vector f4_vv_Compressed[3]; //!< coefficient: calc new voltage from old voltage + vector f4_vi_Compressed[3]; //!< coefficient: calc new voltage from old current + vector f4_iv_Compressed[3]; //!< coefficient: calc new current from old current + vector f4_ii_Compressed[3]; //!< coefficient: calc new current from old voltage + }; #endif // OPERATOR_SSE_Compressed_H