diff --git a/FDTD/operator_sse_compressed.cpp b/FDTD/operator_sse_compressed.cpp index ce1da9b..b5ad45b 100644 --- a/FDTD/operator_sse_compressed.cpp +++ b/FDTD/operator_sse_compressed.cpp @@ -44,7 +44,7 @@ Operator_SSE_Compressed::~Operator_SSE_Compressed() Engine* Operator_SSE_Compressed::CreateEngine() const { - if (!m_Use_Compression) + if (!m_Use_Compression) { //! create a default sse-engine Engine_sse* eng = Engine_sse::New(this); @@ -75,9 +75,9 @@ void Operator_SSE_Compressed::Reset() if (m_Op_index) { unsigned int pos[3]; - for (pos[0]=0;pos[0]size() << endl; + cout << "SSE compression enabled\t: " << (m_Use_Compression?"yes":"no") << endl; + cout << "Unique SSE operators\t: " << f4_vv_Compressed->size() << endl; cout << "-----------------------------------" << endl; } @@ -130,22 +130,22 @@ void Operator_SSE_Compressed::ShowStat() const INLINE int equal(f4vector v1, f4vector v2) { #if defined(__SSE__) - v4sf compare = __builtin_ia32_cmpeqps( v1.v, v2.v ); // hmm should return v4si... - return __builtin_ia32_movmskps( compare ) == 0x0f; + v4sf compare = __builtin_ia32_cmpeqps( v1.v, v2.v ); // hmm should return v4si... + return __builtin_ia32_movmskps( compare ) == 0x0f; #else - return ( - v1.f[0] == v2.f[0] && - v1.f[1] == v2.f[1] && - v1.f[2] == v2.f[2] && - v1.f[3] == v2.f[3] - ); + return ( + v1.f[0] == v2.f[0] && + v1.f[1] == v2.f[1] && + v1.f[2] == v2.f[2] && + v1.f[3] == v2.f[3] + ); #endif } bool Operator_SSE_Compressed::CompareOperators(unsigned int pos1[3], unsigned int pos2[3]) { // cerr << pos1[0] << " " << pos1[1] << " " << pos1[2] << endl; - for (int n=0;n<3;++n) + for (int n=0; n<3; ++n) { if (!equal( f4_vv[n][pos1[0]][pos1[1]][pos1[2]], f4_vv[n][pos2[0]][pos2[1]][pos2[2]] )) return false; if (!equal( f4_vi[n][pos1[0]][pos1[1]][pos1[2]], f4_vi[n][pos2[0]][pos2[1]][pos2[2]] )) return false; @@ -159,43 +159,43 @@ bool Operator_SSE_Compressed::CompressOperator() { cout << "Compressing the FDTD operator... this may take a while..." << endl; - map lookUpMap; + map lookUpMap; unsigned int pos[3]; - for (pos[0]=0;pos[0]::iterator it; - it = lookUpMap.find(c); - if (it == lookUpMap.end()) - { - // not found -> insert - unsigned int index = f4_vv_Compressed[0].size(); - for (int n=0; n<3; n++) - { - f4_vv_Compressed[n].push_back( vv[n] ); - f4_vi_Compressed[n].push_back( vi[n] ); - f4_iv_Compressed[n].push_back( iv[n] ); - f4_ii_Compressed[n].push_back( ii[n] ); - } - lookUpMap[c] = index; - m_Op_index[pos[0]][pos[1]][pos[2]] = index; - } - else - { - // this operator is already in the list - unsigned int index = (*it).second; - m_Op_index[pos[0]][pos[1]][pos[2]] = index; - } + map::iterator it; + it = lookUpMap.find(c); + if (it == lookUpMap.end()) + { + // not found -> insert + unsigned int index = f4_vv_Compressed[0].size(); + for (int n=0; n<3; n++) + { + f4_vv_Compressed[n].push_back( vv[n] ); + f4_vi_Compressed[n].push_back( vi[n] ); + f4_iv_Compressed[n].push_back( iv[n] ); + f4_ii_Compressed[n].push_back( ii[n] ); + } + lookUpMap[c] = index; + m_Op_index[pos[0]][pos[1]][pos[2]] = index; + } + else + { + // this operator is already in the list + unsigned int index = (*it).second; + m_Op_index[pos[0]][pos[1]][pos[2]] = index; + } } } } @@ -219,67 +219,68 @@ bool Operator_SSE_Compressed::CompressOperator() SSE_coeff::SSE_coeff( f4vector vv[3], f4vector vi[3], f4vector iv[3], f4vector ii[3] ) { - for (int n=0; n<3; n++) { - m_vv[n] = vv[n]; - m_vi[n] = vi[n]; - m_iv[n] = iv[n]; - m_ii[n] = ii[n]; - } + for (int n=0; n<3; n++) + { + m_vv[n] = vv[n]; + m_vi[n] = vi[n]; + m_iv[n] = iv[n]; + m_ii[n] = ii[n]; + } } bool SSE_coeff::operator==( const SSE_coeff& other ) const { - for (int n=0; n<3; n++) - { - if (memcmp( &(m_vv[n]), &(other.m_vv[n]), sizeof(f4vector) ) != 0) return false; - if (memcmp( &(m_vi[n]), &(other.m_vi[n]), sizeof(f4vector) ) != 0) return false; - if (memcmp( &(m_iv[n]), &(other.m_iv[n]), sizeof(f4vector) ) != 0) return false; - if (memcmp( &(m_ii[n]), &(other.m_ii[n]), sizeof(f4vector) ) != 0) return false; - } - return true; + for (int n=0; n<3; n++) + { + if (memcmp( &(m_vv[n]), &(other.m_vv[n]), sizeof(f4vector) ) != 0) return false; + if (memcmp( &(m_vi[n]), &(other.m_vi[n]), sizeof(f4vector) ) != 0) return false; + if (memcmp( &(m_iv[n]), &(other.m_iv[n]), sizeof(f4vector) ) != 0) return false; + if (memcmp( &(m_ii[n]), &(other.m_ii[n]), sizeof(f4vector) ) != 0) return false; + } + return true; } bool SSE_coeff::operator!=( const SSE_coeff& other ) const { - return !(*this == other); + return !(*this == other); } bool SSE_coeff::operator<( const SSE_coeff& other ) const { - for (int n=0; n<3; n++) - { - for (int c=0; c<4; c++) - { - if (m_vv[n].f[c] > other.m_vv[n].f[c]) return false; - if (m_vv[n].f[c] < other.m_vv[n].f[c]) return true; - if (m_vi[n].f[c] > other.m_vi[n].f[c]) return false; - if (m_vi[n].f[c] < other.m_vi[n].f[c]) return true; - if (m_iv[n].f[c] > other.m_iv[n].f[c]) return false; - if (m_iv[n].f[c] < other.m_iv[n].f[c]) return true; - if (m_ii[n].f[c] > other.m_ii[n].f[c]) return false; - if (m_ii[n].f[c] < other.m_ii[n].f[c]) return true; - } - } - return false; + for (int n=0; n<3; n++) + { + for (int c=0; c<4; c++) + { + if (m_vv[n].f[c] > other.m_vv[n].f[c]) return false; + if (m_vv[n].f[c] < other.m_vv[n].f[c]) return true; + if (m_vi[n].f[c] > other.m_vi[n].f[c]) return false; + if (m_vi[n].f[c] < other.m_vi[n].f[c]) return true; + if (m_iv[n].f[c] > other.m_iv[n].f[c]) return false; + if (m_iv[n].f[c] < other.m_iv[n].f[c]) return true; + if (m_ii[n].f[c] > other.m_ii[n].f[c]) return false; + if (m_ii[n].f[c] < other.m_ii[n].f[c]) return true; + } + } + return false; } void SSE_coeff::print( ostream& stream ) const { - stream << "SSE_coeff: (" << endl; - for (int n=0; n<3; n++) - { - stream << "n=" << n << ":" << endl; - stream << "vv="; - for (int c=0; c<4; c++) - stream << m_vv[n].f[c] << " "; - stream << endl << "vi="; - for (int c=0; c<4; c++) - stream << m_vi[n].f[c] << " "; - stream << endl << "iv="; - for (int c=0; c<4; c++) - stream << m_iv[n].f[c] << " "; - stream << endl << "ii="; - for (int c=0; c<4; c++) - stream << m_ii[n].f[c] << " "; - stream << endl; - } - stream << ")" << endl; + stream << "SSE_coeff: (" << endl; + for (int n=0; n<3; n++) + { + stream << "n=" << n << ":" << endl; + stream << "vv="; + for (int c=0; c<4; c++) + stream << m_vv[n].f[c] << " "; + stream << endl << "vi="; + for (int c=0; c<4; c++) + stream << m_vi[n].f[c] << " "; + stream << endl << "iv="; + for (int c=0; c<4; c++) + stream << m_iv[n].f[c] << " "; + stream << endl << "ii="; + for (int c=0; c<4; c++) + stream << m_ii[n].f[c] << " "; + stream << endl; + } + stream << ")" << endl; } diff --git a/FDTD/operator_sse_compressed.h b/FDTD/operator_sse_compressed.h index 30a1f3d..5b9e4d2 100644 --- a/FDTD/operator_sse_compressed.h +++ b/FDTD/operator_sse_compressed.h @@ -23,16 +23,16 @@ class SSE_coeff { public: - SSE_coeff( f4vector vv[3], f4vector vi[3], f4vector iv[3], f4vector ii[3] ); - bool operator==( const SSE_coeff& ) const; - bool operator!=( const SSE_coeff& ) const; - bool operator<( const SSE_coeff& ) const; - void print( ostream& stream ) const; + SSE_coeff( f4vector vv[3], f4vector vi[3], f4vector iv[3], f4vector ii[3] ); + bool operator==( const SSE_coeff& ) const; + bool operator!=( const SSE_coeff& ) const; + bool operator<( const SSE_coeff& ) const; + void print( ostream& stream ) const; protected: - f4vector m_vv[3]; - f4vector m_vi[3]; - f4vector m_iv[3]; - f4vector m_ii[3]; + f4vector m_vv[3]; + f4vector m_vi[3]; + f4vector m_iv[3]; + f4vector m_ii[3]; }; class Operator_SSE_Compressed : public Operator_sse @@ -46,11 +46,11 @@ public: virtual int CalcECOperator(); - inline virtual FDTD_FLOAT& GetVV( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_vv_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetVV(n,x,y,z);} - inline virtual FDTD_FLOAT& GetVI( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_vi_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetVI(n,x,y,z);} + inline virtual FDTD_FLOAT& GetVV( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_vv_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetVV(n,x,y,z);} + inline virtual FDTD_FLOAT& GetVI( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_vi_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetVI(n,x,y,z);} - inline virtual FDTD_FLOAT& GetII( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_ii_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetII(n,x,y,z);} - inline virtual FDTD_FLOAT& GetIV( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_iv_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetIV(n,x,y,z);} + inline virtual FDTD_FLOAT& GetII( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_ii_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetII(n,x,y,z);} + inline virtual FDTD_FLOAT& GetIV( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_iv_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetIV(n,x,y,z);} virtual void ShowStat() const; @@ -70,10 +70,10 @@ protected: // engine needs access public: unsigned int*** m_Op_index; - vector f4_vv_Compressed[3]; //!< coefficient: calc new voltage from old voltage - vector f4_vi_Compressed[3]; //!< coefficient: calc new voltage from old current - vector f4_iv_Compressed[3]; //!< coefficient: calc new current from old current - vector f4_ii_Compressed[3]; //!< coefficient: calc new current from old voltage + vector f4_vv_Compressed[3]; //!< coefficient: calc new voltage from old voltage + vector f4_vi_Compressed[3]; //!< coefficient: calc new voltage from old current + vector f4_iv_Compressed[3]; //!< coefficient: calc new current from old current + vector f4_ii_Compressed[3]; //!< coefficient: calc new current from old voltage };