/* * Copyright (C) 2010 Thorsten Liebig (Thorsten.Liebig@gmx.de) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "operator_sse_compressed.h" #include "engine_sse_compressed.h" #include "engine_sse.h" #include "tools/array_ops.h" #include #include /* NOTE(review): the two trailing #include directives carry no header name in this copy of the file; confirm against the upstream source. */ /* Factory: prints a creation notice to cout, allocates an Operator_SSE_Compressed with new, calls Init() on it and returns the pointer. */ Operator_SSE_Compressed* Operator_SSE_Compressed::New() { cout << "Create FDTD operator (compressed SSE)" << endl; Operator_SSE_Compressed* op = new Operator_SSE_Compressed(); op->Init(); return op; } /* Constructor: starts with no index array (m_Op_index = NULL) and with compression switched off. */ Operator_SSE_Compressed::Operator_SSE_Compressed() : Operator_sse() { m_Op_index = NULL; m_Use_Compression = false; } /* Destructor: all cleanup is delegated to Reset(). */ Operator_SSE_Compressed::~Operator_SSE_Compressed() { Reset(); } /* Engine factory: returns Engine_sse::New(this) while m_Use_Compression is false, otherwise Engine_SSE_Compressed::New(this). */ Engine* Operator_SSE_Compressed::CreateEngine() const { if (!m_Use_Compression) { //! create a default sse-engine Engine_sse* eng = Engine_sse::New(this); return eng; } Engine_SSE_Compressed* eng = Engine_SSE_Compressed::New(this); return eng; } /* Operator dump: when m_Use_Compression is set this only writes a warning to cerr and returns; otherwise it forwards filename to Operator_sse::DumpOperator2File. */ void Operator_SSE_Compressed::DumpOperator2File(string filename) { if (m_Use_Compression) { cerr << "Operator_SSE_Compressed::DumpOperator2File: Warning: Operator dump not implemented for a compressed operator yet, try disabling operator compression." 
<< endl; return; } else Operator_sse::DumpOperator2File(filename); } /* Runs Operator_sse::CalcECOperator() and then stores the result of CompressOperator() in m_Use_Compression. Always returns 0. NOTE(review): the return value of the base-class call is discarded; confirm that is intended. */ int Operator_SSE_Compressed::CalcECOperator() { Operator_sse::CalcECOperator(); m_Use_Compression = CompressOperator(); return 0; } /* Runs the base-class Init() and sets m_Op_index to NULL. */ void Operator_SSE_Compressed::Init() { Operator_sse::Init(); m_Op_index = NULL; } /* Runs the base-class Reset(), releases m_Op_index through Delete3DArray when it is set, and clears the four compressed coefficient vectors for n = 0..2. NOTE(review): numLines is read after Operator_sse::Reset() has already run; confirm the base Reset() leaves numLines intact. */ void Operator_SSE_Compressed::Reset() { Operator_sse::Reset(); if (m_Op_index) { Delete3DArray( m_Op_index, numLines ); m_Op_index = 0; } for (int n=0; n<3; n++) { f4_vv_Compressed[n].clear(); f4_vi_Compressed[n].clear(); f4_iv_Compressed[n].clear(); f4_ii_Compressed[n].clear(); } } /* Runs the base-class InitOperator() and assigns m_Op_index from Create3DArray(numLines). */ void Operator_SSE_Compressed::InitOperator() { Operator_sse::InitOperator(); m_Op_index = Create3DArray( numLines ); } /* Prints the base-class statistics, then whether compression is enabled and the element count of f4_vv_Compressed[0] (the arrow operator on the array reaches its first element). */ void Operator_SSE_Compressed::ShowStat() const { Operator_sse::ShowStat(); cout << "SSE compression enabled\t: " << (m_Use_Compression?"yes":"no") << endl; cout << "Unique SSE operators\t: " << f4_vv_Compressed->size() << endl; cout << "-----------------------------------" << endl; } // see http://www.informit.com/articles/article.aspx?p=710752&seqNum=6 #define INLINE inline extern __attribute__((always_inline)) /* equal(): returns nonzero when v1 and v2 match. With __SSE__ defined it uses the cmpeqps builtin and checks that the movmskps result is 0x0f; otherwise it compares the four f[] lanes with ==. The inner GCC version check only selects the type used to hold the comparison result. */ INLINE int equal(f4vector v1, f4vector v2) { #if defined(__SSE__) #if (__GNUC__ == 4) && (__GNUC_MINOR__ < 4) v4si compare = __builtin_ia32_cmpeqps( v1.v, v2.v ); return __builtin_ia32_movmskps( (v4sf)compare ) == 0x0f; #else v4sf compare = __builtin_ia32_cmpeqps( v1.v, v2.v ); return __builtin_ia32_movmskps( compare ) == 0x0f; #endif #else return ( v1.f[0] == v2.f[0] && v1.f[1] == v2.f[1] && v1.f[2] == v2.f[2] && v1.f[3] == v2.f[3] ); #endif } /* CompareOperators(): returns true only when the f4_vv, f4_vi, f4_iv and f4_ii entries at pos1 and pos2 are equal according to equal() for every n in 0..2; the first mismatch returns false. */ bool Operator_SSE_Compressed::CompareOperators(unsigned int pos1[3], unsigned int pos2[3]) { // cerr << pos1[0] << " " << pos1[1] << " " << pos1[2] << endl; for (int n=0; n<3; ++n) { if (!equal( f4_vv[n][pos1[0]][pos1[1]][pos1[2]], f4_vv[n][pos2[0]][pos2[1]][pos2[2]] )) return false; if (!equal( f4_vi[n][pos1[0]][pos1[1]][pos1[2]], f4_vi[n][pos2[0]][pos2[1]][pos2[2]] )) return false; if (!equal( f4_iv[n][pos1[0]][pos1[1]][pos1[2]], 
f4_iv[n][pos2[0]][pos2[1]][pos2[2]] )) return false; if (!equal( f4_ii[n][pos1[0]][pos1[1]][pos1[2]], f4_ii[n][pos2[0]][pos2[1]][pos2[2]] )) return false; } return true; } /* CompressOperator(): prints a progress notice, then looks each coefficient set c up in lookUpMap. A set that is not found is appended to the four f4_*_Compressed vectors and registered under a new index (the previous size of f4_vv_Compressed[0]); a set that is found reuses its stored index. Either way the index is written to m_Op_index at the current pos. Afterwards the f4_vv, f4_vi, f4_iv and f4_ii arrays are deleted and set to 0. Always returns true. NOTE(review): the loop headers, the construction of c, vv, vi, iv, ii and the template arguments of the map are missing from this copy of the file; confirm against the upstream source before editing. */ bool Operator_SSE_Compressed::CompressOperator() { cout << "Compressing the FDTD operator... this may take a while..." << endl; map lookUpMap; unsigned int pos[3]; for (pos[0]=0; pos[0]::iterator it; it = lookUpMap.find(c); if (it == lookUpMap.end()) { // not found -> insert unsigned int index = f4_vv_Compressed[0].size(); for (int n=0; n<3; n++) { f4_vv_Compressed[n].push_back( vv[n] ); f4_vi_Compressed[n].push_back( vi[n] ); f4_iv_Compressed[n].push_back( iv[n] ); f4_ii_Compressed[n].push_back( ii[n] ); } lookUpMap[c] = index; m_Op_index[pos[0]][pos[1]][pos[2]] = index; } else { // this operator is already in the list unsigned int index = (*it).second; m_Op_index[pos[0]][pos[1]][pos[2]] = index; } } } } Delete_N_3DArray_v4sf(f4_vv,numLines); Delete_N_3DArray_v4sf(f4_vi,numLines); Delete_N_3DArray_v4sf(f4_iv,numLines); Delete_N_3DArray_v4sf(f4_ii,numLines); f4_vv = 0; f4_vi = 0; f4_iv = 0; f4_ii = 0; return true; } // ---------------------------------------------------------------------------- /* SSE_coeff constructor: copies the three elements of vv, vi, iv and ii into m_vv, m_vi, m_iv and m_ii. */ SSE_coeff::SSE_coeff( f4vector vv[3], f4vector vi[3], f4vector iv[3], f4vector ii[3] ) { for (int n=0; n<3; n++) { m_vv[n] = vv[n]; m_vi[n] = vi[n]; m_iv[n] = iv[n]; m_ii[n] = ii[n]; } } /* operator==: bytewise comparison (memcmp over sizeof(f4vector)) of every m_vv, m_vi, m_iv and m_ii element; true only when all of them match. */ bool SSE_coeff::operator==( const SSE_coeff& other ) const { for (int n=0; n<3; n++) { if (memcmp( &(m_vv[n]), &(other.m_vv[n]), sizeof(f4vector) ) != 0) return false; if (memcmp( &(m_vi[n]), &(other.m_vi[n]), sizeof(f4vector) ) != 0) return false; if (memcmp( &(m_iv[n]), &(other.m_iv[n]), sizeof(f4vector) ) != 0) return false; if (memcmp( &(m_ii[n]), &(other.m_ii[n]), sizeof(f4vector) ) != 0) return false; } return true; } /* operator!=: negation of operator==. */ bool SSE_coeff::operator!=( const SSE_coeff& other ) const { return !(*this == other); } /* operator<: walks n = 0..2 and, inside that, lane c = 0..3, comparing m_vv, m_vi, m_iv and m_ii in that order; the first lane that differs decides the result and fully equal objects yield false. Presumably this is the ordering lookUpMap in CompressOperator relies on. NOTE(review): this compares floats while operator== compares bytes, so the two can disagree for -0.0f versus 0.0f and for NaN lanes; NaN would also break the ordering a map key needs. */ bool SSE_coeff::operator<( const SSE_coeff& other ) const { for (int n=0; n<3; n++) { for (int c=0; c<4; c++) { 
if (m_vv[n].f[c] > other.m_vv[n].f[c]) return false; if (m_vv[n].f[c] < other.m_vv[n].f[c]) return true; if (m_vi[n].f[c] > other.m_vi[n].f[c]) return false; if (m_vi[n].f[c] < other.m_vi[n].f[c]) return true; if (m_iv[n].f[c] > other.m_iv[n].f[c]) return false; if (m_iv[n].f[c] < other.m_iv[n].f[c]) return true; if (m_ii[n].f[c] > other.m_ii[n].f[c]) return false; if (m_ii[n].f[c] < other.m_ii[n].f[c]) return true; } } return false; } /* print(): writes a multi-line listing to stream; for each n in 0..2 it emits the four lanes of vv, vi, iv and ii, framed by an opening header line and a closing parenthesis. */ void SSE_coeff::print( ostream& stream ) const { stream << "SSE_coeff: (" << endl; for (int n=0; n<3; n++) { stream << "n=" << n << ":" << endl; stream << "vv="; for (int c=0; c<4; c++) stream << m_vv[n].f[c] << " "; stream << endl << "vi="; for (int c=0; c<4; c++) stream << m_vi[n].f[c] << " "; stream << endl << "iv="; for (int c=0; c<4; c++) stream << m_iv[n].f[c] << " "; stream << endl << "ii="; for (int c=0; c<4; c++) stream << m_ii[n].f[c] << " "; stream << endl; } stream << ")" << endl; }