feature: SSE operator compression will now always work at maximum compression
parent
4d67bab7c7
commit
ea7047f7e7
|
@ -20,7 +20,8 @@
|
|||
#include "engine_sse.h"
|
||||
#include "tools/array_ops.h"
|
||||
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <cstring>
|
||||
|
||||
Operator_SSE_Compressed* Operator_SSE_Compressed::New()
|
||||
{
|
||||
|
@ -34,19 +35,6 @@ Operator_SSE_Compressed::Operator_SSE_Compressed() : Operator_sse()
|
|||
{
|
||||
m_Op_index = NULL;
|
||||
m_Use_Compression = false;
|
||||
m_Max_Compression = false;
|
||||
|
||||
m_Compression_Threshold = 0.8;
|
||||
|
||||
for (int n=0;n<3;++n)
|
||||
{
|
||||
f4_vv_Compressed[n]=NULL;
|
||||
f4_vi_Compressed[n]=NULL;
|
||||
f4_ii_Compressed[n]=NULL;
|
||||
f4_iv_Compressed[n]=NULL;
|
||||
}
|
||||
|
||||
m_max_fifo = 0;
|
||||
}
|
||||
|
||||
Operator_SSE_Compressed::~Operator_SSE_Compressed()
|
||||
|
@ -56,7 +44,7 @@ Operator_SSE_Compressed::~Operator_SSE_Compressed()
|
|||
|
||||
Engine* Operator_SSE_Compressed::CreateEngine() const
|
||||
{
|
||||
if (m_Use_Compression==false)
|
||||
if (!m_Use_Compression)
|
||||
{
|
||||
//! create a default sse-engine
|
||||
Engine_sse* eng = Engine_sse::New(this);
|
||||
|
@ -78,13 +66,6 @@ void Operator_SSE_Compressed::Init()
|
|||
{
|
||||
Operator_sse::Init();
|
||||
m_Op_index = NULL;
|
||||
for (int n=0;n<3;++n)
|
||||
{
|
||||
f4_vv_Compressed[n]=NULL;
|
||||
f4_vi_Compressed[n]=NULL;
|
||||
f4_ii_Compressed[n]=NULL;
|
||||
f4_iv_Compressed[n]=NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void Operator_SSE_Compressed::Reset()
|
||||
|
@ -106,17 +87,13 @@ void Operator_SSE_Compressed::Reset()
|
|||
m_Op_index = NULL;
|
||||
}
|
||||
|
||||
for (int n=0;n<3;++n)
|
||||
{
|
||||
Delete1DArray_v4sf(f4_vv_Compressed[n]);
|
||||
f4_vv_Compressed[n]=NULL;
|
||||
Delete1DArray_v4sf(f4_vi_Compressed[n]);
|
||||
f4_vi_Compressed[n]=NULL;
|
||||
Delete1DArray_v4sf(f4_ii_Compressed[n]);
|
||||
f4_ii_Compressed[n]=NULL;
|
||||
Delete1DArray_v4sf(f4_iv_Compressed[n]);
|
||||
f4_iv_Compressed[n]=NULL;
|
||||
}
|
||||
for (int n=0; n<3; n++)
|
||||
{
|
||||
f4_vv_Compressed[n].clear();
|
||||
f4_vi_Compressed[n].clear();
|
||||
f4_iv_Compressed[n].clear();
|
||||
f4_ii_Compressed[n].clear();
|
||||
}
|
||||
}
|
||||
|
||||
void Operator_SSE_Compressed::InitOperator()
|
||||
|
@ -143,10 +120,8 @@ void Operator_SSE_Compressed::ShowStat() const
|
|||
{
|
||||
Operator_sse::ShowStat();
|
||||
|
||||
string yes_no[2] = {"no","yes"};
|
||||
cout << "SSE compression enabled\t: " << yes_no[m_Use_Compression] << endl;
|
||||
cout << "Unique SSE operator\t: " << m_Op_Count << endl;
|
||||
cout << "Optimal compression\t: " << yes_no[m_Max_Compression] << endl;
|
||||
cout << "SSE compression enabled\t: " << (m_Use_Compression?"yes":"no") << endl;
|
||||
cout << "Unique SSE operators\t: " << f4_vv_Compressed->size() << endl;
|
||||
cout << "-----------------------------------" << endl;
|
||||
}
|
||||
|
||||
|
@ -176,14 +151,6 @@ bool Operator_SSE_Compressed::CompareOperators(unsigned int pos1[3], unsigned in
|
|||
if (!equal( f4_vi[n][pos1[0]][pos1[1]][pos1[2]], f4_vi[n][pos2[0]][pos2[1]][pos2[2]] )) return false;
|
||||
if (!equal( f4_iv[n][pos1[0]][pos1[1]][pos1[2]], f4_iv[n][pos2[0]][pos2[1]][pos2[2]] )) return false;
|
||||
if (!equal( f4_ii[n][pos1[0]][pos1[1]][pos1[2]], f4_ii[n][pos2[0]][pos2[1]][pos2[2]] )) return false;
|
||||
|
||||
// for (int m=0;m<4;++m)
|
||||
// {
|
||||
// if (f4_vv[n][pos1[0]][pos1[1]][pos1[2]].f[m] != f4_vv[n][pos2[0]][pos2[1]][pos2[2]].f[m]) return false;
|
||||
// if (f4_vi[n][pos1[0]][pos1[1]][pos1[2]].f[m] != f4_vi[n][pos2[0]][pos2[1]][pos2[2]].f[m]) return false;
|
||||
// if (f4_iv[n][pos1[0]][pos1[1]][pos1[2]].f[m] != f4_iv[n][pos2[0]][pos2[1]][pos2[2]].f[m]) return false;
|
||||
// if (f4_ii[n][pos1[0]][pos1[1]][pos1[2]].f[m] != f4_ii[n][pos2[0]][pos2[1]][pos2[2]].f[m]) return false;
|
||||
// }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -192,85 +159,47 @@ bool Operator_SSE_Compressed::CompressOperator()
|
|||
{
|
||||
cout << "Compressing the FDTD operator... this may take a while..." << endl;
|
||||
|
||||
if (m_max_fifo==0)
|
||||
{
|
||||
m_max_fifo = numVectors*numLines[1] + 1;
|
||||
// cerr << m_max_fifo << endl;
|
||||
}
|
||||
map<SSE_coeff,unsigned int> lookUpMap;
|
||||
|
||||
m_Max_Compression = true;
|
||||
|
||||
list<unsigned int> fifo;
|
||||
vector<unsigned int> index_list[3];
|
||||
|
||||
bool found;
|
||||
unsigned int pos[3];
|
||||
unsigned int index_pos[3];
|
||||
list<unsigned int>::iterator it;
|
||||
for (pos[0]=0;pos[0]<numLines[0];++pos[0])
|
||||
{
|
||||
for (pos[1]=0;pos[1]<numLines[1];++pos[1])
|
||||
{
|
||||
for (pos[2]=0;pos[2]<numVectors;++pos[2])
|
||||
{
|
||||
found = false;
|
||||
for ( it=fifo.begin() ; it != fifo.end(); it++ )
|
||||
{
|
||||
index_pos[0] = index_list[0].at(*it);
|
||||
index_pos[1] = index_list[1].at(*it);
|
||||
index_pos[2] = index_list[2].at(*it);
|
||||
found = CompareOperators(index_pos, pos);
|
||||
if (found)
|
||||
{
|
||||
m_Op_index[pos[0]][pos[1]][pos[2]] = *it;
|
||||
fifo.push_front(*it); //push already existing value to the front
|
||||
fifo.erase(it);
|
||||
it = fifo.end();
|
||||
++it;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found==false)
|
||||
{
|
||||
fifo.push_front(index_list[0].size());
|
||||
m_Op_index[pos[0]][pos[1]][pos[2]] = index_list[0].size();
|
||||
index_list[0].push_back(pos[0]);
|
||||
index_list[1].push_back(pos[1]);
|
||||
index_list[2].push_back(pos[2]);
|
||||
}
|
||||
if (fifo.size()>m_max_fifo)
|
||||
{
|
||||
fifo.pop_back();
|
||||
m_Max_Compression = false;
|
||||
}
|
||||
f4vector vv[3] = { f4_vv[0][pos[0]][pos[1]][pos[2]], f4_vv[1][pos[0]][pos[1]][pos[2]], f4_vv[2][pos[0]][pos[1]][pos[2]] };
|
||||
f4vector vi[3] = { f4_vi[0][pos[0]][pos[1]][pos[2]], f4_vi[1][pos[0]][pos[1]][pos[2]], f4_vi[2][pos[0]][pos[1]][pos[2]] };
|
||||
f4vector iv[3] = { f4_iv[0][pos[0]][pos[1]][pos[2]], f4_iv[1][pos[0]][pos[1]][pos[2]], f4_iv[2][pos[0]][pos[1]][pos[2]] };
|
||||
f4vector ii[3] = { f4_ii[0][pos[0]][pos[1]][pos[2]], f4_ii[1][pos[0]][pos[1]][pos[2]], f4_ii[2][pos[0]][pos[1]][pos[2]] };
|
||||
SSE_coeff c( vv, vi, iv, ii );
|
||||
|
||||
map<SSE_coeff,unsigned int>::iterator it;
|
||||
it = lookUpMap.find(c);
|
||||
if (it == lookUpMap.end())
|
||||
{
|
||||
// not found -> insert
|
||||
unsigned int index = f4_vv_Compressed[0].size();
|
||||
for (int n=0; n<3; n++)
|
||||
{
|
||||
f4_vv_Compressed[n].push_back( vv[n] );
|
||||
f4_vi_Compressed[n].push_back( vi[n] );
|
||||
f4_iv_Compressed[n].push_back( iv[n] );
|
||||
f4_ii_Compressed[n].push_back( ii[n] );
|
||||
}
|
||||
lookUpMap[c] = index;
|
||||
m_Op_index[pos[0]][pos[1]][pos[2]] = index;
|
||||
}
|
||||
else
|
||||
{
|
||||
// this operator is already in the list
|
||||
unsigned int index = (*it).second;
|
||||
m_Op_index[pos[0]][pos[1]][pos[2]] = index;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_Op_Count = index_list[0].size();
|
||||
if ( ((double)m_Op_Count/(double)GetNumberCells()) > m_Compression_Threshold)
|
||||
{
|
||||
cerr << "Operator_SSE_Compressed::CompressOperator: Warning: Compression unsuccessful, ratio is " << (double)m_Op_Count/(double)GetNumberCells() << "! Fallback to conventional sse-engine..." << endl;
|
||||
return false;
|
||||
}
|
||||
// cerr << "total found: " << index_list[0].size() << endl;
|
||||
|
||||
for (int n=0;n<3;++n)
|
||||
{
|
||||
f4_vv_Compressed[n] = Create1DArray_v4sf(m_Op_Count);
|
||||
f4_vi_Compressed[n] = Create1DArray_v4sf(m_Op_Count);
|
||||
f4_ii_Compressed[n] = Create1DArray_v4sf(m_Op_Count);
|
||||
f4_iv_Compressed[n] = Create1DArray_v4sf(m_Op_Count);
|
||||
|
||||
for (unsigned int m=0;m<m_Op_Count;++m)
|
||||
{
|
||||
f4_vv_Compressed[n][m].v = f4_vv[n][index_list[0].at(m)][index_list[1].at(m)][index_list[2].at(m)].v;
|
||||
f4_vi_Compressed[n][m].v = f4_vi[n][index_list[0].at(m)][index_list[1].at(m)][index_list[2].at(m)].v;
|
||||
f4_ii_Compressed[n][m].v = f4_ii[n][index_list[0].at(m)][index_list[1].at(m)][index_list[2].at(m)].v;
|
||||
f4_iv_Compressed[n][m].v = f4_iv[n][index_list[0].at(m)][index_list[1].at(m)][index_list[2].at(m)].v;
|
||||
}
|
||||
}
|
||||
|
||||
Delete_N_3DArray_v4sf(f4_vv,numLines);
|
||||
Delete_N_3DArray_v4sf(f4_vi,numLines);
|
||||
Delete_N_3DArray_v4sf(f4_iv,numLines);
|
||||
|
@ -282,3 +211,75 @@ bool Operator_SSE_Compressed::CompressOperator()
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
//! Construct a coefficient set by copying the four given coefficient triples.
/*!
 * Each argument is an array of three f4vector (index n = 0..2); the entries are
 * copied element-wise into m_vv, m_vi, m_iv and m_ii respectively.
 */
SSE_coeff::SSE_coeff( f4vector vv[3], f4vector vi[3], f4vector iv[3], f4vector ii[3] )
{
	// the four copies are independent of each other, so each array is copied in its own pass
	for (int dir=0; dir<3; ++dir)
		m_vv[dir] = vv[dir];
	for (int dir=0; dir<3; ++dir)
		m_vi[dir] = vi[dir];
	for (int dir=0; dir<3; ++dir)
		m_iv[dir] = iv[dir];
	for (int dir=0; dir<3; ++dir)
		m_ii[dir] = ii[dir];
}
|
||||
|
||||
bool SSE_coeff::operator==( const SSE_coeff& other ) const
|
||||
{
|
||||
for (int n=0; n<3; n++)
|
||||
{
|
||||
if (memcmp( &(m_vv[n]), &(other.m_vv[n]), sizeof(f4vector) ) != 0) return false;
|
||||
if (memcmp( &(m_vi[n]), &(other.m_vi[n]), sizeof(f4vector) ) != 0) return false;
|
||||
if (memcmp( &(m_iv[n]), &(other.m_iv[n]), sizeof(f4vector) ) != 0) return false;
|
||||
if (memcmp( &(m_ii[n]), &(other.m_ii[n]), sizeof(f4vector) ) != 0) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
bool SSE_coeff::operator!=( const SSE_coeff& other ) const
|
||||
{
|
||||
return !(*this == other);
|
||||
}
|
||||
bool SSE_coeff::operator<( const SSE_coeff& other ) const
|
||||
{
|
||||
for (int n=0; n<3; n++)
|
||||
{
|
||||
for (int c=0; c<4; c++)
|
||||
{
|
||||
if (m_vv[n].f[c] > other.m_vv[n].f[c]) return false;
|
||||
if (m_vv[n].f[c] < other.m_vv[n].f[c]) return true;
|
||||
if (m_vi[n].f[c] > other.m_vi[n].f[c]) return false;
|
||||
if (m_vi[n].f[c] < other.m_vi[n].f[c]) return true;
|
||||
if (m_iv[n].f[c] > other.m_iv[n].f[c]) return false;
|
||||
if (m_iv[n].f[c] < other.m_iv[n].f[c]) return true;
|
||||
if (m_ii[n].f[c] > other.m_ii[n].f[c]) return false;
|
||||
if (m_ii[n].f[c] < other.m_ii[n].f[c]) return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
//! Write a human readable dump of all coefficients to \a stream.
/*!
 * For every index n = 0..2 a header line "n=<n>:" is written, followed by one line
 * each for vv, vi, iv and ii holding the four components separated by blanks.
 * The whole dump is enclosed in "SSE_coeff: (" and ")".
 */
void SSE_coeff::print( ostream& stream ) const
{
	// the four coefficient arrays and their labels, in output order
	const f4vector* const coeffs[4] = { m_vv, m_vi, m_iv, m_ii };
	const char* const labels[4] = { "vv=", "vi=", "iv=", "ii=" };

	stream << "SSE_coeff: (" << endl;
	for (int dir=0; dir<3; ++dir)
	{
		stream << "n=" << dir << ":" << endl;
		for (int k=0; k<4; ++k)
		{
			stream << labels[k];
			for (int comp=0; comp<4; ++comp)
				stream << coeffs[k][dir].f[comp] << " ";
			stream << endl;
		}
	}
	stream << ")" << endl;
}
|
||||
|
|
|
@ -20,6 +20,21 @@
|
|||
|
||||
#include "operator_sse.h"
|
||||
|
||||
class SSE_coeff
|
||||
{
|
||||
public:
|
||||
SSE_coeff( f4vector vv[3], f4vector vi[3], f4vector iv[3], f4vector ii[3] );
|
||||
bool operator==( const SSE_coeff& ) const;
|
||||
bool operator!=( const SSE_coeff& ) const;
|
||||
bool operator<( const SSE_coeff& ) const;
|
||||
void print( ostream& stream ) const;
|
||||
protected:
|
||||
f4vector m_vv[3];
|
||||
f4vector m_vi[3];
|
||||
f4vector m_iv[3];
|
||||
f4vector m_ii[3];
|
||||
};
|
||||
|
||||
class Operator_SSE_Compressed : public Operator_sse
|
||||
{
|
||||
public:
|
||||
|
@ -31,44 +46,35 @@ public:
|
|||
|
||||
virtual int CalcECOperator();
|
||||
|
||||
inline virtual FDTD_FLOAT& GetVV( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { if (m_Use_Compression) return f4_vv_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetVV(n,x,y,z);}
|
||||
inline virtual FDTD_FLOAT& GetVI( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { if (m_Use_Compression) return f4_vi_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetVI(n,x,y,z);}
|
||||
inline virtual FDTD_FLOAT& GetVV( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_vv_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetVV(n,x,y,z);}
|
||||
inline virtual FDTD_FLOAT& GetVI( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_vi_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetVI(n,x,y,z);}
|
||||
|
||||
inline virtual FDTD_FLOAT& GetII( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { if (m_Use_Compression) return f4_ii_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetII(n,x,y,z);}
|
||||
inline virtual FDTD_FLOAT& GetIV( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { if (m_Use_Compression) return f4_iv_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetIV(n,x,y,z);}
|
||||
inline virtual FDTD_FLOAT& GetII( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_ii_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetII(n,x,y,z);}
|
||||
inline virtual FDTD_FLOAT& GetIV( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) { if (m_Use_Compression) return f4_iv_Compressed[n][m_Op_index[x][y][z%numVectors]].f[z/numVectors]; else return Operator_sse::GetIV(n,x,y,z);}
|
||||
|
||||
virtual void ShowStat() const;
|
||||
|
||||
void SetCompressionThreshold(double val) {m_Compression_Threshold = val;}
|
||||
void SetCacheSize(unsigned int size) {m_max_fifo = size;}
|
||||
bool CompressOperator();
|
||||
|
||||
protected:
|
||||
Operator_SSE_Compressed();
|
||||
|
||||
bool m_Use_Compression;
|
||||
bool m_Max_Compression; //reached maximal compression?
|
||||
unsigned int m_Op_Count;
|
||||
|
||||
//! Compression ratio threshold. If this ratio is exceeded, compression will be disabled. \sa SetCompressionThreshold()
|
||||
double m_Compression_Threshold; //default is 0.8
|
||||
|
||||
virtual void Init();
|
||||
virtual void Reset();
|
||||
virtual void InitOperator();
|
||||
|
||||
//! Compression cache size, default is 1000 \sa SetCacheSize()
|
||||
unsigned int m_max_fifo;
|
||||
|
||||
bool CompareOperators(unsigned int pos1[3], unsigned int pos2[3]);
|
||||
|
||||
// engine needs access
|
||||
public:
|
||||
unsigned int*** m_Op_index;
|
||||
f4vector* f4_vv_Compressed[3]; //calc new voltage from old voltage
|
||||
f4vector* f4_vi_Compressed[3]; //calc new voltage from old current
|
||||
f4vector* f4_iv_Compressed[3]; //calc new current from old current
|
||||
f4vector* f4_ii_Compressed[3]; //calc new current from old voltage
|
||||
vector<f4vector> f4_vv_Compressed[3]; //!< coefficient: calc new voltage from old voltage
|
||||
vector<f4vector> f4_vi_Compressed[3]; //!< coefficient: calc new voltage from old current
|
||||
vector<f4vector> f4_iv_Compressed[3]; //!< coefficient: calc new current from old current
|
||||
vector<f4vector> f4_ii_Compressed[3]; //!< coefficient: calc new current from old voltage
|
||||
|
||||
};
|
||||
|
||||
#endif // OPERATOR_SSE_Compressed_H
|
||||
|
|
Loading…
Reference in New Issue