2010-05-19 09:41:35 +00:00
/*
* Copyright (C) 2010 Thorsten Liebig (Thorsten.Liebig@gmx.de)
*
* This program is free software : you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation , either version 3 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
# include "operator_sse_compressed.h"
# include "engine_sse_compressed.h"
# include "engine_sse.h"
# include "tools/array_ops.h"
# include <list>
/*!
 * Factory for the compressed SSE operator.
 * Prints a short notice to stdout, allocates the operator with new, runs its
 * Init() and hands back the raw pointer.
 * \return pointer to the freshly created operator
 */
Operator_SSE_Compressed* Operator_SSE_Compressed::New()
{
	cout << " Create FDTD operator (compressed SSE) " << endl;
	Operator_SSE_Compressed* op = new Operator_SSE_Compressed();
	op->Init();
	return op;
}
/*!
 * Construct an operator with compression disabled and no compressed storage.
 * The index table and all compressed coefficient arrays start out as NULL.
 */
Operator_SSE_Compressed::Operator_SSE_Compressed() : Operator_sse()
{
	m_Op_index = NULL;
	// ShowStat() prints m_Op_Count; give it a defined value in case it is
	// queried before CompressOperator() has assigned the real count.
	m_Op_Count = 0;
	m_Use_Compression = false;
	m_Max_Compression = false;

	// CompressOperator() rejects the result when (unique operators / cells)
	// exceeds this ratio.
	m_Compression_Threshold = 0.8;

	for (int n = 0; n < 3; ++n)
	{
		f4_vv_Compressed[n] = NULL;
		f4_vi_Compressed[n] = NULL;
		f4_ii_Compressed[n] = NULL;
		f4_iv_Compressed[n] = NULL;
	}

	// 0 means "not chosen yet": CompressOperator() picks a value on first use.
	m_max_fifo = 0;
}
/*!
 * Destructor: all cleanup is delegated to Reset().
 */
Operator_SSE_Compressed::~Operator_SSE_Compressed()
{
	Reset();
}
/*!
 * Create the engine matching the current compression state.
 * \return a compressed SSE engine when m_Use_Compression is set,
 *         otherwise a default SSE engine
 */
Engine* Operator_SSE_Compressed::CreateEngine() const
{
	if (m_Use_Compression)
	{
		return Engine_SSE_Compressed::New(this);
	}

	//! compression is off: create a default sse-engine
	return Engine_sse::New(this);
}
int Operator_SSE_Compressed : : CalcECOperator ( )
{
Operator_sse : : CalcECOperator ( ) ;
m_Use_Compression = CompressOperator ( ) ;
return 0 ;
}
void Operator_SSE_Compressed : : Init ( )
{
Operator_sse : : Init ( ) ;
m_Op_index = NULL ;
for ( int n = 0 ; n < 3 ; + + n )
{
f4_vv_Compressed [ n ] = NULL ;
f4_vi_Compressed [ n ] = NULL ;
f4_ii_Compressed [ n ] = NULL ;
f4_iv_Compressed [ n ] = NULL ;
}
}
void Operator_SSE_Compressed : : Reset ( )
{
Operator_sse : : Reset ( ) ;
if ( m_Op_index )
{
unsigned int pos [ 3 ] ;
for ( pos [ 0 ] = 0 ; pos [ 0 ] < numLines [ 0 ] ; + + pos [ 0 ] )
{
for ( pos [ 1 ] = 0 ; pos [ 1 ] < numLines [ 1 ] ; + + pos [ 1 ] )
{
delete [ ] m_Op_index [ pos [ 0 ] ] [ pos [ 1 ] ] ;
}
delete [ ] m_Op_index [ pos [ 0 ] ] ;
}
delete [ ] m_Op_index ;
m_Op_index = NULL ;
}
for ( int n = 0 ; n < 3 ; + + n )
{
Delete1DArray_v4sf ( f4_vv_Compressed [ n ] ) ;
f4_vv_Compressed [ n ] = NULL ;
Delete1DArray_v4sf ( f4_vi_Compressed [ n ] ) ;
f4_vi_Compressed [ n ] = NULL ;
Delete1DArray_v4sf ( f4_ii_Compressed [ n ] ) ;
f4_ii_Compressed [ n ] = NULL ;
Delete1DArray_v4sf ( f4_iv_Compressed [ n ] ) ;
f4_iv_Compressed [ n ] = NULL ;
}
}
void Operator_SSE_Compressed : : InitOperator ( )
{
Operator_sse : : InitOperator ( ) ;
unsigned int pos [ 3 ] ;
m_Op_index = new unsigned int * * [ numLines [ 0 ] ] ;
for ( pos [ 0 ] = 0 ; pos [ 0 ] < numLines [ 0 ] ; + + pos [ 0 ] )
{
m_Op_index [ pos [ 0 ] ] = new unsigned int * [ numLines [ 1 ] ] ;
for ( pos [ 1 ] = 0 ; pos [ 1 ] < numLines [ 1 ] ; + + pos [ 1 ] )
{
m_Op_index [ pos [ 0 ] ] [ pos [ 1 ] ] = new unsigned int [ numVectors ] ;
for ( pos [ 2 ] = 0 ; pos [ 2 ] < numVectors ; + + pos [ 2 ] )
{
m_Op_index [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] = 0 ;
}
}
}
}
2010-05-19 09:42:56 +00:00
void Operator_SSE_Compressed : : ShowStat ( ) const
{
Operator_sse : : ShowStat ( ) ;
cout < < " SSE compression enabled \t : " < < m_Use_Compression < < endl ;
cout < < " Unique SSE operator \t : " < < m_Op_Count < < endl ;
cout < < " Optimal compression \t : " < < m_Max_Compression < < endl ;
cout < < " ----------------------------------- " < < endl ;
}
2010-05-19 09:41:35 +00:00
bool Operator_SSE_Compressed : : CompareOperators ( unsigned int pos1 [ 3 ] , unsigned int pos2 [ 3 ] )
{
// cerr << pos1[0] << " " << pos1[1] << " " << pos1[2] << endl;
for ( int n = 0 ; n < 3 ; + + n )
{
for ( int m = 0 ; m < 4 ; + + m )
{
if ( f4_vv [ n ] [ pos1 [ 0 ] ] [ pos1 [ 1 ] ] [ pos1 [ 2 ] ] . f [ m ] ! = f4_vv [ n ] [ pos2 [ 0 ] ] [ pos2 [ 1 ] ] [ pos2 [ 2 ] ] . f [ m ] ) return false ;
if ( f4_vi [ n ] [ pos1 [ 0 ] ] [ pos1 [ 1 ] ] [ pos1 [ 2 ] ] . f [ m ] ! = f4_vi [ n ] [ pos2 [ 0 ] ] [ pos2 [ 1 ] ] [ pos2 [ 2 ] ] . f [ m ] ) return false ;
if ( f4_iv [ n ] [ pos1 [ 0 ] ] [ pos1 [ 1 ] ] [ pos1 [ 2 ] ] . f [ m ] ! = f4_iv [ n ] [ pos2 [ 0 ] ] [ pos2 [ 1 ] ] [ pos2 [ 2 ] ] . f [ m ] ) return false ;
if ( f4_ii [ n ] [ pos1 [ 0 ] ] [ pos1 [ 1 ] ] [ pos1 [ 2 ] ] . f [ m ] ! = f4_ii [ n ] [ pos2 [ 0 ] ] [ pos2 [ 1 ] ] [ pos2 [ 2 ] ] . f [ m ] ) return false ;
}
}
return true ;
}
bool Operator_SSE_Compressed : : CompressOperator ( )
{
2010-06-07 21:08:38 +00:00
cout < < " Compressing the FDTD operator... this may take a while... " < < endl ;
2010-05-19 09:41:35 +00:00
2010-05-21 06:22:41 +00:00
if ( m_max_fifo = = 0 )
{
m_max_fifo = numVectors * numLines [ 1 ] + 1 ;
// cerr << m_max_fifo << endl;
}
2010-05-19 09:41:35 +00:00
m_Max_Compression = true ;
list < unsigned int > fifo ;
vector < unsigned int > index_list [ 3 ] ;
bool found ;
unsigned int pos [ 3 ] ;
unsigned int index_pos [ 3 ] ;
list < unsigned int > : : iterator it ;
for ( pos [ 0 ] = 0 ; pos [ 0 ] < numLines [ 0 ] ; + + pos [ 0 ] )
{
for ( pos [ 1 ] = 0 ; pos [ 1 ] < numLines [ 1 ] ; + + pos [ 1 ] )
{
for ( pos [ 2 ] = 0 ; pos [ 2 ] < numVectors ; + + pos [ 2 ] )
{
found = false ;
for ( it = fifo . begin ( ) ; it ! = fifo . end ( ) ; it + + )
{
index_pos [ 0 ] = index_list [ 0 ] . at ( * it ) ;
index_pos [ 1 ] = index_list [ 1 ] . at ( * it ) ;
index_pos [ 2 ] = index_list [ 2 ] . at ( * it ) ;
found = CompareOperators ( index_pos , pos ) ;
if ( found )
{
m_Op_index [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] = * it ;
fifo . erase ( it ) ;
fifo . push_front ( * it ) ; //push already existing value to the front
it = fifo . end ( ) ;
+ + it ;
break ;
}
}
if ( found = = false )
{
fifo . push_front ( index_list [ 0 ] . size ( ) ) ;
m_Op_index [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] = index_list [ 0 ] . size ( ) ;
index_list [ 0 ] . push_back ( pos [ 0 ] ) ;
index_list [ 1 ] . push_back ( pos [ 1 ] ) ;
index_list [ 2 ] . push_back ( pos [ 2 ] ) ;
}
if ( fifo . size ( ) > m_max_fifo )
{
fifo . pop_back ( ) ;
m_Max_Compression = false ;
}
}
}
}
m_Op_Count = index_list [ 0 ] . size ( ) ;
if ( ( ( double ) m_Op_Count / ( double ) GetNumberCells ( ) ) > m_Compression_Threshold )
{
cerr < < " Operator_SSE_Compressed::CompressOperator: Warning: Compression unsuccessful, ratio is " < < ( double ) m_Op_Count / ( double ) GetNumberCells ( ) < < " ! Fallback to conventional sse-engine... " < < endl ;
return false ;
}
// cerr << "total found: " << index_list[0].size() << endl;
for ( int n = 0 ; n < 3 ; + + n )
{
f4_vv_Compressed [ n ] = Create1DArray_v4sf ( m_Op_Count ) ;
f4_vi_Compressed [ n ] = Create1DArray_v4sf ( m_Op_Count ) ;
f4_ii_Compressed [ n ] = Create1DArray_v4sf ( m_Op_Count ) ;
f4_iv_Compressed [ n ] = Create1DArray_v4sf ( m_Op_Count ) ;
for ( unsigned int m = 0 ; m < m_Op_Count ; + + m )
{
for ( unsigned int v = 0 ; v < 4 ; + + v )
{
f4_vv_Compressed [ n ] [ m ] . f [ v ] = f4_vv [ n ] [ index_list [ 0 ] . at ( m ) ] [ index_list [ 1 ] . at ( m ) ] [ index_list [ 2 ] . at ( m ) ] . f [ v ] ;
f4_vi_Compressed [ n ] [ m ] . f [ v ] = f4_vi [ n ] [ index_list [ 0 ] . at ( m ) ] [ index_list [ 1 ] . at ( m ) ] [ index_list [ 2 ] . at ( m ) ] . f [ v ] ;
f4_ii_Compressed [ n ] [ m ] . f [ v ] = f4_ii [ n ] [ index_list [ 0 ] . at ( m ) ] [ index_list [ 1 ] . at ( m ) ] [ index_list [ 2 ] . at ( m ) ] . f [ v ] ;
f4_iv_Compressed [ n ] [ m ] . f [ v ] = f4_iv [ n ] [ index_list [ 0 ] . at ( m ) ] [ index_list [ 1 ] . at ( m ) ] [ index_list [ 2 ] . at ( m ) ] . f [ v ] ;
}
}
}
Delete_N_3DArray_v4sf ( f4_vv , numLines ) ;
Delete_N_3DArray_v4sf ( f4_vi , numLines ) ;
Delete_N_3DArray_v4sf ( f4_iv , numLines ) ;
Delete_N_3DArray_v4sf ( f4_ii , numLines ) ;
f4_vv = 0 ;
f4_vi = 0 ;
f4_iv = 0 ;
f4_ii = 0 ;
return true ;
}