/*
* Copyright (C) 2010 Thorsten Liebig (Thorsten.Liebig@gmx.de)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "operator_sse_compressed.h"
#include "engine_sse_compressed.h"
#include "engine_sse.h"
#include "tools/array_ops.h"
#include
//! Factory for the compressed SSE operator.
/*!
 * Prints a creation notice to stdout, allocates a new Operator_SSE_Compressed,
 * calls Init() on it and hands back the pointer obtained from `new`.
 * \return pointer to the freshly allocated and initialised operator
 */
Operator_SSE_Compressed* Operator_SSE_Compressed::New()
{
	cout << "Create FDTD operator (compressed SSE)" << endl;

	Operator_SSE_Compressed* created = new Operator_SSE_Compressed();
	created->Init();

	return created;
}
//! Default constructor.
/*!
 * Starts with compression switched off, a compression threshold of 0.8,
 * a fifo limit of 0, and neither an index table nor compressed coefficient
 * arrays allocated.
 */
Operator_SSE_Compressed::Operator_SSE_Compressed() : Operator_sse()
{
	// compression settings / state flags
	m_Use_Compression = false;
	m_Max_Compression = false;
	m_Compression_Threshold = 0.8;
	m_max_fifo = 0;

	// nothing allocated yet: index table and the four compressed
	// coefficient arrays for each of the three components
	m_Op_index = NULL;
	for (int dir=0; dir<3; ++dir)
	{
		f4_vv_Compressed[dir] = NULL;
		f4_vi_Compressed[dir] = NULL;
		f4_iv_Compressed[dir] = NULL;
		f4_ii_Compressed[dir] = NULL;
	}
}
//! Destructor: delegates all cleanup to Reset().
Operator_SSE_Compressed::~Operator_SSE_Compressed()
{
	this->Reset();
}
//! Create the engine that matches the current compression state of this operator.
/*!
 * \return an Engine_SSE_Compressed if m_Use_Compression is set,
 *         otherwise a default Engine_sse; both are created for this operator
 */
Engine* Operator_SSE_Compressed::CreateEngine() const
{
	if (m_Use_Compression)
		return Engine_SSE_Compressed::New(this);

	// compression is not in use: create a default sse-engine
	return Engine_sse::New(this);
}
//! Calculate the operator using the Operator_sse implementation, then try to compress it.
/*!
 * m_Use_Compression is set to the result of CompressOperator(); the
 * compression attempt is made regardless of the base-class result.
 * \return the value returned by Operator_sse::CalcECOperator(), so that an
 *         error reported by the base class is not masked by an unconditional 0
 */
int Operator_SSE_Compressed::CalcECOperator()
{
	// keep the base-class status instead of discarding it
	const int baseResult = Operator_sse::CalcECOperator();

	m_Use_Compression = CompressOperator();

	return baseResult;
}
//! Initialise the base operator and clear all compression-related pointers.
/*!
 * After Operator_sse::Init() has run, the index table pointer and the
 * compressed coefficient array pointers are set to NULL. The pointers are
 * only overwritten here; nothing is released.
 */
void Operator_SSE_Compressed::Init()
{
	Operator_sse::Init();

	for (int dir=0; dir<3; ++dir)
	{
		f4_vv_Compressed[dir] = NULL;
		f4_vi_Compressed[dir] = NULL;
		f4_iv_Compressed[dir] = NULL;
		f4_ii_Compressed[dir] = NULL;
	}
	m_Op_index = NULL;
}
void Operator_SSE_Compressed::Reset()
{
Operator_sse::Reset();
if (m_Op_index)
{
unsigned int pos[3];
for (pos[0]=0;pos[0] fifo;
vector index_list[3];
bool found;
unsigned int pos[3];
unsigned int index_pos[3];
list::iterator it;
for (pos[0]=0;pos[0]m_max_fifo)
{
fifo.pop_back();
m_Max_Compression = false;
}
}
}
}
m_Op_Count = index_list[0].size();
if ( ((double)m_Op_Count/(double)GetNumberCells()) > m_Compression_Threshold)
{
cerr << "Operator_SSE_Compressed::CompressOperator: Warning: Compression unsuccessful, ratio is " << (double)m_Op_Count/(double)GetNumberCells() << "! Fallback to conventional sse-engine..." << endl;
return false;
}
// cerr << "total found: " << index_list[0].size() << endl;
for (int n=0;n<3;++n)
{
f4_vv_Compressed[n] = Create1DArray_v4sf(m_Op_Count);
f4_vi_Compressed[n] = Create1DArray_v4sf(m_Op_Count);
f4_ii_Compressed[n] = Create1DArray_v4sf(m_Op_Count);
f4_iv_Compressed[n] = Create1DArray_v4sf(m_Op_Count);
for (unsigned int m=0;m