introducing compressed sse operator & engine
use --engine=sse-compressed option to enable
This commit is contained in:
parent
3e9658914f
commit
de5bdc8ef6
144
FDTD/engine_sse_compressed.cpp
Normal file
144
FDTD/engine_sse_compressed.cpp
Normal file
@ -0,0 +1,144 @@
|
||||
/*
|
||||
* Copyright (C) 2010 Thorsten Liebig (Thorsten.Liebig@gmx.de)
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "engine_sse_compressed.h"
|
||||
|
||||
|
||||
//! Factory: allocates a compressed SSE engine for \a op, runs Init() on it and returns it.
//! The engine is created with new and the raw pointer is handed to the caller.
Engine_SSE_Compressed* Engine_SSE_Compressed::New(const Operator_SSE_Compressed* op)
{
	Engine_SSE_Compressed* engine = new Engine_SSE_Compressed(op);
	engine->Init();
	return engine;
}
|
||||
|
||||
//! Forwards \a op to the Engine_sse base and keeps a typed pointer to it in Op,
//! which the update loops use to reach the compressed coefficient tables.
Engine_SSE_Compressed::Engine_SSE_Compressed(const Operator_SSE_Compressed* op)
	: Engine_sse(op), Op(op)
{
}
|
||||
|
||||
//! Nothing to release here: this class only stores the non-owning Op pointer.
Engine_SSE_Compressed::~Engine_SSE_Compressed()
{
	// intentionally empty
}
|
||||
|
||||
void Engine_SSE_Compressed::UpdateVoltages(unsigned int startX, unsigned int numX)
|
||||
{
|
||||
unsigned int pos[3];
|
||||
bool shift[2];
|
||||
f4vector temp;
|
||||
|
||||
pos[0] = startX;
|
||||
unsigned int index=0;
|
||||
for (unsigned int posX=0;posX<numX;++posX)
|
||||
{
|
||||
shift[0]=pos[0];
|
||||
for (pos[1]=0;pos[1]<numLines[1];++pos[1])
|
||||
{
|
||||
shift[1]=pos[1];
|
||||
for (pos[2]=1;pos[2]<numVectors;++pos[2])
|
||||
{
|
||||
index = Op->m_Op_index[pos[0]][pos[1]][pos[2]];
|
||||
// x-polarization
|
||||
f4_volt[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[0][index].v;
|
||||
f4_volt[0][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[0][index].v * ( f4_curr[2][pos[0]][pos[1]][pos[2]].v - f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - f4_curr[1][pos[0]][pos[1]][pos[2]].v + f4_curr[1][pos[0]][pos[1]][pos[2]-1].v );
|
||||
|
||||
// y-polarization
|
||||
f4_volt[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[1][index].v;
|
||||
f4_volt[1][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[1][index].v * ( f4_curr[0][pos[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]-1].v - f4_curr[2][pos[0]][pos[1]][pos[2]].v + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]].v);
|
||||
|
||||
// z-polarization
|
||||
f4_volt[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[2][index].v;
|
||||
f4_volt[2][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[2][index].v * ( f4_curr[1][pos[0]][pos[1]][pos[2]].v - f4_curr[1][pos[0]-shift[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]].v + f4_curr[0][pos[0]][pos[1]-shift[1]][pos[2]].v);
|
||||
}
|
||||
|
||||
// for pos[2] = 0
|
||||
// x-polarization
|
||||
index = Op->m_Op_index[pos[0]][pos[1]][0];
|
||||
temp.f[0] = 0;
|
||||
temp.f[1] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[0];
|
||||
temp.f[2] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[1];
|
||||
temp.f[3] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[2];
|
||||
f4_volt[0][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[0][index].v;
|
||||
f4_volt[0][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[0][index].v * ( f4_curr[2][pos[0]][pos[1]][0].v - f4_curr[2][pos[0]][pos[1]-shift[1]][0].v - f4_curr[1][pos[0]][pos[1]][0].v + temp.v );
|
||||
|
||||
// y-polarization
|
||||
temp.f[0] = 0;
|
||||
temp.f[1] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[0];
|
||||
temp.f[2] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[1];
|
||||
temp.f[3] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[2];
|
||||
f4_volt[1][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[1][index].v;
|
||||
f4_volt[1][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[1][index].v * ( f4_curr[0][pos[0]][pos[1]][0].v - temp.v - f4_curr[2][pos[0]][pos[1]][0].v + f4_curr[2][pos[0]-shift[0]][pos[1]][0].v);
|
||||
|
||||
// z-polarization
|
||||
f4_volt[2][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[2][index].v;
|
||||
f4_volt[2][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[2][index].v * ( f4_curr[1][pos[0]][pos[1]][0].v - f4_curr[1][pos[0]-shift[0]][pos[1]][0].v - f4_curr[0][pos[0]][pos[1]][0].v + f4_curr[0][pos[0]][pos[1]-shift[1]][0].v);
|
||||
}
|
||||
++pos[0];
|
||||
}
|
||||
}
|
||||
|
||||
void Engine_SSE_Compressed::UpdateCurrents(unsigned int startX, unsigned int numX)
|
||||
{
|
||||
unsigned int pos[5];
|
||||
f4vector temp;
|
||||
|
||||
pos[0] = startX;
|
||||
unsigned int index;
|
||||
for (unsigned int posX=0;posX<numX;++posX)
|
||||
{
|
||||
for (pos[1]=0;pos[1]<numLines[1]-1;++pos[1])
|
||||
{
|
||||
for (pos[2]=0;pos[2]<numVectors-1;++pos[2])
|
||||
{
|
||||
index = Op->m_Op_index[pos[0]][pos[1]][pos[2]];
|
||||
// x-pol
|
||||
f4_curr[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[0][index].v;
|
||||
f4_curr[0][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[0][index].v * ( f4_volt[2][pos[0]][pos[1]][pos[2]].v - f4_volt[2][pos[0]][pos[1]+1][pos[2]].v - f4_volt[1][pos[0]][pos[1]][pos[2]].v + f4_volt[1][pos[0]][pos[1]][pos[2]+1].v);
|
||||
|
||||
// y-pol
|
||||
f4_curr[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[1][index].v;
|
||||
f4_curr[1][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[1][index].v * ( f4_volt[0][pos[0]][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]+1].v - f4_volt[2][pos[0]][pos[1]][pos[2]].v + f4_volt[2][pos[0]+1][pos[1]][pos[2]].v);
|
||||
|
||||
// z-pol
|
||||
f4_curr[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[2][index].v;
|
||||
f4_curr[2][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[2][index].v * ( f4_volt[1][pos[0]][pos[1]][pos[2]].v - f4_volt[1][pos[0]+1][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]].v + f4_volt[0][pos[0]][pos[1]+1][pos[2]].v);
|
||||
}
|
||||
|
||||
index = Op->m_Op_index[pos[0]][pos[1]][numVectors-1];
|
||||
// for pos[2] = numVectors-1
|
||||
// x-pol
|
||||
temp.f[0] = f4_volt[1][pos[0]][pos[1]][0].f[1];
|
||||
temp.f[1] = f4_volt[1][pos[0]][pos[1]][0].f[2];
|
||||
temp.f[2] = f4_volt[1][pos[0]][pos[1]][0].f[3];
|
||||
temp.f[3] = 0;
|
||||
f4_curr[0][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[0][index].v;
|
||||
f4_curr[0][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[0][index].v * ( f4_volt[2][pos[0]][pos[1]][numVectors-1].v - f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v - f4_volt[1][pos[0]][pos[1]][numVectors-1].v + temp.v);
|
||||
|
||||
// y-pol
|
||||
temp.f[0] = f4_volt[0][pos[0]][pos[1]][0].f[1];
|
||||
temp.f[1] = f4_volt[0][pos[0]][pos[1]][0].f[2];
|
||||
temp.f[2] = f4_volt[0][pos[0]][pos[1]][0].f[3];
|
||||
temp.f[3] = 0;
|
||||
f4_curr[1][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[1][index].v;
|
||||
f4_curr[1][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[1][index].v * ( f4_volt[0][pos[0]][pos[1]][numVectors-1].v - temp.v - f4_volt[2][pos[0]][pos[1]][numVectors-1].v + f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v);
|
||||
|
||||
// z-pol
|
||||
f4_curr[2][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[2][index].v;
|
||||
f4_curr[2][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[2][index].v * ( f4_volt[1][pos[0]][pos[1]][numVectors-1].v - f4_volt[1][pos[0]+1][pos[1]][numVectors-1].v - f4_volt[0][pos[0]][pos[1]][numVectors-1].v + f4_volt[0][pos[0]][pos[1]+1][numVectors-1].v);
|
||||
}
|
||||
++pos[0];
|
||||
}
|
||||
}
|
38
FDTD/engine_sse_compressed.h
Normal file
38
FDTD/engine_sse_compressed.h
Normal file
@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright (C) 2010 Thorsten Liebig (Thorsten.Liebig@gmx.de)
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef ENGINE_SSE_COMPRESSED_H
|
||||
#define ENGINE_SSE_COMPRESSED_H
|
||||
|
||||
#include "engine_sse.h"
|
||||
#include "operator_sse_compressed.h"
|
||||
|
||||
//! SSE engine variant that reads its update coefficients from the compressed
//! tables of an Operator_SSE_Compressed instead of per-cell arrays.
class Engine_SSE_Compressed : public Engine_sse
|
||||
{
|
||||
public:
|
||||
//! Create an engine for the given operator; the engine is initialized (Init()) before it is returned.
static Engine_SSE_Compressed* New(const Operator_SSE_Compressed* op);
|
||||
virtual ~Engine_SSE_Compressed();
|
||||
|
||||
protected:
|
||||
//! Construction only through New().
Engine_SSE_Compressed(const Operator_SSE_Compressed* op);
|
||||
//! Typed pointer to the operator passed at construction; not deleted by this class.
const Operator_SSE_Compressed* Op;
|
||||
|
||||
//! Update the voltages of numX x-indices starting at startX using the compressed vv/vi tables.
virtual void UpdateVoltages(unsigned int startX, unsigned int numX);
|
||||
//! Update the currents of numX x-indices starting at startX using the compressed ii/iv tables.
virtual void UpdateCurrents(unsigned int startX, unsigned int numX);
|
||||
};
|
||||
|
||||
#endif // ENGINE_SSE_COMPRESSED_H
|
@ -28,6 +28,10 @@ Operator_sse* Operator_sse::New()
|
||||
|
||||
//! Starts with all four SSE coefficient arrays unset (null), so that code which
//! checks or frees them before allocation sees a defined value.
Operator_sse::Operator_sse() : Operator()
{
	f4_vv = f4_vi = f4_iv = f4_ii = 0;
}
|
||||
|
||||
Operator_sse::~Operator_sse()
|
||||
|
247
FDTD/operator_sse_compressed.cpp
Normal file
247
FDTD/operator_sse_compressed.cpp
Normal file
@ -0,0 +1,247 @@
|
||||
/*
|
||||
* Copyright (C) 2010 Thorsten Liebig (Thorsten.Liebig@gmx.de)
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "operator_sse_compressed.h"
|
||||
#include "engine_sse_compressed.h"
|
||||
#include "engine_sse.h"
|
||||
#include "tools/array_ops.h"
|
||||
|
||||
#include <list>
|
||||
|
||||
//! Factory: allocates the operator with new, calls Init() on it and returns the raw pointer.
Operator_SSE_Compressed* Operator_SSE_Compressed::New()
{
	Operator_SSE_Compressed* created = new Operator_SSE_Compressed();
	created->Init();
	return created;
}
|
||||
|
||||
//! Sets the defaults: compression off, threshold 0.8, cache size 1000, and no
//! index table or compressed coefficient tables allocated.
Operator_SSE_Compressed::Operator_SSE_Compressed() : Operator_sse()
{
	// compression state and tuning parameters
	m_Use_Compression = false;
	m_Max_Compression = false;
	m_Compression_Threshold = 0.8;
	m_max_fifo = 1000;

	// nothing allocated yet
	m_Op_index = NULL;
	for (int dir=0; dir<3; ++dir)
	{
		f4_vv_Compressed[dir] = NULL;
		f4_vi_Compressed[dir] = NULL;
		f4_ii_Compressed[dir] = NULL;
		f4_iv_Compressed[dir] = NULL;
	}
}
|
||||
|
||||
//! Releases the index table and the compressed coefficient tables via Reset().
Operator_SSE_Compressed::~Operator_SSE_Compressed()
{
	this->Reset();
}
|
||||
|
||||
//! Creates the engine matching the current compression state.
/*!
  \return an Engine_SSE_Compressed when compression is in use (m_Use_Compression),
          otherwise a default Engine_sse working on the uncompressed operator.
*/
Engine* Operator_SSE_Compressed::CreateEngine() const
{
	if (m_Use_Compression)
		return Engine_SSE_Compressed::New(this);

	//! create a default sse-engine
	return Engine_sse::New(this);
}
|
||||
|
||||
int Operator_SSE_Compressed::CalcECOperator()
|
||||
{
|
||||
Operator_sse::CalcECOperator();
|
||||
m_Use_Compression = CompressOperator();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void Operator_SSE_Compressed::Init()
|
||||
{
|
||||
Operator_sse::Init();
|
||||
m_Op_index = NULL;
|
||||
for (int n=0;n<3;++n)
|
||||
{
|
||||
f4_vv_Compressed[n]=NULL;
|
||||
f4_vi_Compressed[n]=NULL;
|
||||
f4_ii_Compressed[n]=NULL;
|
||||
f4_iv_Compressed[n]=NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void Operator_SSE_Compressed::Reset()
|
||||
{
|
||||
Operator_sse::Reset();
|
||||
|
||||
if (m_Op_index)
|
||||
{
|
||||
unsigned int pos[3];
|
||||
for (pos[0]=0;pos[0]<numLines[0];++pos[0])
|
||||
{
|
||||
for (pos[1]=0;pos[1]<numLines[1];++pos[1])
|
||||
{
|
||||
delete[] m_Op_index[pos[0]][pos[1]];
|
||||
}
|
||||
delete[] m_Op_index[pos[0]];
|
||||
}
|
||||
delete[] m_Op_index;
|
||||
m_Op_index = NULL;
|
||||
}
|
||||
|
||||
for (int n=0;n<3;++n)
|
||||
{
|
||||
Delete1DArray_v4sf(f4_vv_Compressed[n]);
|
||||
f4_vv_Compressed[n]=NULL;
|
||||
Delete1DArray_v4sf(f4_vi_Compressed[n]);
|
||||
f4_vi_Compressed[n]=NULL;
|
||||
Delete1DArray_v4sf(f4_ii_Compressed[n]);
|
||||
f4_ii_Compressed[n]=NULL;
|
||||
Delete1DArray_v4sf(f4_iv_Compressed[n]);
|
||||
f4_iv_Compressed[n]=NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void Operator_SSE_Compressed::InitOperator()
|
||||
{
|
||||
Operator_sse::InitOperator();
|
||||
|
||||
unsigned int pos[3];
|
||||
m_Op_index = new unsigned int**[numLines[0]];
|
||||
for (pos[0]=0;pos[0]<numLines[0];++pos[0])
|
||||
{
|
||||
m_Op_index[pos[0]] = new unsigned int*[numLines[1]];
|
||||
for (pos[1]=0;pos[1]<numLines[1];++pos[1])
|
||||
{
|
||||
m_Op_index[pos[0]][pos[1]] = new unsigned int[numVectors];
|
||||
for (pos[2]=0;pos[2]<numVectors;++pos[2])
|
||||
{
|
||||
m_Op_index[pos[0]][pos[1]][pos[2]] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool Operator_SSE_Compressed::CompareOperators(unsigned int pos1[3], unsigned int pos2[3])
|
||||
{
|
||||
// cerr << pos1[0] << " " << pos1[1] << " " << pos1[2] << endl;
|
||||
for (int n=0;n<3;++n)
|
||||
{
|
||||
for (int m=0;m<4;++m)
|
||||
{
|
||||
if (f4_vv[n][pos1[0]][pos1[1]][pos1[2]].f[m] != f4_vv[n][pos2[0]][pos2[1]][pos2[2]].f[m]) return false;
|
||||
if (f4_vi[n][pos1[0]][pos1[1]][pos1[2]].f[m] != f4_vi[n][pos2[0]][pos2[1]][pos2[2]].f[m]) return false;
|
||||
if (f4_iv[n][pos1[0]][pos1[1]][pos1[2]].f[m] != f4_iv[n][pos2[0]][pos2[1]][pos2[2]].f[m]) return false;
|
||||
if (f4_ii[n][pos1[0]][pos1[1]][pos1[2]].f[m] != f4_ii[n][pos2[0]][pos2[1]][pos2[2]].f[m]) return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Operator_SSE_Compressed::CompressOperator()
|
||||
{
|
||||
cout << "Compressing the FDTD operator... this my take a while..." << endl;
|
||||
|
||||
m_Max_Compression = true;
|
||||
|
||||
list<unsigned int> fifo;
|
||||
vector<unsigned int> index_list[3];
|
||||
|
||||
bool found;
|
||||
unsigned int pos[3];
|
||||
unsigned int index_pos[3];
|
||||
list<unsigned int>::iterator it;
|
||||
for (pos[0]=0;pos[0]<numLines[0];++pos[0])
|
||||
{
|
||||
for (pos[1]=0;pos[1]<numLines[1];++pos[1])
|
||||
{
|
||||
for (pos[2]=0;pos[2]<numVectors;++pos[2])
|
||||
{
|
||||
found = false;
|
||||
for ( it=fifo.begin() ; it != fifo.end(); it++ )
|
||||
{
|
||||
index_pos[0] = index_list[0].at(*it);
|
||||
index_pos[1] = index_list[1].at(*it);
|
||||
index_pos[2] = index_list[2].at(*it);
|
||||
found = CompareOperators(index_pos, pos);
|
||||
if (found)
|
||||
{
|
||||
m_Op_index[pos[0]][pos[1]][pos[2]] = *it;
|
||||
fifo.erase(it);
|
||||
fifo.push_front(*it); //push already existing value to the front
|
||||
it = fifo.end();
|
||||
++it;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found==false)
|
||||
{
|
||||
fifo.push_front(index_list[0].size());
|
||||
m_Op_index[pos[0]][pos[1]][pos[2]] = index_list[0].size();
|
||||
index_list[0].push_back(pos[0]);
|
||||
index_list[1].push_back(pos[1]);
|
||||
index_list[2].push_back(pos[2]);
|
||||
}
|
||||
if (fifo.size()>m_max_fifo)
|
||||
{
|
||||
fifo.pop_back();
|
||||
m_Max_Compression = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_Op_Count = index_list[0].size();
|
||||
if ( ((double)m_Op_Count/(double)GetNumberCells()) > m_Compression_Threshold)
|
||||
{
|
||||
cerr << "Operator_SSE_Compressed::CompressOperator: Warning: Compression unsuccessful, ratio is " << (double)m_Op_Count/(double)GetNumberCells() << "! Fallback to conventional sse-engine..." << endl;
|
||||
return false;
|
||||
}
|
||||
// cerr << "total found: " << index_list[0].size() << endl;
|
||||
|
||||
for (int n=0;n<3;++n)
|
||||
{
|
||||
f4_vv_Compressed[n] = Create1DArray_v4sf(m_Op_Count);
|
||||
f4_vi_Compressed[n] = Create1DArray_v4sf(m_Op_Count);
|
||||
f4_ii_Compressed[n] = Create1DArray_v4sf(m_Op_Count);
|
||||
f4_iv_Compressed[n] = Create1DArray_v4sf(m_Op_Count);
|
||||
|
||||
for (unsigned int m=0;m<m_Op_Count;++m)
|
||||
{
|
||||
for (unsigned int v=0;v<4;++v)
|
||||
{
|
||||
f4_vv_Compressed[n][m].f[v] = f4_vv[n][index_list[0].at(m)][index_list[1].at(m)][index_list[2].at(m)].f[v];
|
||||
f4_vi_Compressed[n][m].f[v] = f4_vi[n][index_list[0].at(m)][index_list[1].at(m)][index_list[2].at(m)].f[v];
|
||||
f4_ii_Compressed[n][m].f[v] = f4_ii[n][index_list[0].at(m)][index_list[1].at(m)][index_list[2].at(m)].f[v];
|
||||
f4_iv_Compressed[n][m].f[v] = f4_iv[n][index_list[0].at(m)][index_list[1].at(m)][index_list[2].at(m)].f[v];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Delete_N_3DArray_v4sf(f4_vv,numLines);
|
||||
Delete_N_3DArray_v4sf(f4_vi,numLines);
|
||||
Delete_N_3DArray_v4sf(f4_iv,numLines);
|
||||
Delete_N_3DArray_v4sf(f4_ii,numLines);
|
||||
f4_vv = 0;
|
||||
f4_vi = 0;
|
||||
f4_iv = 0;
|
||||
f4_ii = 0;
|
||||
|
||||
return true;
|
||||
}
|
66
FDTD/operator_sse_compressed.h
Normal file
66
FDTD/operator_sse_compressed.h
Normal file
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (C) 2010 Thorsten Liebig (Thorsten.Liebig@gmx.de)
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef OPERATOR_SSE_COMPRESSED_H
|
||||
#define OPERATOR_SSE_COMPRESSED_H
|
||||
|
||||
#include "operator_sse.h"
|
||||
|
||||
//! SSE operator that stores each distinct coefficient vector only once and
//! addresses it through a per-position index table (m_Op_index).
class Operator_SSE_Compressed : public Operator_sse
|
||||
{
|
||||
public:
|
||||
//! Create a new operator
|
||||
static Operator_SSE_Compressed* New();
|
||||
virtual ~Operator_SSE_Compressed();
|
||||
|
||||
//! Create a compressed engine if compression is in use, otherwise a default sse engine.
virtual Engine* CreateEngine() const;
|
||||
|
||||
//! Calculate the operator via Operator_sse and then try to compress it.
virtual int CalcECOperator();
|
||||
|
||||
void SetCompressionThreshold(double val) {m_Compression_Threshold = val;}
|
||||
void SetCacheSize(unsigned int size) {m_max_fifo = size;}
|
||||
//! Build the compressed tables; returns false if the compression ratio exceeds the threshold.
bool CompressOperator();
|
||||
|
||||
protected:
|
||||
Operator_SSE_Compressed();
|
||||
|
||||
//! Result of the last CompressOperator() call made from CalcECOperator(); selects the engine in CreateEngine().
bool m_Use_Compression;
|
||||
bool m_Max_Compression; //reached maximal compression?
|
||||
//! Number of entries in each compressed table, set by CompressOperator().
unsigned int m_Op_Count;
|
||||
|
||||
//! Compression ratio threshold. If this ratio is exceeded, compression will be disabled. \sa SetCompressionThreshold
|
||||
double m_Compression_Threshold; //default is 0.8
|
||||
|
||||
virtual void Init();
|
||||
virtual void Reset();
|
||||
virtual void InitOperator();
|
||||
|
||||
//! Compression cache size, default is 1000 \sa SetCacheSize
|
||||
unsigned int m_max_fifo;
|
||||
|
||||
//! Exact comparison of all coefficients at two positions {x, y, vector index}.
bool CompareOperators(unsigned int pos1[3], unsigned int pos2[3]);
|
||||
|
||||
// engine needs access
|
||||
public:
|
||||
//! Index into the compressed tables for every position [x][y][vector index].
unsigned int*** m_Op_index;
|
||||
f4vector* f4_vv_Compressed[3]; //calc new voltage from old voltage
|
||||
f4vector* f4_vi_Compressed[3]; //calc new voltage from old current
|
||||
f4vector* f4_iv_Compressed[3]; //calc new current from old voltage
|
||||
f4vector* f4_ii_Compressed[3]; //calc new current from old current
|
||||
};
|
||||
|
||||
#endif // OPERATOR_SSE_Compressed_H
|
@ -3,20 +3,22 @@ clear
|
||||
clc
|
||||
|
||||
%% setup the simulation %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
abs_length = 250;
|
||||
length = 4000;
|
||||
length = 10000;
|
||||
width = 1000;
|
||||
height = 1000;
|
||||
mesh_res = 25;
|
||||
mesh_res = 20;
|
||||
abs_length = mesh_res*10;
|
||||
|
||||
EPS0 = 8.85418781762e-12;
|
||||
MUE0 = 1.256637062e-6;
|
||||
|
||||
%% define openEMS options %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
openEMS_opts = '';
|
||||
% openEMS_opts = [openEMS_opts ' --disable-dumps'];
|
||||
openEMS_opts = [openEMS_opts ' --disable-dumps'];
|
||||
% openEMS_opts = [openEMS_opts ' --debug-material'];
|
||||
% openEMS_opts = [openEMS_opts ' --engine=multithreaded'];
|
||||
openEMS_opts = [openEMS_opts ' --engine=multithreaded'];
|
||||
% openEMS_opts = [openEMS_opts ' --engine=sse'];
|
||||
openEMS_opts = [openEMS_opts ' --engine=sse-compressed'];
|
||||
|
||||
Sim_Path = 'tmp';
|
||||
Sim_CSX = 'plane_wave.xml';
|
||||
@ -24,7 +26,7 @@ Sim_CSX = 'plane_wave.xml';
|
||||
mkdir(Sim_Path);
|
||||
|
||||
%% setup FDTD parameter & excitation function %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
FDTD = InitFDTD(5e5,1e-5,'OverSampling',10);
|
||||
FDTD = InitFDTD(5000,1e-5,'OverSampling',10);
|
||||
FDTD = SetGaussExcite(FDTD,0.5e9,0.5e9);
|
||||
BC = [1 1 0 0 0 0];
|
||||
FDTD = SetBoundaryCond(FDTD,BC);
|
||||
|
@ -52,7 +52,9 @@ SOURCES += main.cpp \
|
||||
FDTD/operator_ext_mur_abc.cpp \
|
||||
FDTD/excitation.cpp \
|
||||
FDTD/operator_ext_cylinder.cpp \
|
||||
FDTD/engine_ext_cylinder.cpp
|
||||
FDTD/engine_ext_cylinder.cpp \
|
||||
FDTD/operator_sse_compressed.cpp \
|
||||
FDTD/engine_sse_compressed.cpp
|
||||
HEADERS += tools/ErrorMsg.h \
|
||||
tools/AdrOp.h \
|
||||
tools/constants.h \
|
||||
@ -75,7 +77,9 @@ HEADERS += tools/ErrorMsg.h \
|
||||
FDTD/operator_ext_mur_abc.h \
|
||||
FDTD/excitation.h \
|
||||
FDTD/operator_ext_cylinder.h \
|
||||
FDTD/engine_ext_cylinder.h
|
||||
FDTD/engine_ext_cylinder.h \
|
||||
FDTD/operator_sse_compressed.h \
|
||||
FDTD/engine_sse_compressed.h
|
||||
QMAKE_CXXFLAGS_RELEASE = -O3 \
|
||||
-g \
|
||||
-march=native
|
||||
|
11
openems.cpp
11
openems.cpp
@ -22,6 +22,7 @@
|
||||
#include "FDTD/operator_cylinder.h"
|
||||
#include "FDTD/engine_multithread.h"
|
||||
#include "FDTD/engine_sse.h"
|
||||
#include "FDTD/operator_sse_compressed.h"
|
||||
#include "FDTD/operator_ext_mur_abc.h"
|
||||
#include "FDTD/processvoltage.h"
|
||||
#include "FDTD/processcurrent.h"
|
||||
@ -121,6 +122,12 @@ bool openEMS::parseCommandLineArgument( const char *argv )
|
||||
m_engine = EngineType_SSE;
|
||||
return true;
|
||||
}
|
||||
else if (strcmp(argv,"--engine=sse-compressed")==0)
|
||||
{
|
||||
cout << "openEMS - enabled compressed sse engine" << endl;
|
||||
m_engine = EngineType_SSE_Compressed;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
@ -211,6 +218,10 @@ int openEMS::SetupFDTD(const char* file)
|
||||
{
|
||||
FDTD_Op = Operator_sse::New();
|
||||
}
|
||||
else if (m_engine == EngineType_SSE_Compressed)
|
||||
{
|
||||
FDTD_Op = Operator_SSE_Compressed::New();
|
||||
}
|
||||
else
|
||||
{
|
||||
FDTD_Op = Operator::New();
|
||||
|
@ -60,7 +60,7 @@ protected:
|
||||
Engine* FDTD_Eng;
|
||||
ProcessingArray* PA;
|
||||
|
||||
enum EngineType {EngineType_Standard, EngineType_SSE};
|
||||
enum EngineType {EngineType_Standard, EngineType_SSE, EngineType_SSE_Compressed};
|
||||
EngineType m_engine;
|
||||
unsigned int m_engine_numThreads;
|
||||
};
|
||||
|
@ -132,6 +132,11 @@ void Dump_N_3DArray2File(ostream &file, FDTD_FLOAT**** array, unsigned int* numL
|
||||
}
|
||||
|
||||
|
||||
//! \brief releases a 1D f4vector array through FREE; a NULL pointer is ignored
void Delete1DArray_v4sf(f4vector* array)
{
	if (array!=NULL)
	{
		FREE( array );
	}
}
|
||||
|
||||
|
||||
void Delete3DArray_v4sf(f4vector*** array, const unsigned int* numLines)
|
||||
@ -163,6 +168,16 @@ void Delete_N_3DArray_v4sf(f4vector**** array, const unsigned int* numLines)
|
||||
//delete[] array;
|
||||
}
|
||||
|
||||
//! \brief this function allocates a 1D array of numLines f4vector elements, which is aligned to 16 byte
/*!
  The memory is requested through MEMALIGN and is not initialized here.
  If the allocation fails an error is printed and the program exits with code 3.
  Release the array with Delete1DArray_v4sf().

  The element size is now taken with plain sizeof(f4vector); the former
  sizeof(typeof(f4vector)) yields the same value but depends on the GNU
  'typeof' keyword, which is not available in strict ISO C++ mode.
*/
f4vector* Create1DArray_v4sf(const unsigned int numLines)
{
	f4vector* array=NULL;
	if (MEMALIGN( (void**)&array, 16, sizeof(f4vector)*numLines )) {
		cerr << "cannot allocate aligned memory" << endl;
		exit(3);
	}
	return array;
}
|
||||
|
||||
//! \brief this function allocates a 3D array, which is aligned to 16 byte
|
||||
f4vector*** Create3DArray_v4sf(const unsigned int* numLines)
|
||||
{
|
||||
|
@ -47,8 +47,10 @@ void Delete_N_3DArray(FDTD_FLOAT**** array, const unsigned int* numLines);
|
||||
void Dump_N_3DArray2File(ostream &file, FDTD_FLOAT**** array, const unsigned int* numLines);
|
||||
|
||||
|
||||
void Delete1DArray_v4sf(f4vector* array);
|
||||
void Delete3DArray_v4sf(f4vector*** array, const unsigned int* numLines);
|
||||
void Delete_N_3DArray_v4sf(f4vector**** array, const unsigned int* numLines);
|
||||
f4vector* Create1DArray_v4sf(const unsigned int numLines);
|
||||
f4vector*** Create3DArray_v4sf(const unsigned int* numLines);
|
||||
f4vector**** Create_N_3DArray_v4sf(const unsigned int* numLines);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user