sse tests

pull/1/head
Sebastian Held 2010-04-21 11:18:22 +02:00
parent d611e5c8b1
commit 95188dd15e
9 changed files with 318 additions and 5 deletions

145
FDTD/engine_sse.cpp Normal file
View File

@ -0,0 +1,145 @@
/*
* Copyright (C) 2010 Thorsten Liebig (Thorsten.Liebig@gmx.de)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "engine_sse.h"
#include "tools/array_ops.h"
//! \brief Factory: build an Engine_sse for the given operator and initialise it.
//! The returned object is heap-allocated; the caller owns it and must delete it.
Engine_sse* Engine_sse::New(const Operator* op)
{
	Engine_sse* const engine = new Engine_sse(op);
	// Init() is virtual and therefore run here, after construction has finished
	engine->Init();
	return engine;
}
//! \brief Store the operator and cache its number of mesh lines per direction.
//! No field storage is allocated here; that is done by Init().
//! \param op operator providing the mesh size and the update coefficients
Engine_sse::Engine_sse(const Operator* op) : Engine(op)
{
	Op = op;
	for (int n=0;n<3;++n)
	{
		numLines[n] = Op->GetNumberOfLines(n);
	}
	// The destructor calls Reset(), which passes these pointers to the
	// Delete_* helpers. Give them a defined value so that destroying an
	// object on which Init() never ran does not free garbage pointers.
	volt_ = NULL;
	curr = NULL;
	numTS = 0;
}
//! \brief Release the field storage owned by this engine.
Engine_sse::~Engine_sse()
{
	// all clean-up lives in Reset()
	Reset();
}
void Engine_sse::Init()
{
numTS = 0;
volt_ = Create_N_3DArray_v4sf(numLines);
curr = Create_N_3DArray(numLines);
}
//! \brief Free the voltage and current arrays and clear the pointers to them.
//! Safe to call more than once: Delete_N_3DArray_v4sf returns early on NULL.
void Engine_sse::Reset()
{
	Delete_N_3DArray_v4sf(volt_,numLines);
	// Clear the pointer that was actually freed; leaving volt_ dangling
	// makes the next Reset() (e.g. from the destructor) free it a second time.
	volt_=NULL;
	// NOTE(review): `volt` is not declared in Engine_sse (presumably inherited
	// from Engine). The assignment is kept unchanged from the original code.
	volt=NULL;
	Delete_N_3DArray(curr,numLines);
	curr=NULL;
}
//! \brief Update all voltages from the old voltages and the surrounding currents.
//!
//! The voltages are stored packed: element pos[2] of the last dimension holds
//! four consecutive z-lines, lane pos[3] being scalar z index 4*pos[2]+pos[3]
//! (the same packing Operator uses when it fills vv_ / vi_). The currents are
//! stored as plain scalars and must therefore be addressed with the scalar
//! z index, not with the packed index.
void Engine_sse::UpdateVoltages()
{
	unsigned int pos[4];	// [0..2]: x, y, packed z index; [3]: lane inside the f4vector
	bool shift[3];			// false on the first line of a direction, true otherwise

	for (pos[0]=0;pos[0]<numLines[0];++pos[0])
	{
		shift[0]=pos[0];
		for (pos[1]=0;pos[1]<numLines[1];++pos[1])
		{
			shift[1]=pos[1];
			for (pos[2]=0;pos[2]<numLines[2]/4;++pos[2])
			{
				// voltage-from-voltage part: one vector multiply handles four z-lines
				volt_[0][pos[0]][pos[1]][pos[2]].v *= Op->vv_[0][pos[0]][pos[1]][pos[2]].v;
				volt_[1][pos[0]][pos[1]][pos[2]].v *= Op->vv_[1][pos[0]][pos[1]][pos[2]].v;
				volt_[2][pos[0]][pos[1]][pos[2]].v *= Op->vv_[2][pos[0]][pos[1]][pos[2]].v;

				// voltage-from-current part, lane by lane because curr is scalar
				for (pos[3]=0;pos[3]<4;++pos[3])
				{
					// scalar z index of this lane
					const unsigned int z = pos[2]*4 + pos[3];
					// on the first z-line the "previous" neighbour is the line itself
					shift[2]=z;

					//for x
					volt_[0][pos[0]][pos[1]][pos[2]].f[pos[3]] += Op->vi_[0][pos[0]][pos[1]][pos[2]].f[pos[3]] * ( curr[2][pos[0]][pos[1]][z] - curr[2][pos[0]][pos[1]-shift[1]][z] - curr[1][pos[0]][pos[1]][z] + curr[1][pos[0]][pos[1]][z-shift[2]]);
					//for y
					volt_[1][pos[0]][pos[1]][pos[2]].f[pos[3]] += Op->vi_[1][pos[0]][pos[1]][pos[2]].f[pos[3]] * ( curr[0][pos[0]][pos[1]][z] - curr[0][pos[0]][pos[1]][z-shift[2]] - curr[2][pos[0]][pos[1]][z] + curr[2][pos[0]-shift[0]][pos[1]][z]);
					//for z
					volt_[2][pos[0]][pos[1]][pos[2]].f[pos[3]] += Op->vi_[2][pos[0]][pos[1]][pos[2]].f[pos[3]] * ( curr[1][pos[0]][pos[1]][z] - curr[1][pos[0]-shift[0]][pos[1]][z] - curr[0][pos[0]][pos[1]][z] + curr[0][pos[0]][pos[1]-shift[1]][z]);
				}
			}
		}
	}
}
void Engine_sse::ApplyVoltageExcite()
{
int exc_pos;
//soft voltage excitation here (E-field excite)
for (unsigned int n=0;n<Op->E_Exc_Count;++n)
{
exc_pos = (int)numTS - (int)Op->E_Exc_delay[n];
exc_pos *= (exc_pos>0 && exc_pos<=(int)Op->ExciteLength);
// if (n==0) cerr << numTS << " => " << Op->ExciteSignal[exc_pos] << endl;
volt[Op->E_Exc_dir[n]][Op->E_Exc_index[0][n]][Op->E_Exc_index[1][n]][Op->E_Exc_index[2][n]] += Op->E_Exc_amp[n]*Op->ExciteSignal[exc_pos];
}
}
void Engine_sse::UpdateCurrents()
{
unsigned int pos[3];
for (pos[0]=0;pos[0]<numLines[0]-1;++pos[0])
{
for (pos[1]=0;pos[1]<numLines[1]-1;++pos[1])
{
for (pos[2]=0;pos[2]<numLines[2]-1;++pos[2])
{
//do the updates here
//for x
curr[0][pos[0]][pos[1]][pos[2]] *= Op->ii[0][pos[0]][pos[1]][pos[2]];
curr[0][pos[0]][pos[1]][pos[2]] += Op->iv[0][pos[0]][pos[1]][pos[2]] * ( volt[2][pos[0]][pos[1]][pos[2]] - volt[2][pos[0]][pos[1]+1][pos[2]] - volt[1][pos[0]][pos[1]][pos[2]] + volt[1][pos[0]][pos[1]][pos[2]+1]);
//for y
curr[1][pos[0]][pos[1]][pos[2]] *= Op->ii[1][pos[0]][pos[1]][pos[2]];
curr[1][pos[0]][pos[1]][pos[2]] += Op->iv[1][pos[0]][pos[1]][pos[2]] * ( volt[0][pos[0]][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]+1] - volt[2][pos[0]][pos[1]][pos[2]] + volt[2][pos[0]+1][pos[1]][pos[2]]);
//for z
curr[2][pos[0]][pos[1]][pos[2]] *= Op->ii[2][pos[0]][pos[1]][pos[2]];
curr[2][pos[0]][pos[1]][pos[2]] += Op->iv[2][pos[0]][pos[1]][pos[2]] * ( volt[1][pos[0]][pos[1]][pos[2]] - volt[1][pos[0]+1][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]] + volt[0][pos[0]][pos[1]+1][pos[2]]);
}
}
}
}
//! \brief Hook for a current excitation; intentionally empty in this engine.
//! It is still called once per timestep by IterateTS().
void Engine_sse::ApplyCurrentExcite()
{
}
//! \brief Advance the simulation by a number of timesteps.
//! \param iterTS number of timesteps to run
//! \return always true
bool Engine_sse::IterateTS(unsigned int iterTS)
{
	for (unsigned int remaining=iterTS; remaining!=0; --remaining)
	{
		// one full timestep: voltages first, then currents, each followed by its excitation
		UpdateVoltages();
		ApplyVoltageExcite();
		UpdateCurrents();
		ApplyCurrentExcite();
		++numTS;
	}
	return true;
}

57
FDTD/engine_sse.h Normal file
View File

@ -0,0 +1,57 @@
/*
* Copyright (C) 2010 Thorsten Liebig (Thorsten.Liebig@gmx.de)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ENGINE_SSE_H
#define ENGINE_SSE_H
#include "operator.h"
#include "engine.h"
//! \brief Experimental FDTD engine that keeps the voltages in packed
//! four-float vectors (f4vector) so the voltage update can use vector arithmetic.
//! Instances are created through New(); the constructor is protected.
class Engine_sse : public Engine
{
public:
//! Create an engine for the given operator and call Init() on it.
//! The caller owns the returned pointer.
static Engine_sse* New(const Operator* op);
//! Calls Reset() to free the field arrays.
virtual ~Engine_sse();
//! Set the timestep counter to zero and allocate the voltage and current arrays.
virtual void Init();
//! Free the voltage and current arrays.
virtual void Reset();
//!Iterate a number of timesteps
virtual bool IterateTS(unsigned int iterTS);
//! Number of timesteps iterated since Init().
virtual unsigned int GetNumberOfTimesteps() {return numTS;};
// Disabled: there is no accessor for the packed voltages yet.
// virtual f4vector**** GetVoltages() {return volt;};
//! Scalar current array, indexed [direction][x][y][z].
virtual FDTD_FLOAT**** GetCurrents() {return curr;};
protected:
//! Stores the operator and reads its number of lines; allocates nothing.
Engine_sse(const Operator* op);
//! Operator supplying the mesh size, update coefficients and excitation data.
const Operator* Op;
//! Voltage update (packed layout), see engine_sse.cpp.
virtual void UpdateVoltages();
//! Adds the soft voltage excitation for the current timestep.
virtual void ApplyVoltageExcite();
//! Current update (scalar layout).
virtual void UpdateCurrents();
//! Current excitation hook; empty in this engine.
virtual void ApplyCurrentExcite();
//! Number of mesh lines per direction, taken from Op->GetNumberOfLines(n).
unsigned int numLines[3];
//! Packed voltages: [direction][x][y][z/4], each element holding four z-lines.
f4vector**** volt_;
//! Scalar currents: [direction][x][y][z].
FDTD_FLOAT**** curr;
//! Timestep counter, incremented once per iteration in IterateTS().
unsigned int numTS;
};
#endif // ENGINE_SSE_H

View File

@ -47,6 +47,8 @@ void Operator::Init()
E_Exc_dir=NULL; E_Exc_dir=NULL;
vv=NULL; vv=NULL;
vi=NULL; vi=NULL;
vv_=NULL;
vi_=NULL;
iv=NULL; iv=NULL;
ii=NULL; ii=NULL;
for (int n=0;n<3;++n) for (int n=0;n<3;++n)
@ -75,6 +77,8 @@ void Operator::Reset()
delete[] E_Exc_amp; delete[] E_Exc_amp;
Delete_N_3DArray(vv,numLines); Delete_N_3DArray(vv,numLines);
Delete_N_3DArray(vi,numLines); Delete_N_3DArray(vi,numLines);
Delete_N_3DArray_v4sf(vv_,numLines);
Delete_N_3DArray_v4sf(vi_,numLines);
Delete_N_3DArray(iv,numLines); Delete_N_3DArray(iv,numLines);
Delete_N_3DArray(ii,numLines); Delete_N_3DArray(ii,numLines);
for (int n=0;n<3;++n) for (int n=0;n<3;++n)
@ -502,10 +506,14 @@ void Operator::InitOperator()
{ {
Delete_N_3DArray(vv,numLines); Delete_N_3DArray(vv,numLines);
Delete_N_3DArray(vi,numLines); Delete_N_3DArray(vi,numLines);
Delete_N_3DArray_v4sf(vv_,numLines);
Delete_N_3DArray_v4sf(vi_,numLines);
Delete_N_3DArray(iv,numLines); Delete_N_3DArray(iv,numLines);
Delete_N_3DArray(ii,numLines); Delete_N_3DArray(ii,numLines);
vv = Create_N_3DArray(numLines); vv = Create_N_3DArray(numLines);
vi = Create_N_3DArray(numLines); vi = Create_N_3DArray(numLines);
vv_ = Create_N_3DArray_v4sf(numLines);
vi_ = Create_N_3DArray_v4sf(numLines);
iv = Create_N_3DArray(numLines); iv = Create_N_3DArray(numLines);
ii = Create_N_3DArray(numLines); ii = Create_N_3DArray(numLines);
} }
@ -516,6 +524,9 @@ inline void Operator::Calc_ECOperatorPos(int n, unsigned int* pos)
vv[n][pos[0]][pos[1]][pos[2]] = (1-dT*EC_G[n][i]/2/EC_C[n][i])/(1+dT*EC_G[n][i]/2/EC_C[n][i]); vv[n][pos[0]][pos[1]][pos[2]] = (1-dT*EC_G[n][i]/2/EC_C[n][i])/(1+dT*EC_G[n][i]/2/EC_C[n][i]);
vi[n][pos[0]][pos[1]][pos[2]] = (dT/EC_C[n][i])/(1+dT*EC_G[n][i]/2/EC_C[n][i]); vi[n][pos[0]][pos[1]][pos[2]] = (dT/EC_C[n][i])/(1+dT*EC_G[n][i]/2/EC_C[n][i]);
vv_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = vv[n][pos[0]][pos[1]][pos[2]];
vi_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = vi[n][pos[0]][pos[1]][pos[2]];
ii[n][pos[0]][pos[1]][pos[2]] = (1-dT*EC_R[n][i]/2/EC_L[n][i])/(1+dT*EC_R[n][i]/2/EC_L[n][i]); ii[n][pos[0]][pos[1]][pos[2]] = (1-dT*EC_R[n][i]/2/EC_L[n][i])/(1+dT*EC_R[n][i]/2/EC_L[n][i]);
iv[n][pos[0]][pos[1]][pos[2]] = (dT/EC_L[n][i])/(1+dT*EC_R[n][i]/2/EC_L[n][i]); iv[n][pos[0]][pos[1]][pos[2]] = (dT/EC_L[n][i])/(1+dT*EC_R[n][i]/2/EC_L[n][i]);
} }

View File

@ -24,6 +24,21 @@
#define FDTD_FLOAT float #define FDTD_FLOAT float
#if __SIZEOF_FLOAT__ != 4
#error wrong size of float
#endif
//! 16-byte vector type declared with the compiler's vector_size attribute.
typedef float v4sf __attribute__ ((vector_size (16))); // vector of four single floats
//! The same 16 bytes viewed either as one vector or as four separate floats.
union f4vector
{
v4sf v; // whole-vector view, used for element-wise vector arithmetic
float f[4]; // per-lane view, used for scalar access to a single float
};
//! Abstract base-class for the FDTD-operator //! Abstract base-class for the FDTD-operator
class Operator class Operator
{ {
@ -130,6 +145,8 @@ public:
FDTD_FLOAT**** vi; //calc new voltage from old current FDTD_FLOAT**** vi; //calc new voltage from old current
FDTD_FLOAT**** ii; //calc new current from old current FDTD_FLOAT**** ii; //calc new current from old current
FDTD_FLOAT**** iv; //calc new current from old voltage FDTD_FLOAT**** iv; //calc new current from old voltage
f4vector**** vv_; //calc new voltage from old voltage
f4vector**** vi_; //calc new voltage from old current
//Excitation time-signal //Excitation time-signal
unsigned int ExciteLength; unsigned int ExciteLength;

View File

@ -38,7 +38,8 @@ SOURCES += main.cpp \
openems.cpp \ openems.cpp \
FDTD/engine_multithread.cpp \ FDTD/engine_multithread.cpp \
FDTD/operator_cylinder.cpp \ FDTD/operator_cylinder.cpp \
FDTD/engine_cylinder.cpp FDTD/engine_cylinder.cpp \
FDTD/engine_sse.cpp
HEADERS += tools/ErrorMsg.h \ HEADERS += tools/ErrorMsg.h \
tools/AdrOp.h \ tools/AdrOp.h \
tools/constants.h \ tools/constants.h \
@ -54,10 +55,11 @@ HEADERS += tools/ErrorMsg.h \
openems.h \ openems.h \
FDTD/engine_multithread.h \ FDTD/engine_multithread.h \
FDTD/operator_cylinder.h \ FDTD/operator_cylinder.h \
FDTD/engine_cylinder.h FDTD/engine_cylinder.h \
QMAKE_CXXFLAGS_RELEASE = -O2 \ FDTD/engine_sse.h
QMAKE_CXXFLAGS_RELEASE = -O3 \
-g \ -g \
-march=native -march=native
QMAKE_CXXFLAGS_DEBUG = -O0 \ QMAKE_CXXFLAGS_DEBUG = -O0 \
-g \ -g \
-march=native -march=native

View File

@ -21,6 +21,7 @@
#include "FDTD/engine.h" #include "FDTD/engine.h"
#include "FDTD/engine_cylinder.h" #include "FDTD/engine_cylinder.h"
#include "FDTD/engine_multithread.h" #include "FDTD/engine_multithread.h"
#include "FDTD/engine_sse.h"
#include "FDTD/processvoltage.h" #include "FDTD/processvoltage.h"
#include "FDTD/processcurrent.h" #include "FDTD/processcurrent.h"
#include "FDTD/processfields_td.h" #include "FDTD/processfields_td.h"
@ -112,6 +113,12 @@ bool openEMS::parseCommandLineArgument( const char *argv )
cout << "openEMS - fixed number of threads: " << m_engine_numThreads << endl; cout << "openEMS - fixed number of threads: " << m_engine_numThreads << endl;
return true; return true;
} }
else if (strcmp(argv,"--engine=sse")==0)
{
cout << "openEMS - enabled sse engine" << endl;
m_engine = EngineType_SSE;
return true;
}
return false; return false;
} }

View File

@ -61,7 +61,7 @@ protected:
Engine* FDTD_Eng; Engine* FDTD_Eng;
ProcessingArray* PA; ProcessingArray* PA;
enum EngineType {EngineType_Standard,EngineType_Multithreaded}; enum EngineType {EngineType_Standard,EngineType_Multithreaded,EngineType_SSE};
EngineType m_engine; EngineType m_engine;
unsigned int m_engine_numThreads; unsigned int m_engine_numThreads;
}; };

View File

@ -91,3 +91,70 @@ void Dump_N_3DArray2File(ostream &file, FDTD_FLOAT**** array, unsigned int* numL
} }
} }
} }
//! \brief Free a 3D f4vector array created by Create3DArray_v4sf.
//! \param array array to free; NULL is accepted and ignored
//! \param numLines number of lines per direction the array was created with
void Delete3DArray_v4sf(f4vector*** array, const unsigned int* numLines)
{
	if (array!=NULL)
	{
		for (unsigned int x=0; x<numLines[0]; ++x)
		{
			f4vector** plane = array[x];
			// free every z-row of this x-plane, then the plane's row table
			for (unsigned int y=0; y<numLines[1]; ++y)
				delete[] plane[y];
			delete[] plane;
		}
		delete[] array;
	}
}
//! \brief Free the three per-direction 3D arrays created by Create_N_3DArray_v4sf.
//! \param array array to free; NULL is accepted and ignored
//! \param numLines number of lines per direction the array was created with
void Delete_N_3DArray_v4sf(f4vector**** array, const unsigned int* numLines)
{
	if (!array)
		return;
	// one 3D array per field direction
	Delete3DArray_v4sf(array[0],numLines);
	Delete3DArray_v4sf(array[1],numLines);
	Delete3DArray_v4sf(array[2],numLines);
	delete[] array;
}
//! \brief Allocate a zero-filled 3D array of packed float vectors.
//! The last dimension holds numLines[2]/4 vectors (integer division), i.e.
//! four z-lines per element.
//! \param numLines number of lines per direction
//! \return newly allocated array; free it with Delete3DArray_v4sf
f4vector*** Create3DArray_v4sf(const unsigned int* numLines)
{
	const unsigned int nx = numLines[0];
	const unsigned int ny = numLines[1];
	const unsigned int nvec = numLines[2]/4;

	f4vector*** grid = new f4vector**[nx];
	for (unsigned int x=0; x<nx; ++x)
	{
		grid[x] = new f4vector*[ny];
		for (unsigned int y=0; y<ny; ++y)
		{
			f4vector* row = new f4vector[nvec];
			grid[x][y] = row;
			// clear all four lanes of every vector in this row
			for (unsigned int v=0; v<nvec; ++v)
			{
				for (int lane=0; lane<4; ++lane)
					row[v].f[lane] = 0;
			}
		}
	}
	return grid;
}
//! \brief Allocate three zero-filled packed 3D arrays, one per field direction.
//! Terminates the program with exit(1) if numLines[2] is not a multiple of 4.
//! \param numLines number of lines per direction
//! \return newly allocated array; free it with Delete_N_3DArray_v4sf
f4vector**** Create_N_3DArray_v4sf(const unsigned int* numLines)
{
	const bool packable = ((numLines[2] % 4) == 0);
	if (!packable)
	{
		cerr << "sse engine needs number of mesh lines divideable by 4" << endl;
		exit(1);
	}

	f4vector**** fields = new f4vector***[3];
	int n = 0;
	while (n<3)
	{
		fields[n] = Create3DArray_v4sf(numLines);
		++n;
	}
	return fields;
}

View File

@ -28,4 +28,11 @@ void Delete_N_3DArray(FDTD_FLOAT**** array, const unsigned int* numLines);
void Dump_N_3DArray2File(ostream &file, FDTD_FLOAT**** array, const unsigned int* numLines); void Dump_N_3DArray2File(ostream &file, FDTD_FLOAT**** array, const unsigned int* numLines);
void Delete3DArray_v4sf(f4vector*** array, const unsigned int* numLines);
void Delete_N_3DArray_v4sf(f4vector**** array, const unsigned int* numLines);
f4vector*** Create3DArray_v4sf(const unsigned int* numLines);
f4vector**** Create_N_3DArray_v4sf(const unsigned int* numLines);
#endif // ARRAY_OPS_H #endif // ARRAY_OPS_H