From 71dde7ea49918c033ae91487c38db959d381736f Mon Sep 17 00:00:00 2001
From: Yifeng Li <tomli@tomli.me>
Date: Wed, 18 Jan 2023 17:51:33 +0000
Subject: [PATCH] FDTD: reformat code of update equations.

The original update equations in the FDTD engine have extremely
long lines and are difficult to read and work with. This patch
inserts line breaks, it aligns all array indexes by x/y/z
coordinates to make it easy to visually compare.

Signed-off-by: Yifeng Li <tomli@tomli.me>
---
 FDTD/engine.cpp                |  66 ++++++++++++---
 FDTD/engine_sse.cpp            | 132 +++++++++++++++++++++++------
 FDTD/engine_sse_compressed.cpp | 148 ++++++++++++++++++++++++++-------
 3 files changed, 282 insertions(+), 64 deletions(-)

diff --git a/FDTD/engine.cpp b/FDTD/engine.cpp
index 26725e6..b7e8320 100644
--- a/FDTD/engine.cpp
+++ b/FDTD/engine.cpp
@@ -123,16 +123,37 @@ void Engine::UpdateVoltages(unsigned int startX, unsigned int numX)
 				shift[2]=pos[2];
 				//do the updates here
 				//for x
-				volt[0][pos[0]][pos[1]][pos[2]] *= Op->vv[0][pos[0]][pos[1]][pos[2]];
-				volt[0][pos[0]][pos[1]][pos[2]] += Op->vi[0][pos[0]][pos[1]][pos[2]] * ( curr[2][pos[0]][pos[1]][pos[2]] - curr[2][pos[0]][pos[1]-shift[1]][pos[2]] - curr[1][pos[0]][pos[1]][pos[2]] + curr[1][pos[0]][pos[1]][pos[2]-shift[2]]);
+				volt[0][pos[0]][pos[1]][pos[2]] *=
+				    Op->vv[0][pos[0]][pos[1]][pos[2]];
+				volt[0][pos[0]][pos[1]][pos[2]] +=
+				    Op->vi[0][pos[0]][pos[1]][pos[2]] * (
+				        curr[2][pos[0]][pos[1]         ][pos[2]         ] -
+				        curr[2][pos[0]][pos[1]-shift[1]][pos[2]         ] -
+				        curr[1][pos[0]][pos[1]         ][pos[2]         ] +
+				        curr[1][pos[0]][pos[1]         ][pos[2]-shift[2]]
+				    );
 
 				//for y
-				volt[1][pos[0]][pos[1]][pos[2]] *= Op->vv[1][pos[0]][pos[1]][pos[2]];
-				volt[1][pos[0]][pos[1]][pos[2]] += Op->vi[1][pos[0]][pos[1]][pos[2]] * ( curr[0][pos[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]-shift[2]] - curr[2][pos[0]][pos[1]][pos[2]] + curr[2][pos[0]-shift[0]][pos[1]][pos[2]]);
+				volt[1][pos[0]][pos[1]][pos[2]] *=
+				    Op->vv[1][pos[0]][pos[1]][pos[2]];
+				volt[1][pos[0]][pos[1]][pos[2]] +=
+				    Op->vi[1][pos[0]][pos[1]][pos[2]] * (
+				        curr[0][pos[0]         ][pos[1]][pos[2]         ] -
+				        curr[0][pos[0]         ][pos[1]][pos[2]-shift[2]] -
+				        curr[2][pos[0]         ][pos[1]][pos[2]         ] +
+				        curr[2][pos[0]-shift[0]][pos[1]][pos[2]         ]
+				    );
 
 				//for z
-				volt[2][pos[0]][pos[1]][pos[2]] *= Op->vv[2][pos[0]][pos[1]][pos[2]];
-				volt[2][pos[0]][pos[1]][pos[2]] += Op->vi[2][pos[0]][pos[1]][pos[2]] * ( curr[1][pos[0]][pos[1]][pos[2]] - curr[1][pos[0]-shift[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]] + curr[0][pos[0]][pos[1]-shift[1]][pos[2]]);
+				volt[2][pos[0]][pos[1]][pos[2]] *=
+				    Op->vv[2][pos[0]][pos[1]][pos[2]];
+				volt[2][pos[0]][pos[1]][pos[2]] +=
+				    Op->vi[2][pos[0]][pos[1]][pos[2]] * (
+				        curr[1][pos[0]         ][pos[1]         ][pos[2]] -
+				        curr[1][pos[0]-shift[0]][pos[1]         ][pos[2]] -
+				        curr[0][pos[0]         ][pos[1]         ][pos[2]] +
+				        curr[0][pos[0]         ][pos[1]-shift[1]][pos[2]]
+				    );
 			}
 		}
 		++pos[0];
@@ -151,16 +172,37 @@ void Engine::UpdateCurrents(unsigned int startX, unsigned int numX)
 			{
 				//do the updates here
 				//for x
-				curr[0][pos[0]][pos[1]][pos[2]] *= Op->ii[0][pos[0]][pos[1]][pos[2]];
-				curr[0][pos[0]][pos[1]][pos[2]] += Op->iv[0][pos[0]][pos[1]][pos[2]] * ( volt[2][pos[0]][pos[1]][pos[2]] - volt[2][pos[0]][pos[1]+1][pos[2]] - volt[1][pos[0]][pos[1]][pos[2]] + volt[1][pos[0]][pos[1]][pos[2]+1]);
+				curr[0][pos[0]][pos[1]][pos[2]] *=
+				    Op->ii[0][pos[0]][pos[1]][pos[2]];
+				curr[0][pos[0]][pos[1]][pos[2]] +=
+				    Op->iv[0][pos[0]][pos[1]][pos[2]] * (
+				        volt[2][pos[0]][pos[1]  ][pos[2]  ] -
+				        volt[2][pos[0]][pos[1]+1][pos[2]  ] -
+				        volt[1][pos[0]][pos[1]  ][pos[2]  ] +
+				        volt[1][pos[0]][pos[1]  ][pos[2]+1]
+				    );
 
 				//for y
-				curr[1][pos[0]][pos[1]][pos[2]] *= Op->ii[1][pos[0]][pos[1]][pos[2]];
-				curr[1][pos[0]][pos[1]][pos[2]] += Op->iv[1][pos[0]][pos[1]][pos[2]] * ( volt[0][pos[0]][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]+1] - volt[2][pos[0]][pos[1]][pos[2]] + volt[2][pos[0]+1][pos[1]][pos[2]]);
+				curr[1][pos[0]][pos[1]][pos[2]] *=
+				    Op->ii[1][pos[0]][pos[1]][pos[2]];
+				curr[1][pos[0]][pos[1]][pos[2]] +=
+				    Op->iv[1][pos[0]][pos[1]][pos[2]] * (
+				        volt[0][pos[0]  ][pos[1]][pos[2]  ] -
+				        volt[0][pos[0]  ][pos[1]][pos[2]+1] -
+				        volt[2][pos[0]  ][pos[1]][pos[2]  ] +
+				        volt[2][pos[0]+1][pos[1]][pos[2]  ]
+				    );
 
 				//for z
-				curr[2][pos[0]][pos[1]][pos[2]] *= Op->ii[2][pos[0]][pos[1]][pos[2]];
-				curr[2][pos[0]][pos[1]][pos[2]] += Op->iv[2][pos[0]][pos[1]][pos[2]] * ( volt[1][pos[0]][pos[1]][pos[2]] - volt[1][pos[0]+1][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]] + volt[0][pos[0]][pos[1]+1][pos[2]]);
+				curr[2][pos[0]][pos[1]][pos[2]] *=
+				    Op->ii[2][pos[0]][pos[1]][pos[2]];
+				curr[2][pos[0]][pos[1]][pos[2]] +=
+				    Op->iv[2][pos[0]][pos[1]][pos[2]] * (
+				        volt[1][pos[0]  ][pos[1]  ][pos[2]] -
+				        volt[1][pos[0]+1][pos[1]  ][pos[2]] -
+				        volt[0][pos[0]  ][pos[1]  ][pos[2]] +
+				        volt[0][pos[0]  ][pos[1]+1][pos[2]]
+				    );
 			}
 		}
 		++pos[0];
diff --git a/FDTD/engine_sse.cpp b/FDTD/engine_sse.cpp
index 2e06685..067c4b7 100644
--- a/FDTD/engine_sse.cpp
+++ b/FDTD/engine_sse.cpp
@@ -91,16 +91,37 @@ void Engine_sse::UpdateVoltages(unsigned int startX, unsigned int numX)
 			for (pos[2]=1; pos[2]<numVectors; ++pos[2])
 			{
 				// x-polarization
-				f4_volt[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[0][pos[0]][pos[1]][pos[2]].v;
-				f4_volt[0][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[0][pos[0]][pos[1]][pos[2]].v * ( f4_curr[2][pos[0]][pos[1]][pos[2]].v - f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - f4_curr[1][pos[0]][pos[1]][pos[2]].v + f4_curr[1][pos[0]][pos[1]][pos[2]-1].v );
+				f4_volt[0][pos[0]][pos[1]][pos[2]].v *=
+				    Op->f4_vv[0][pos[0]][pos[1]][pos[2]].v;
+				f4_volt[0][pos[0]][pos[1]][pos[2]].v +=
+				    Op->f4_vi[0][pos[0]][pos[1]][pos[2]].v * (
+				        f4_curr[2][pos[0]][pos[1]         ][pos[2]].v -
+				        f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v -
+				        f4_curr[1][pos[0]][pos[1]         ][pos[2]].v +
+				        f4_curr[1][pos[0]][pos[1]         ][pos[2]-1].v
+				    );
 
 				// y-polarization
-				f4_volt[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[1][pos[0]][pos[1]][pos[2]].v;
-				f4_volt[1][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[1][pos[0]][pos[1]][pos[2]].v * ( f4_curr[0][pos[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]-1].v - f4_curr[2][pos[0]][pos[1]][pos[2]].v + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]].v);
+				f4_volt[1][pos[0]][pos[1]][pos[2]].v *=
+				    Op->f4_vv[1][pos[0]][pos[1]][pos[2]].v;
+				f4_volt[1][pos[0]][pos[1]][pos[2]].v +=
+				    Op->f4_vi[1][pos[0]][pos[1]][pos[2]].v * (
+				        f4_curr[0][pos[0]         ][pos[1]][pos[2]  ].v -
+				        f4_curr[0][pos[0]         ][pos[1]][pos[2]-1].v -
+				        f4_curr[2][pos[0]         ][pos[1]][pos[2]  ].v +
+				        f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]  ].v
+				    );
 
 				// z-polarization
-				f4_volt[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[2][pos[0]][pos[1]][pos[2]].v;
-				f4_volt[2][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[2][pos[0]][pos[1]][pos[2]].v * ( f4_curr[1][pos[0]][pos[1]][pos[2]].v - f4_curr[1][pos[0]-shift[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]].v + f4_curr[0][pos[0]][pos[1]-shift[1]][pos[2]].v);
+				f4_volt[2][pos[0]][pos[1]][pos[2]].v *=
+				    Op->f4_vv[2][pos[0]][pos[1]][pos[2]].v;
+				f4_volt[2][pos[0]][pos[1]][pos[2]].v +=
+				    Op->f4_vi[2][pos[0]][pos[1]][pos[2]].v * (
+				        f4_curr[1][pos[0]         ][pos[1]         ][pos[2]].v -
+				        f4_curr[1][pos[0]-shift[0]][pos[1]         ][pos[2]].v -
+				        f4_curr[0][pos[0]         ][pos[1]         ][pos[2]].v +
+				        f4_curr[0][pos[0]         ][pos[1]-shift[1]][pos[2]].v
+				    );
 			}
 
 			// for pos[2] = 0
@@ -109,20 +130,41 @@ void Engine_sse::UpdateVoltages(unsigned int startX, unsigned int numX)
 			temp.f[1] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[0];
 			temp.f[2] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[1];
 			temp.f[3] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[2];
-			f4_volt[0][pos[0]][pos[1]][0].v *= Op->f4_vv[0][pos[0]][pos[1]][0].v;
-			f4_volt[0][pos[0]][pos[1]][0].v += Op->f4_vi[0][pos[0]][pos[1]][0].v * ( f4_curr[2][pos[0]][pos[1]][0].v - f4_curr[2][pos[0]][pos[1]-shift[1]][0].v - f4_curr[1][pos[0]][pos[1]][0].v + temp.v );
+			f4_volt[0][pos[0]][pos[1]][0].v *=
+			    Op->f4_vv[0][pos[0]][pos[1]][0].v;
+			f4_volt[0][pos[0]][pos[1]][0].v +=
+			    Op->f4_vi[0][pos[0]][pos[1]][0].v * (
+			        f4_curr[2][pos[0]][pos[1]         ][0].v -
+			        f4_curr[2][pos[0]][pos[1]-shift[1]][0].v -
+			        f4_curr[1][pos[0]][pos[1]         ][0].v +
+			        temp.v
+			    );
 
 			// y-polarization
 			temp.f[0] = 0;
 			temp.f[1] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[0];
 			temp.f[2] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[1];
 			temp.f[3] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[2];
-			f4_volt[1][pos[0]][pos[1]][0].v *= Op->f4_vv[1][pos[0]][pos[1]][0].v;
-			f4_volt[1][pos[0]][pos[1]][0].v += Op->f4_vi[1][pos[0]][pos[1]][0].v * ( f4_curr[0][pos[0]][pos[1]][0].v - temp.v - f4_curr[2][pos[0]][pos[1]][0].v + f4_curr[2][pos[0]-shift[0]][pos[1]][0].v);
+			f4_volt[1][pos[0]][pos[1]][0].v *=
+			    Op->f4_vv[1][pos[0]][pos[1]][0].v;
+			f4_volt[1][pos[0]][pos[1]][0].v +=
+			    Op->f4_vi[1][pos[0]][pos[1]][0].v * (
+			        f4_curr[0][pos[0]         ][pos[1]][0].v -
+			        temp.v -
+			        f4_curr[2][pos[0]         ][pos[1]][0].v +
+			        f4_curr[2][pos[0]-shift[0]][pos[1]][0].v
+			    );
 
 			// z-polarization
-			f4_volt[2][pos[0]][pos[1]][0].v *= Op->f4_vv[2][pos[0]][pos[1]][0].v;
-			f4_volt[2][pos[0]][pos[1]][0].v += Op->f4_vi[2][pos[0]][pos[1]][0].v * ( f4_curr[1][pos[0]][pos[1]][0].v - f4_curr[1][pos[0]-shift[0]][pos[1]][0].v - f4_curr[0][pos[0]][pos[1]][0].v + f4_curr[0][pos[0]][pos[1]-shift[1]][0].v);
+			f4_volt[2][pos[0]][pos[1]][0].v *=
+			    Op->f4_vv[2][pos[0]][pos[1]][0].v;
+			f4_volt[2][pos[0]][pos[1]][0].v +=
+			    Op->f4_vi[2][pos[0]][pos[1]][0].v * (
+			        f4_curr[1][pos[0]         ][pos[1]         ][0].v -
+			        f4_curr[1][pos[0]-shift[0]][pos[1]         ][0].v -
+			        f4_curr[0][pos[0]         ][pos[1]         ][0].v +
+			        f4_curr[0][pos[0]         ][pos[1]-shift[1]][0].v
+			    );
 		}
 		++pos[0];
 	}
@@ -141,16 +183,37 @@ void Engine_sse::UpdateCurrents(unsigned int startX, unsigned int numX)
 			for (pos[2]=0; pos[2]<numVectors-1; ++pos[2])
 			{
 				// x-pol
-				f4_curr[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[0][pos[0]][pos[1]][pos[2]].v;
-				f4_curr[0][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[0][pos[0]][pos[1]][pos[2]].v * ( f4_volt[2][pos[0]][pos[1]][pos[2]].v - f4_volt[2][pos[0]][pos[1]+1][pos[2]].v - f4_volt[1][pos[0]][pos[1]][pos[2]].v + f4_volt[1][pos[0]][pos[1]][pos[2]+1].v);
+				f4_curr[0][pos[0]][pos[1]][pos[2]].v *=
+				    Op->f4_ii[0][pos[0]][pos[1]][pos[2]].v;
+				f4_curr[0][pos[0]][pos[1]][pos[2]].v +=
+				    Op->f4_iv[0][pos[0]][pos[1]][pos[2]].v * (
+				        f4_volt[2][pos[0]][pos[1]  ][pos[2]  ].v -
+				        f4_volt[2][pos[0]][pos[1]+1][pos[2]  ].v -
+				        f4_volt[1][pos[0]][pos[1]  ][pos[2]  ].v +
+				        f4_volt[1][pos[0]][pos[1]  ][pos[2]+1].v
+				    );
 
 				// y-pol
-				f4_curr[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[1][pos[0]][pos[1]][pos[2]].v;
-				f4_curr[1][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[1][pos[0]][pos[1]][pos[2]].v * ( f4_volt[0][pos[0]][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]+1].v - f4_volt[2][pos[0]][pos[1]][pos[2]].v + f4_volt[2][pos[0]+1][pos[1]][pos[2]].v);
+				f4_curr[1][pos[0]][pos[1]][pos[2]].v *=
+				    Op->f4_ii[1][pos[0]][pos[1]][pos[2]].v;
+				f4_curr[1][pos[0]][pos[1]][pos[2]].v +=
+				    Op->f4_iv[1][pos[0]][pos[1]][pos[2]].v * (
+				        f4_volt[0][pos[0]  ][pos[1]][pos[2]  ].v -
+				        f4_volt[0][pos[0]  ][pos[1]][pos[2]+1].v -
+				        f4_volt[2][pos[0]  ][pos[1]][pos[2]  ].v +
+				        f4_volt[2][pos[0]+1][pos[1]][pos[2]  ].v
+				    );
 
 				// z-pol
-				f4_curr[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[2][pos[0]][pos[1]][pos[2]].v;
-				f4_curr[2][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[2][pos[0]][pos[1]][pos[2]].v * ( f4_volt[1][pos[0]][pos[1]][pos[2]].v - f4_volt[1][pos[0]+1][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]].v + f4_volt[0][pos[0]][pos[1]+1][pos[2]].v);
+				f4_curr[2][pos[0]][pos[1]][pos[2]].v *=
+				    Op->f4_ii[2][pos[0]][pos[1]][pos[2]].v;
+				f4_curr[2][pos[0]][pos[1]][pos[2]].v +=
+				    Op->f4_iv[2][pos[0]][pos[1]][pos[2]].v * (
+				        f4_volt[1][pos[0]  ][pos[1]  ][pos[2]].v -
+				        f4_volt[1][pos[0]+1][pos[1]  ][pos[2]].v -
+				        f4_volt[0][pos[0]  ][pos[1]  ][pos[2]].v +
+				        f4_volt[0][pos[0]  ][pos[1]+1][pos[2]].v
+				    );
 			}
 
 			// for pos[2] = numVectors-1
@@ -159,20 +222,41 @@ void Engine_sse::UpdateCurrents(unsigned int startX, unsigned int numX)
 			temp.f[1] = f4_volt[1][pos[0]][pos[1]][0].f[2];
 			temp.f[2] = f4_volt[1][pos[0]][pos[1]][0].f[3];
 			temp.f[3] = 0;
-			f4_curr[0][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii[0][pos[0]][pos[1]][numVectors-1].v;
-			f4_curr[0][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv[0][pos[0]][pos[1]][numVectors-1].v * ( f4_volt[2][pos[0]][pos[1]][numVectors-1].v - f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v - f4_volt[1][pos[0]][pos[1]][numVectors-1].v + temp.v);
+			f4_curr[0][pos[0]][pos[1]][numVectors-1].v *=
+			    Op->f4_ii[0][pos[0]][pos[1]][numVectors-1].v;
+			f4_curr[0][pos[0]][pos[1]][numVectors-1].v +=
+			    Op->f4_iv[0][pos[0]][pos[1]][numVectors-1].v * (
+			        f4_volt[2][pos[0]][pos[1]  ][numVectors-1].v -
+			        f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v -
+			        f4_volt[1][pos[0]][pos[1]  ][numVectors-1].v +
+			        temp.v
+			    );
 
 			// y-pol
 			temp.f[0] = f4_volt[0][pos[0]][pos[1]][0].f[1];
 			temp.f[1] = f4_volt[0][pos[0]][pos[1]][0].f[2];
 			temp.f[2] = f4_volt[0][pos[0]][pos[1]][0].f[3];
 			temp.f[3] = 0;
-			f4_curr[1][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii[1][pos[0]][pos[1]][numVectors-1].v;
-			f4_curr[1][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv[1][pos[0]][pos[1]][numVectors-1].v * ( f4_volt[0][pos[0]][pos[1]][numVectors-1].v - temp.v - f4_volt[2][pos[0]][pos[1]][numVectors-1].v + f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v);
+			f4_curr[1][pos[0]][pos[1]][numVectors-1].v *=
+			    Op->f4_ii[1][pos[0]][pos[1]][numVectors-1].v;
+			f4_curr[1][pos[0]][pos[1]][numVectors-1].v +=
+			    Op->f4_iv[1][pos[0]][pos[1]][numVectors-1].v * (
+			        f4_volt[0][pos[0]  ][pos[1]][numVectors-1].v -
+			        temp.v -
+			        f4_volt[2][pos[0]  ][pos[1]][numVectors-1].v +
+			        f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v
+			    );
 
 			// z-pol
-			f4_curr[2][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii[2][pos[0]][pos[1]][numVectors-1].v;
-			f4_curr[2][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv[2][pos[0]][pos[1]][numVectors-1].v * ( f4_volt[1][pos[0]][pos[1]][numVectors-1].v - f4_volt[1][pos[0]+1][pos[1]][numVectors-1].v - f4_volt[0][pos[0]][pos[1]][numVectors-1].v + f4_volt[0][pos[0]][pos[1]+1][numVectors-1].v);
+			f4_curr[2][pos[0]][pos[1]][numVectors-1].v *=
+			    Op->f4_ii[2][pos[0]][pos[1]][numVectors-1].v;
+			f4_curr[2][pos[0]][pos[1]][numVectors-1].v +=
+			    Op->f4_iv[2][pos[0]][pos[1]][numVectors-1].v * (
+			        f4_volt[1][pos[0]  ][pos[1]  ][numVectors-1].v -
+			        f4_volt[1][pos[0]+1][pos[1]  ][numVectors-1].v -
+			        f4_volt[0][pos[0]  ][pos[1]  ][numVectors-1].v +
+			        f4_volt[0][pos[0]  ][pos[1]+1][numVectors-1].v
+			    );
 		}
 		++pos[0];
 	}
diff --git a/FDTD/engine_sse_compressed.cpp b/FDTD/engine_sse_compressed.cpp
index 8e83387..e82a752 100644
--- a/FDTD/engine_sse_compressed.cpp
+++ b/FDTD/engine_sse_compressed.cpp
@@ -55,47 +55,93 @@ void Engine_SSE_Compressed::UpdateVoltages(unsigned int startX, unsigned int num
 			{
 				index = Op->m_Op_index[pos[0]][pos[1]][pos[2]];
 				// x-polarization
-				f4_volt[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[0][index].v;
-				f4_volt[0][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[0][index].v * ( f4_curr[2][pos[0]][pos[1]][pos[2]].v - f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - f4_curr[1][pos[0]][pos[1]][pos[2]].v + f4_curr[1][pos[0]][pos[1]][pos[2]-1].v );
+				f4_volt[0][pos[0]][pos[1]][pos[2]].v *=
+				    Op->f4_vv_Compressed[0][index].v;
+				f4_volt[0][pos[0]][pos[1]][pos[2]].v +=
+				    Op->f4_vi_Compressed[0][index].v * (
+				        f4_curr[2][pos[0]][pos[1]         ][pos[2]  ].v -
+				        f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]  ].v -
+				        f4_curr[1][pos[0]][pos[1]         ][pos[2]  ].v +
+				        f4_curr[1][pos[0]][pos[1]         ][pos[2]-1].v
+				    );
 
 				// y-polarization
-				f4_volt[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[1][index].v;
-				f4_volt[1][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[1][index].v * ( f4_curr[0][pos[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]-1].v - f4_curr[2][pos[0]][pos[1]][pos[2]].v + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]].v);
+				f4_volt[1][pos[0]][pos[1]][pos[2]].v *=
+				    Op->f4_vv_Compressed[1][index].v;
+				f4_volt[1][pos[0]][pos[1]][pos[2]].v +=
+				    Op->f4_vi_Compressed[1][index].v * (
+				        f4_curr[0][pos[0]         ][pos[1]][pos[2]  ].v -
+				        f4_curr[0][pos[0]         ][pos[1]][pos[2]-1].v -
+				        f4_curr[2][pos[0]         ][pos[1]][pos[2]  ].v +
+				        f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]  ].v
+				    );
 
 				// z-polarization
-				f4_volt[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[2][index].v;
-				f4_volt[2][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[2][index].v * ( f4_curr[1][pos[0]][pos[1]][pos[2]].v - f4_curr[1][pos[0]-shift[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]].v + f4_curr[0][pos[0]][pos[1]-shift[1]][pos[2]].v);
+				f4_volt[2][pos[0]][pos[1]][pos[2]].v *=
+				    Op->f4_vv_Compressed[2][index].v;
+				f4_volt[2][pos[0]][pos[1]][pos[2]].v +=
+				    Op->f4_vi_Compressed[2][index].v * (
+				        f4_curr[1][pos[0]         ][pos[1]]         [pos[2]].v -
+				        f4_curr[1][pos[0]-shift[0]][pos[1]]         [pos[2]].v -
+				        f4_curr[0][pos[0]         ][pos[1]]         [pos[2]].v +
+				        f4_curr[0][pos[0]         ][pos[1]-shift[1]][pos[2]].v
+				    );
 			}
 
 			// for pos[2] = 0
 			// x-polarization
 			index = Op->m_Op_index[pos[0]][pos[1]][0];
 #ifdef __SSE2__
-			temp.v = (__m128)_mm_slli_si128( (__m128i)f4_curr[1][pos[0]][pos[1]][numVectors-1].v, 4 );
+			temp.v = (__m128)_mm_slli_si128(
+			             (__m128i)f4_curr[1][pos[0]][pos[1]][numVectors-1].v, 4
+			         );
 #else
 			temp.f[0] = 0;
 			temp.f[1] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[0];
 			temp.f[2] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[1];
 			temp.f[3] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[2];
 #endif
-			f4_volt[0][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[0][index].v;
-			f4_volt[0][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[0][index].v * ( f4_curr[2][pos[0]][pos[1]][0].v - f4_curr[2][pos[0]][pos[1]-shift[1]][0].v - f4_curr[1][pos[0]][pos[1]][0].v + temp.v );
+			f4_volt[0][pos[0]][pos[1]][0].v *=
+			    Op->f4_vv_Compressed[0][index].v;
+			f4_volt[0][pos[0]][pos[1]][0].v +=
+			    Op->f4_vi_Compressed[0][index].v * (
+			        f4_curr[2][pos[0]][pos[1]         ][0].v -
+			        f4_curr[2][pos[0]][pos[1]-shift[1]][0].v -
+			        f4_curr[1][pos[0]][pos[1]         ][0].v +
+			        temp.v
+			    );
 
 			// y-polarization
 #ifdef __SSE2__
-			temp.v = (__m128)_mm_slli_si128( (__m128i)f4_curr[0][pos[0]][pos[1]][numVectors-1].v, 4 );
+			temp.v = (__m128)_mm_slli_si128(
+			             (__m128i)f4_curr[0][pos[0]][pos[1]][numVectors-1].v, 4
+			         );
 #else
 			temp.f[0] = 0;
 			temp.f[1] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[0];
 			temp.f[2] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[1];
 			temp.f[3] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[2];
 #endif
-			f4_volt[1][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[1][index].v;
-			f4_volt[1][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[1][index].v * ( f4_curr[0][pos[0]][pos[1]][0].v - temp.v - f4_curr[2][pos[0]][pos[1]][0].v + f4_curr[2][pos[0]-shift[0]][pos[1]][0].v);
+			f4_volt[1][pos[0]][pos[1]][0].v *=
+			    Op->f4_vv_Compressed[1][index].v;
+			f4_volt[1][pos[0]][pos[1]][0].v +=
+			    Op->f4_vi_Compressed[1][index].v * (
+			        f4_curr[0][pos[0]         ][pos[1]][0].v -
+			        temp.v -
+			        f4_curr[2][pos[0]         ][pos[1]][0].v +
+			        f4_curr[2][pos[0]-shift[0]][pos[1]][0].v
+			    );
 
 			// z-polarization
-			f4_volt[2][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[2][index].v;
-			f4_volt[2][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[2][index].v * ( f4_curr[1][pos[0]][pos[1]][0].v - f4_curr[1][pos[0]-shift[0]][pos[1]][0].v - f4_curr[0][pos[0]][pos[1]][0].v + f4_curr[0][pos[0]][pos[1]-shift[1]][0].v);
+			f4_volt[2][pos[0]][pos[1]][0].v *=
+			    Op->f4_vv_Compressed[2][index].v;
+			f4_volt[2][pos[0]][pos[1]][0].v +=
+			    Op->f4_vi_Compressed[2][index].v * (
+			        f4_curr[1][pos[0]         ][pos[1]         ][0].v -
+			        f4_curr[1][pos[0]-shift[0]][pos[1]         ][0].v -
+			        f4_curr[0][pos[0]         ][pos[1]         ][0].v +
+			        f4_curr[0][pos[0]         ][pos[1]-shift[1]][0].v
+			    );
 		}
 		++pos[0];
 	}
@@ -116,47 +162,93 @@ void Engine_SSE_Compressed::UpdateCurrents(unsigned int startX, unsigned int num
 			{
 				index = Op->m_Op_index[pos[0]][pos[1]][pos[2]];
 				// x-pol
-				f4_curr[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[0][index].v;
-				f4_curr[0][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[0][index].v * ( f4_volt[2][pos[0]][pos[1]][pos[2]].v - f4_volt[2][pos[0]][pos[1]+1][pos[2]].v - f4_volt[1][pos[0]][pos[1]][pos[2]].v + f4_volt[1][pos[0]][pos[1]][pos[2]+1].v);
+				f4_curr[0][pos[0]][pos[1]][pos[2]].v *=
+				    Op->f4_ii_Compressed[0][index].v;
+				f4_curr[0][pos[0]][pos[1]][pos[2]].v +=
+				    Op->f4_iv_Compressed[0][index].v * (
+				        f4_volt[2][pos[0]][pos[1]  ][pos[2]  ].v -
+				        f4_volt[2][pos[0]][pos[1]+1][pos[2]  ].v -
+				        f4_volt[1][pos[0]][pos[1]  ][pos[2]  ].v +
+				        f4_volt[1][pos[0]][pos[1]  ][pos[2]+1].v
+				    );
 
 				// y-pol
-				f4_curr[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[1][index].v;
-				f4_curr[1][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[1][index].v * ( f4_volt[0][pos[0]][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]+1].v - f4_volt[2][pos[0]][pos[1]][pos[2]].v + f4_volt[2][pos[0]+1][pos[1]][pos[2]].v);
+				f4_curr[1][pos[0]][pos[1]][pos[2]].v *=
+				    Op->f4_ii_Compressed[1][index].v;
+				f4_curr[1][pos[0]][pos[1]][pos[2]].v +=
+				    Op->f4_iv_Compressed[1][index].v * (
+				        f4_volt[0][pos[0]  ][pos[1]][pos[2]  ].v -
+				        f4_volt[0][pos[0]  ][pos[1]][pos[2]+1].v -
+				        f4_volt[2][pos[0]  ][pos[1]][pos[2]  ].v +
+				        f4_volt[2][pos[0]+1][pos[1]][pos[2]  ].v
+				    );
 
 				// z-pol
-				f4_curr[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[2][index].v;
-				f4_curr[2][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[2][index].v * ( f4_volt[1][pos[0]][pos[1]][pos[2]].v - f4_volt[1][pos[0]+1][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]].v + f4_volt[0][pos[0]][pos[1]+1][pos[2]].v);
+				f4_curr[2][pos[0]][pos[1]][pos[2]].v *=
+				    Op->f4_ii_Compressed[2][index].v;
+				f4_curr[2][pos[0]][pos[1]][pos[2]].v +=
+				    Op->f4_iv_Compressed[2][index].v * (
+				        f4_volt[1][pos[0]  ][pos[1]  ][pos[2]].v -
+				        f4_volt[1][pos[0]+1][pos[1]  ][pos[2]].v -
+				        f4_volt[0][pos[0]  ][pos[1]  ][pos[2]].v +
+				        f4_volt[0][pos[0]  ][pos[1]+1][pos[2]].v
+				    );
 			}
 
 			index = Op->m_Op_index[pos[0]][pos[1]][numVectors-1];
 			// for pos[2] = numVectors-1
 			// x-pol
 #ifdef __SSE2__
-			temp.v = (__m128)_mm_srli_si128( (__m128i)f4_volt[1][pos[0]][pos[1]][0].v, 4 );
+			temp.v = (__m128)_mm_srli_si128(
+			             (__m128i)f4_volt[1][pos[0]][pos[1]][0].v, 4
+			         );
 #else
 			temp.f[0] = f4_volt[1][pos[0]][pos[1]][0].f[1];
 			temp.f[1] = f4_volt[1][pos[0]][pos[1]][0].f[2];
 			temp.f[2] = f4_volt[1][pos[0]][pos[1]][0].f[3];
 			temp.f[3] = 0;
 #endif
-			f4_curr[0][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[0][index].v;
-			f4_curr[0][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[0][index].v * ( f4_volt[2][pos[0]][pos[1]][numVectors-1].v - f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v - f4_volt[1][pos[0]][pos[1]][numVectors-1].v + temp.v);
+			f4_curr[0][pos[0]][pos[1]][numVectors-1].v *=
+			    Op->f4_ii_Compressed[0][index].v;
+			f4_curr[0][pos[0]][pos[1]][numVectors-1].v +=
+			    Op->f4_iv_Compressed[0][index].v * (
+			        f4_volt[2][pos[0]][pos[1]  ][numVectors-1].v -
+			        f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v -
+			        f4_volt[1][pos[0]][pos[1]  ][numVectors-1].v +
+			        temp.v
+			    );
 
 			// y-pol
 #ifdef __SSE2__
-			temp.v = (__m128)_mm_srli_si128( (__m128i)f4_volt[0][pos[0]][pos[1]][0].v, 4 );
+			temp.v = (__m128)_mm_srli_si128(
+			             (__m128i)f4_volt[0][pos[0]][pos[1]][0].v, 4
+			         );
 #else
 			temp.f[0] = f4_volt[0][pos[0]][pos[1]][0].f[1];
 			temp.f[1] = f4_volt[0][pos[0]][pos[1]][0].f[2];
 			temp.f[2] = f4_volt[0][pos[0]][pos[1]][0].f[3];
 			temp.f[3] = 0;
 #endif
-			f4_curr[1][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[1][index].v;
-			f4_curr[1][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[1][index].v * ( f4_volt[0][pos[0]][pos[1]][numVectors-1].v - temp.v - f4_volt[2][pos[0]][pos[1]][numVectors-1].v + f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v);
+			f4_curr[1][pos[0]][pos[1]][numVectors-1].v *=
+			    Op->f4_ii_Compressed[1][index].v;
+			f4_curr[1][pos[0]][pos[1]][numVectors-1].v +=
+			    Op->f4_iv_Compressed[1][index].v * (
+			        f4_volt[0][pos[0]  ][pos[1]][numVectors-1].v -
+			        temp.v -
+			        f4_volt[2][pos[0]  ][pos[1]][numVectors-1].v +
+			        f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v
+			    );
 
 			// z-pol
-			f4_curr[2][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[2][index].v;
-			f4_curr[2][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[2][index].v * ( f4_volt[1][pos[0]][pos[1]][numVectors-1].v - f4_volt[1][pos[0]+1][pos[1]][numVectors-1].v - f4_volt[0][pos[0]][pos[1]][numVectors-1].v + f4_volt[0][pos[0]][pos[1]+1][numVectors-1].v);
+			f4_curr[2][pos[0]][pos[1]][numVectors-1].v *=
+			    Op->f4_ii_Compressed[2][index].v;
+			f4_curr[2][pos[0]][pos[1]][numVectors-1].v +=
+			    Op->f4_iv_Compressed[2][index].v * (
+			        f4_volt[1][pos[0]  ][pos[1]  ][numVectors-1].v -
+			        f4_volt[1][pos[0]+1][pos[1]  ][numVectors-1].v -
+			        f4_volt[0][pos[0]  ][pos[1]  ][numVectors-1].v +
+			        f4_volt[0][pos[0]  ][pos[1]+1][numVectors-1].v
+			    );
 		}
 		++pos[0];
 	}