FDTD: reformat code of update equations.

The original update equations in the FDTD engine have extremely long lines and are difficult to read and work with. This patch inserts line breaks, it aligns all array indexes by x/y/z coordinates to make it easy to visually compare. Signed-off-by: Yifeng Li <tomli@tomli.me>
2023-01-18 17:51:33 +00:00 · 2023-01-18 17:51:33 +00:00 · 71dde7ea49
parent 5b8cf2f2ed
commit 71dde7ea49
3 changed files with 282 additions and 64 deletions
--- a/FDTD/engine.cpp
+++ b/FDTD/engine.cpp
@ -123,16 +123,37 @@ void Engine::UpdateVoltages(unsigned int startX, unsigned int numX)
 				shift[2]=pos[2];
 				//do the updates here
 				//for x
-				volt[0][pos[0]][pos[1]][pos[2]] *= Op->vv[0][pos[0]][pos[1]][pos[2]];
+				volt[0][pos[0]][pos[1]][pos[2]] *=
-				volt[0][pos[0]][pos[1]][pos[2]] += Op->vi[0][pos[0]][pos[1]][pos[2]] * ( curr[2][pos[0]][pos[1]][pos[2]] - curr[2][pos[0]][pos[1]-shift[1]][pos[2]] - curr[1][pos[0]][pos[1]][pos[2]] + curr[1][pos[0]][pos[1]][pos[2]-shift[2]]);
+				    Op->vv[0][pos[0]][pos[1]][pos[2]];
 				volt[0][pos[0]][pos[1]][pos[2]] +=
 				    Op->vi[0][pos[0]][pos[1]][pos[2]] * (
 				        curr[2][pos[0]][pos[1]         ][pos[2]         ] -
 				        curr[2][pos[0]][pos[1]-shift[1]][pos[2]         ] -
 				        curr[1][pos[0]][pos[1]         ][pos[2]         ] +
 				        curr[1][pos[0]][pos[1]         ][pos[2]-shift[2]]
 				    );
 				//for y
-				volt[1][pos[0]][pos[1]][pos[2]] *= Op->vv[1][pos[0]][pos[1]][pos[2]];
+				volt[1][pos[0]][pos[1]][pos[2]] *=
-				volt[1][pos[0]][pos[1]][pos[2]] += Op->vi[1][pos[0]][pos[1]][pos[2]] * ( curr[0][pos[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]-shift[2]] - curr[2][pos[0]][pos[1]][pos[2]] + curr[2][pos[0]-shift[0]][pos[1]][pos[2]]);
+				    Op->vv[1][pos[0]][pos[1]][pos[2]];
 				volt[1][pos[0]][pos[1]][pos[2]] +=
 				    Op->vi[1][pos[0]][pos[1]][pos[2]] * (
 				        curr[0][pos[0]         ][pos[1]][pos[2]         ] -
 				        curr[0][pos[0]         ][pos[1]][pos[2]-shift[2]] -
 				        curr[2][pos[0]         ][pos[1]][pos[2]         ] +
 				        curr[2][pos[0]-shift[0]][pos[1]][pos[2]         ]
 				    );
 				//for z
-				volt[2][pos[0]][pos[1]][pos[2]] *= Op->vv[2][pos[0]][pos[1]][pos[2]];
+				volt[2][pos[0]][pos[1]][pos[2]] *=
-				volt[2][pos[0]][pos[1]][pos[2]] += Op->vi[2][pos[0]][pos[1]][pos[2]] * ( curr[1][pos[0]][pos[1]][pos[2]] - curr[1][pos[0]-shift[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]] + curr[0][pos[0]][pos[1]-shift[1]][pos[2]]);
+				    Op->vv[2][pos[0]][pos[1]][pos[2]];
 				volt[2][pos[0]][pos[1]][pos[2]] +=
 				    Op->vi[2][pos[0]][pos[1]][pos[2]] * (
 				        curr[1][pos[0]         ][pos[1]         ][pos[2]] -
 				        curr[1][pos[0]-shift[0]][pos[1]         ][pos[2]] -
 				        curr[0][pos[0]         ][pos[1]         ][pos[2]] +
 				        curr[0][pos[0]         ][pos[1]-shift[1]][pos[2]]
 				    );
 			}
 		}
 		++pos[0];
@ -151,16 +172,37 @@ void Engine::UpdateCurrents(unsigned int startX, unsigned int numX)
 			{
 				//do the updates here
 				//for x
-				curr[0][pos[0]][pos[1]][pos[2]] *= Op->ii[0][pos[0]][pos[1]][pos[2]];
+				curr[0][pos[0]][pos[1]][pos[2]] *=
-				curr[0][pos[0]][pos[1]][pos[2]] += Op->iv[0][pos[0]][pos[1]][pos[2]] * ( volt[2][pos[0]][pos[1]][pos[2]] - volt[2][pos[0]][pos[1]+1][pos[2]] - volt[1][pos[0]][pos[1]][pos[2]] + volt[1][pos[0]][pos[1]][pos[2]+1]);
+				    Op->ii[0][pos[0]][pos[1]][pos[2]];
 				curr[0][pos[0]][pos[1]][pos[2]] +=
 				    Op->iv[0][pos[0]][pos[1]][pos[2]] * (
 				        volt[2][pos[0]][pos[1]  ][pos[2]  ] -
 				        volt[2][pos[0]][pos[1]+1][pos[2]  ] -
 				        volt[1][pos[0]][pos[1]  ][pos[2]  ] +
 				        volt[1][pos[0]][pos[1]  ][pos[2]+1]
 				    );
 				//for y
-				curr[1][pos[0]][pos[1]][pos[2]] *= Op->ii[1][pos[0]][pos[1]][pos[2]];
+				curr[1][pos[0]][pos[1]][pos[2]] *=
-				curr[1][pos[0]][pos[1]][pos[2]] += Op->iv[1][pos[0]][pos[1]][pos[2]] * ( volt[0][pos[0]][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]+1] - volt[2][pos[0]][pos[1]][pos[2]] + volt[2][pos[0]+1][pos[1]][pos[2]]);
+				    Op->ii[1][pos[0]][pos[1]][pos[2]];
 				curr[1][pos[0]][pos[1]][pos[2]] +=
 				    Op->iv[1][pos[0]][pos[1]][pos[2]] * (
 				        volt[0][pos[0]  ][pos[1]][pos[2]  ] -
 				        volt[0][pos[0]  ][pos[1]][pos[2]+1] -
 				        volt[2][pos[0]  ][pos[1]][pos[2]  ] +
 				        volt[2][pos[0]+1][pos[1]][pos[2]  ]
 				    );
 				//for z
-				curr[2][pos[0]][pos[1]][pos[2]] *= Op->ii[2][pos[0]][pos[1]][pos[2]];
+				curr[2][pos[0]][pos[1]][pos[2]] *=
-				curr[2][pos[0]][pos[1]][pos[2]] += Op->iv[2][pos[0]][pos[1]][pos[2]] * ( volt[1][pos[0]][pos[1]][pos[2]] - volt[1][pos[0]+1][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]] + volt[0][pos[0]][pos[1]+1][pos[2]]);
+				    Op->ii[2][pos[0]][pos[1]][pos[2]];
 				curr[2][pos[0]][pos[1]][pos[2]] +=
 				    Op->iv[2][pos[0]][pos[1]][pos[2]] * (
 				        volt[1][pos[0]  ][pos[1]  ][pos[2]] -
 				        volt[1][pos[0]+1][pos[1]  ][pos[2]] -
 				        volt[0][pos[0]  ][pos[1]  ][pos[2]] +
 				        volt[0][pos[0]  ][pos[1]+1][pos[2]]
 				    );
 			}
 		}
 		++pos[0];
--- a/FDTD/engine_sse.cpp
+++ b/FDTD/engine_sse.cpp
@ -91,16 +91,37 @@ void Engine_sse::UpdateVoltages(unsigned int startX, unsigned int numX)
 			for (pos[2]=1; pos[2]<numVectors; ++pos[2])
 			{
 				// x-polarization
-				f4_volt[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[0][pos[0]][pos[1]][pos[2]].v;
+				f4_volt[0][pos[0]][pos[1]][pos[2]].v *=
-				f4_volt[0][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[0][pos[0]][pos[1]][pos[2]].v * ( f4_curr[2][pos[0]][pos[1]][pos[2]].v - f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - f4_curr[1][pos[0]][pos[1]][pos[2]].v + f4_curr[1][pos[0]][pos[1]][pos[2]-1].v );
+				    Op->f4_vv[0][pos[0]][pos[1]][pos[2]].v;
 				f4_volt[0][pos[0]][pos[1]][pos[2]].v +=
 				    Op->f4_vi[0][pos[0]][pos[1]][pos[2]].v * (
 				        f4_curr[2][pos[0]][pos[1]         ][pos[2]].v -
 				        f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v -
 				        f4_curr[1][pos[0]][pos[1]         ][pos[2]].v +
 				        f4_curr[1][pos[0]][pos[1]         ][pos[2]-1].v
 				    );
 				// y-polarization
-				f4_volt[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[1][pos[0]][pos[1]][pos[2]].v;
+				f4_volt[1][pos[0]][pos[1]][pos[2]].v *=
-				f4_volt[1][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[1][pos[0]][pos[1]][pos[2]].v * ( f4_curr[0][pos[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]-1].v - f4_curr[2][pos[0]][pos[1]][pos[2]].v + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]].v);
+				    Op->f4_vv[1][pos[0]][pos[1]][pos[2]].v;
 				f4_volt[1][pos[0]][pos[1]][pos[2]].v +=
 				    Op->f4_vi[1][pos[0]][pos[1]][pos[2]].v * (
 				        f4_curr[0][pos[0]         ][pos[1]][pos[2]  ].v -
 				        f4_curr[0][pos[0]         ][pos[1]][pos[2]-1].v -
 				        f4_curr[2][pos[0]         ][pos[1]][pos[2]  ].v +
 				        f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]  ].v
 				    );
 				// z-polarization
-				f4_volt[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[2][pos[0]][pos[1]][pos[2]].v;
+				f4_volt[2][pos[0]][pos[1]][pos[2]].v *=
-				f4_volt[2][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[2][pos[0]][pos[1]][pos[2]].v * ( f4_curr[1][pos[0]][pos[1]][pos[2]].v - f4_curr[1][pos[0]-shift[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]].v + f4_curr[0][pos[0]][pos[1]-shift[1]][pos[2]].v);
+				    Op->f4_vv[2][pos[0]][pos[1]][pos[2]].v;
 				f4_volt[2][pos[0]][pos[1]][pos[2]].v +=
 				    Op->f4_vi[2][pos[0]][pos[1]][pos[2]].v * (
 				        f4_curr[1][pos[0]         ][pos[1]         ][pos[2]].v -
 				        f4_curr[1][pos[0]-shift[0]][pos[1]         ][pos[2]].v -
 				        f4_curr[0][pos[0]         ][pos[1]         ][pos[2]].v +
 				        f4_curr[0][pos[0]         ][pos[1]-shift[1]][pos[2]].v
 				    );
 			}
 			// for pos[2] = 0
@ -109,20 +130,41 @@ void Engine_sse::UpdateVoltages(unsigned int startX, unsigned int numX)
 			temp.f[1] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[0];
 			temp.f[2] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[1];
 			temp.f[3] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[2];
-			f4_volt[0][pos[0]][pos[1]][0].v *= Op->f4_vv[0][pos[0]][pos[1]][0].v;
+			f4_volt[0][pos[0]][pos[1]][0].v *=
-			f4_volt[0][pos[0]][pos[1]][0].v += Op->f4_vi[0][pos[0]][pos[1]][0].v * ( f4_curr[2][pos[0]][pos[1]][0].v - f4_curr[2][pos[0]][pos[1]-shift[1]][0].v - f4_curr[1][pos[0]][pos[1]][0].v + temp.v );
+			    Op->f4_vv[0][pos[0]][pos[1]][0].v;
 			f4_volt[0][pos[0]][pos[1]][0].v +=
 			    Op->f4_vi[0][pos[0]][pos[1]][0].v * (
 			        f4_curr[2][pos[0]][pos[1]         ][0].v -
 			        f4_curr[2][pos[0]][pos[1]-shift[1]][0].v -
 			        f4_curr[1][pos[0]][pos[1]         ][0].v +
 			        temp.v
 			    );
 			// y-polarization
 			temp.f[0] = 0;
 			temp.f[1] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[0];
 			temp.f[2] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[1];
 			temp.f[3] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[2];
-			f4_volt[1][pos[0]][pos[1]][0].v *= Op->f4_vv[1][pos[0]][pos[1]][0].v;
+			f4_volt[1][pos[0]][pos[1]][0].v *=
-			f4_volt[1][pos[0]][pos[1]][0].v += Op->f4_vi[1][pos[0]][pos[1]][0].v * ( f4_curr[0][pos[0]][pos[1]][0].v - temp.v - f4_curr[2][pos[0]][pos[1]][0].v + f4_curr[2][pos[0]-shift[0]][pos[1]][0].v);
+			    Op->f4_vv[1][pos[0]][pos[1]][0].v;
 			f4_volt[1][pos[0]][pos[1]][0].v +=
 			    Op->f4_vi[1][pos[0]][pos[1]][0].v * (
 			        f4_curr[0][pos[0]         ][pos[1]][0].v -
 			        temp.v -
 			        f4_curr[2][pos[0]         ][pos[1]][0].v +
 			        f4_curr[2][pos[0]-shift[0]][pos[1]][0].v
 			    );
 			// z-polarization
-			f4_volt[2][pos[0]][pos[1]][0].v *= Op->f4_vv[2][pos[0]][pos[1]][0].v;
+			f4_volt[2][pos[0]][pos[1]][0].v *=
-			f4_volt[2][pos[0]][pos[1]][0].v += Op->f4_vi[2][pos[0]][pos[1]][0].v * ( f4_curr[1][pos[0]][pos[1]][0].v - f4_curr[1][pos[0]-shift[0]][pos[1]][0].v - f4_curr[0][pos[0]][pos[1]][0].v + f4_curr[0][pos[0]][pos[1]-shift[1]][0].v);
+			    Op->f4_vv[2][pos[0]][pos[1]][0].v;
 			f4_volt[2][pos[0]][pos[1]][0].v +=
 			    Op->f4_vi[2][pos[0]][pos[1]][0].v * (
 			        f4_curr[1][pos[0]         ][pos[1]         ][0].v -
 			        f4_curr[1][pos[0]-shift[0]][pos[1]         ][0].v -
 			        f4_curr[0][pos[0]         ][pos[1]         ][0].v +
 			        f4_curr[0][pos[0]         ][pos[1]-shift[1]][0].v
 			    );
 		}
 		++pos[0];
 	}
@ -141,16 +183,37 @@ void Engine_sse::UpdateCurrents(unsigned int startX, unsigned int numX)
 			for (pos[2]=0; pos[2]<numVectors-1; ++pos[2])
 			{
 				// x-pol
-				f4_curr[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[0][pos[0]][pos[1]][pos[2]].v;
+				f4_curr[0][pos[0]][pos[1]][pos[2]].v *=
-				f4_curr[0][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[0][pos[0]][pos[1]][pos[2]].v * ( f4_volt[2][pos[0]][pos[1]][pos[2]].v - f4_volt[2][pos[0]][pos[1]+1][pos[2]].v - f4_volt[1][pos[0]][pos[1]][pos[2]].v + f4_volt[1][pos[0]][pos[1]][pos[2]+1].v);
+				    Op->f4_ii[0][pos[0]][pos[1]][pos[2]].v;
 				f4_curr[0][pos[0]][pos[1]][pos[2]].v +=
 				    Op->f4_iv[0][pos[0]][pos[1]][pos[2]].v * (
 				        f4_volt[2][pos[0]][pos[1]  ][pos[2]  ].v -
 				        f4_volt[2][pos[0]][pos[1]+1][pos[2]  ].v -
 				        f4_volt[1][pos[0]][pos[1]  ][pos[2]  ].v +
 				        f4_volt[1][pos[0]][pos[1]  ][pos[2]+1].v
 				    );
 				// y-pol
-				f4_curr[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[1][pos[0]][pos[1]][pos[2]].v;
+				f4_curr[1][pos[0]][pos[1]][pos[2]].v *=
-				f4_curr[1][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[1][pos[0]][pos[1]][pos[2]].v * ( f4_volt[0][pos[0]][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]+1].v - f4_volt[2][pos[0]][pos[1]][pos[2]].v + f4_volt[2][pos[0]+1][pos[1]][pos[2]].v);
+				    Op->f4_ii[1][pos[0]][pos[1]][pos[2]].v;
 				f4_curr[1][pos[0]][pos[1]][pos[2]].v +=
 				    Op->f4_iv[1][pos[0]][pos[1]][pos[2]].v * (
 				        f4_volt[0][pos[0]  ][pos[1]][pos[2]  ].v -
 				        f4_volt[0][pos[0]  ][pos[1]][pos[2]+1].v -
 				        f4_volt[2][pos[0]  ][pos[1]][pos[2]  ].v +
 				        f4_volt[2][pos[0]+1][pos[1]][pos[2]  ].v
 				    );
 				// z-pol
-				f4_curr[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[2][pos[0]][pos[1]][pos[2]].v;
+				f4_curr[2][pos[0]][pos[1]][pos[2]].v *=
-				f4_curr[2][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[2][pos[0]][pos[1]][pos[2]].v * ( f4_volt[1][pos[0]][pos[1]][pos[2]].v - f4_volt[1][pos[0]+1][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]].v + f4_volt[0][pos[0]][pos[1]+1][pos[2]].v);
+				    Op->f4_ii[2][pos[0]][pos[1]][pos[2]].v;
 				f4_curr[2][pos[0]][pos[1]][pos[2]].v +=
 				    Op->f4_iv[2][pos[0]][pos[1]][pos[2]].v * (
 				        f4_volt[1][pos[0]  ][pos[1]  ][pos[2]].v -
 				        f4_volt[1][pos[0]+1][pos[1]  ][pos[2]].v -
 				        f4_volt[0][pos[0]  ][pos[1]  ][pos[2]].v +
 				        f4_volt[0][pos[0]  ][pos[1]+1][pos[2]].v
 				    );
 			}
 			// for pos[2] = numVectors-1
@ -159,20 +222,41 @@ void Engine_sse::UpdateCurrents(unsigned int startX, unsigned int numX)
 			temp.f[1] = f4_volt[1][pos[0]][pos[1]][0].f[2];
 			temp.f[2] = f4_volt[1][pos[0]][pos[1]][0].f[3];
 			temp.f[3] = 0;
-			f4_curr[0][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii[0][pos[0]][pos[1]][numVectors-1].v;
+			f4_curr[0][pos[0]][pos[1]][numVectors-1].v *=
-			f4_curr[0][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv[0][pos[0]][pos[1]][numVectors-1].v * ( f4_volt[2][pos[0]][pos[1]][numVectors-1].v - f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v - f4_volt[1][pos[0]][pos[1]][numVectors-1].v + temp.v);
+			    Op->f4_ii[0][pos[0]][pos[1]][numVectors-1].v;
 			f4_curr[0][pos[0]][pos[1]][numVectors-1].v +=
 			    Op->f4_iv[0][pos[0]][pos[1]][numVectors-1].v * (
 			        f4_volt[2][pos[0]][pos[1]  ][numVectors-1].v -
 			        f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v -
 			        f4_volt[1][pos[0]][pos[1]  ][numVectors-1].v +
 			        temp.v
 			    );
 			// y-pol
 			temp.f[0] = f4_volt[0][pos[0]][pos[1]][0].f[1];
 			temp.f[1] = f4_volt[0][pos[0]][pos[1]][0].f[2];
 			temp.f[2] = f4_volt[0][pos[0]][pos[1]][0].f[3];
 			temp.f[3] = 0;
-			f4_curr[1][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii[1][pos[0]][pos[1]][numVectors-1].v;
+			f4_curr[1][pos[0]][pos[1]][numVectors-1].v *=
-			f4_curr[1][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv[1][pos[0]][pos[1]][numVectors-1].v * ( f4_volt[0][pos[0]][pos[1]][numVectors-1].v - temp.v - f4_volt[2][pos[0]][pos[1]][numVectors-1].v + f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v);
+			    Op->f4_ii[1][pos[0]][pos[1]][numVectors-1].v;
 			f4_curr[1][pos[0]][pos[1]][numVectors-1].v +=
 			    Op->f4_iv[1][pos[0]][pos[1]][numVectors-1].v * (
 			        f4_volt[0][pos[0]  ][pos[1]][numVectors-1].v -
 			        temp.v -
 			        f4_volt[2][pos[0]  ][pos[1]][numVectors-1].v +
 			        f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v
 			    );
 			// z-pol
-			f4_curr[2][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii[2][pos[0]][pos[1]][numVectors-1].v;
+			f4_curr[2][pos[0]][pos[1]][numVectors-1].v *=
-			f4_curr[2][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv[2][pos[0]][pos[1]][numVectors-1].v * ( f4_volt[1][pos[0]][pos[1]][numVectors-1].v - f4_volt[1][pos[0]+1][pos[1]][numVectors-1].v - f4_volt[0][pos[0]][pos[1]][numVectors-1].v + f4_volt[0][pos[0]][pos[1]+1][numVectors-1].v);
+			    Op->f4_ii[2][pos[0]][pos[1]][numVectors-1].v;
 			f4_curr[2][pos[0]][pos[1]][numVectors-1].v +=
 			    Op->f4_iv[2][pos[0]][pos[1]][numVectors-1].v * (
 			        f4_volt[1][pos[0]  ][pos[1]  ][numVectors-1].v -
 			        f4_volt[1][pos[0]+1][pos[1]  ][numVectors-1].v -
 			        f4_volt[0][pos[0]  ][pos[1]  ][numVectors-1].v +
 			        f4_volt[0][pos[0]  ][pos[1]+1][numVectors-1].v
 			    );
 		}
 		++pos[0];
 	}
--- a/FDTD/engine_sse_compressed.cpp
+++ b/FDTD/engine_sse_compressed.cpp
@ -55,47 +55,93 @@ void Engine_SSE_Compressed::UpdateVoltages(unsigned int startX, unsigned int num
 			{
 				index = Op->m_Op_index[pos[0]][pos[1]][pos[2]];
 				// x-polarization
-				f4_volt[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[0][index].v;
+				f4_volt[0][pos[0]][pos[1]][pos[2]].v *=
-				f4_volt[0][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[0][index].v * ( f4_curr[2][pos[0]][pos[1]][pos[2]].v - f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - f4_curr[1][pos[0]][pos[1]][pos[2]].v + f4_curr[1][pos[0]][pos[1]][pos[2]-1].v );
+				    Op->f4_vv_Compressed[0][index].v;
 				f4_volt[0][pos[0]][pos[1]][pos[2]].v +=
 				    Op->f4_vi_Compressed[0][index].v * (
 				        f4_curr[2][pos[0]][pos[1]         ][pos[2]  ].v -
 				        f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]  ].v -
 				        f4_curr[1][pos[0]][pos[1]         ][pos[2]  ].v +
 				        f4_curr[1][pos[0]][pos[1]         ][pos[2]-1].v
 				    );
 				// y-polarization
-				f4_volt[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[1][index].v;
+				f4_volt[1][pos[0]][pos[1]][pos[2]].v *=
-				f4_volt[1][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[1][index].v * ( f4_curr[0][pos[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]-1].v - f4_curr[2][pos[0]][pos[1]][pos[2]].v + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]].v);
+				    Op->f4_vv_Compressed[1][index].v;
 				f4_volt[1][pos[0]][pos[1]][pos[2]].v +=
 				    Op->f4_vi_Compressed[1][index].v * (
 				        f4_curr[0][pos[0]         ][pos[1]][pos[2]  ].v -
 				        f4_curr[0][pos[0]         ][pos[1]][pos[2]-1].v -
 				        f4_curr[2][pos[0]         ][pos[1]][pos[2]  ].v +
 				        f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]  ].v
 				    );
 				// z-polarization
-				f4_volt[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv_Compressed[2][index].v;
+				f4_volt[2][pos[0]][pos[1]][pos[2]].v *=
-				f4_volt[2][pos[0]][pos[1]][pos[2]].v += Op->f4_vi_Compressed[2][index].v * ( f4_curr[1][pos[0]][pos[1]][pos[2]].v - f4_curr[1][pos[0]-shift[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]].v + f4_curr[0][pos[0]][pos[1]-shift[1]][pos[2]].v);
+				    Op->f4_vv_Compressed[2][index].v;
 				f4_volt[2][pos[0]][pos[1]][pos[2]].v +=
 				    Op->f4_vi_Compressed[2][index].v * (
 				        f4_curr[1][pos[0]         ][pos[1]]         [pos[2]].v -
 				        f4_curr[1][pos[0]-shift[0]][pos[1]]         [pos[2]].v -
 				        f4_curr[0][pos[0]         ][pos[1]]         [pos[2]].v +
 				        f4_curr[0][pos[0]         ][pos[1]-shift[1]][pos[2]].v
 				    );
 			}
 			// for pos[2] = 0
 			// x-polarization
 			index = Op->m_Op_index[pos[0]][pos[1]][0];
 #ifdef __SSE2__
-			temp.v = (__m128)_mm_slli_si128( (__m128i)f4_curr[1][pos[0]][pos[1]][numVectors-1].v, 4 );
+			temp.v = (__m128)_mm_slli_si128(
 			             (__m128i)f4_curr[1][pos[0]][pos[1]][numVectors-1].v, 4
 			         );
 #else
 			temp.f[0] = 0;
 			temp.f[1] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[0];
 			temp.f[2] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[1];
 			temp.f[3] = f4_curr[1][pos[0]][pos[1]][numVectors-1].f[2];
 #endif
-			f4_volt[0][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[0][index].v;
+			f4_volt[0][pos[0]][pos[1]][0].v *=
-			f4_volt[0][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[0][index].v * ( f4_curr[2][pos[0]][pos[1]][0].v - f4_curr[2][pos[0]][pos[1]-shift[1]][0].v - f4_curr[1][pos[0]][pos[1]][0].v + temp.v );
+			    Op->f4_vv_Compressed[0][index].v;
 			f4_volt[0][pos[0]][pos[1]][0].v +=
 			    Op->f4_vi_Compressed[0][index].v * (
 			        f4_curr[2][pos[0]][pos[1]         ][0].v -
 			        f4_curr[2][pos[0]][pos[1]-shift[1]][0].v -
 			        f4_curr[1][pos[0]][pos[1]         ][0].v +
 			        temp.v
 			    );
 			// y-polarization
 #ifdef __SSE2__
-			temp.v = (__m128)_mm_slli_si128( (__m128i)f4_curr[0][pos[0]][pos[1]][numVectors-1].v, 4 );
+			temp.v = (__m128)_mm_slli_si128(
 			             (__m128i)f4_curr[0][pos[0]][pos[1]][numVectors-1].v, 4
 			         );
 #else
 			temp.f[0] = 0;
 			temp.f[1] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[0];
 			temp.f[2] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[1];
 			temp.f[3] = f4_curr[0][pos[0]][pos[1]][numVectors-1].f[2];
 #endif
-			f4_volt[1][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[1][index].v;
+			f4_volt[1][pos[0]][pos[1]][0].v *=
-			f4_volt[1][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[1][index].v * ( f4_curr[0][pos[0]][pos[1]][0].v - temp.v - f4_curr[2][pos[0]][pos[1]][0].v + f4_curr[2][pos[0]-shift[0]][pos[1]][0].v);
+			    Op->f4_vv_Compressed[1][index].v;
 			f4_volt[1][pos[0]][pos[1]][0].v +=
 			    Op->f4_vi_Compressed[1][index].v * (
 			        f4_curr[0][pos[0]         ][pos[1]][0].v -
 			        temp.v -
 			        f4_curr[2][pos[0]         ][pos[1]][0].v +
 			        f4_curr[2][pos[0]-shift[0]][pos[1]][0].v
 			    );
 			// z-polarization
-			f4_volt[2][pos[0]][pos[1]][0].v *= Op->f4_vv_Compressed[2][index].v;
+			f4_volt[2][pos[0]][pos[1]][0].v *=
-			f4_volt[2][pos[0]][pos[1]][0].v += Op->f4_vi_Compressed[2][index].v * ( f4_curr[1][pos[0]][pos[1]][0].v - f4_curr[1][pos[0]-shift[0]][pos[1]][0].v - f4_curr[0][pos[0]][pos[1]][0].v + f4_curr[0][pos[0]][pos[1]-shift[1]][0].v);
+			    Op->f4_vv_Compressed[2][index].v;
 			f4_volt[2][pos[0]][pos[1]][0].v +=
 			    Op->f4_vi_Compressed[2][index].v * (
 			        f4_curr[1][pos[0]         ][pos[1]         ][0].v -
 			        f4_curr[1][pos[0]-shift[0]][pos[1]         ][0].v -
 			        f4_curr[0][pos[0]         ][pos[1]         ][0].v +
 			        f4_curr[0][pos[0]         ][pos[1]-shift[1]][0].v
 			    );
 		}
 		++pos[0];
 	}
@ -116,47 +162,93 @@ void Engine_SSE_Compressed::UpdateCurrents(unsigned int startX, unsigned int num
 			{
 				index = Op->m_Op_index[pos[0]][pos[1]][pos[2]];
 				// x-pol
-				f4_curr[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[0][index].v;
+				f4_curr[0][pos[0]][pos[1]][pos[2]].v *=
-				f4_curr[0][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[0][index].v * ( f4_volt[2][pos[0]][pos[1]][pos[2]].v - f4_volt[2][pos[0]][pos[1]+1][pos[2]].v - f4_volt[1][pos[0]][pos[1]][pos[2]].v + f4_volt[1][pos[0]][pos[1]][pos[2]+1].v);
+				    Op->f4_ii_Compressed[0][index].v;
 				f4_curr[0][pos[0]][pos[1]][pos[2]].v +=
 				    Op->f4_iv_Compressed[0][index].v * (
 				        f4_volt[2][pos[0]][pos[1]  ][pos[2]  ].v -
 				        f4_volt[2][pos[0]][pos[1]+1][pos[2]  ].v -
 				        f4_volt[1][pos[0]][pos[1]  ][pos[2]  ].v +
 				        f4_volt[1][pos[0]][pos[1]  ][pos[2]+1].v
 				    );
 				// y-pol
-				f4_curr[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[1][index].v;
+				f4_curr[1][pos[0]][pos[1]][pos[2]].v *=
-				f4_curr[1][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[1][index].v * ( f4_volt[0][pos[0]][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]+1].v - f4_volt[2][pos[0]][pos[1]][pos[2]].v + f4_volt[2][pos[0]+1][pos[1]][pos[2]].v);
+				    Op->f4_ii_Compressed[1][index].v;
 				f4_curr[1][pos[0]][pos[1]][pos[2]].v +=
 				    Op->f4_iv_Compressed[1][index].v * (
 				        f4_volt[0][pos[0]  ][pos[1]][pos[2]  ].v -
 				        f4_volt[0][pos[0]  ][pos[1]][pos[2]+1].v -
 				        f4_volt[2][pos[0]  ][pos[1]][pos[2]  ].v +
 				        f4_volt[2][pos[0]+1][pos[1]][pos[2]  ].v
 				    );
 				// z-pol
-				f4_curr[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii_Compressed[2][index].v;
+				f4_curr[2][pos[0]][pos[1]][pos[2]].v *=
-				f4_curr[2][pos[0]][pos[1]][pos[2]].v += Op->f4_iv_Compressed[2][index].v * ( f4_volt[1][pos[0]][pos[1]][pos[2]].v - f4_volt[1][pos[0]+1][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]].v + f4_volt[0][pos[0]][pos[1]+1][pos[2]].v);
+				    Op->f4_ii_Compressed[2][index].v;
 				f4_curr[2][pos[0]][pos[1]][pos[2]].v +=
 				    Op->f4_iv_Compressed[2][index].v * (
 				        f4_volt[1][pos[0]  ][pos[1]  ][pos[2]].v -
 				        f4_volt[1][pos[0]+1][pos[1]  ][pos[2]].v -
 				        f4_volt[0][pos[0]  ][pos[1]  ][pos[2]].v +
 				        f4_volt[0][pos[0]  ][pos[1]+1][pos[2]].v
 				    );
 			}
 			index = Op->m_Op_index[pos[0]][pos[1]][numVectors-1];
 			// for pos[2] = numVectors-1
 			// x-pol
 #ifdef __SSE2__
-			temp.v = (__m128)_mm_srli_si128( (__m128i)f4_volt[1][pos[0]][pos[1]][0].v, 4 );
+			temp.v = (__m128)_mm_srli_si128(
 			             (__m128i)f4_volt[1][pos[0]][pos[1]][0].v, 4
 			         );
 #else
 			temp.f[0] = f4_volt[1][pos[0]][pos[1]][0].f[1];
 			temp.f[1] = f4_volt[1][pos[0]][pos[1]][0].f[2];
 			temp.f[2] = f4_volt[1][pos[0]][pos[1]][0].f[3];
 			temp.f[3] = 0;
 #endif
-			f4_curr[0][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[0][index].v;
+			f4_curr[0][pos[0]][pos[1]][numVectors-1].v *=
-			f4_curr[0][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[0][index].v * ( f4_volt[2][pos[0]][pos[1]][numVectors-1].v - f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v - f4_volt[1][pos[0]][pos[1]][numVectors-1].v + temp.v);
+			    Op->f4_ii_Compressed[0][index].v;
 			f4_curr[0][pos[0]][pos[1]][numVectors-1].v +=
 			    Op->f4_iv_Compressed[0][index].v * (
 			        f4_volt[2][pos[0]][pos[1]  ][numVectors-1].v -
 			        f4_volt[2][pos[0]][pos[1]+1][numVectors-1].v -
 			        f4_volt[1][pos[0]][pos[1]  ][numVectors-1].v +
 			        temp.v
 			    );
 			// y-pol
 #ifdef __SSE2__
-			temp.v = (__m128)_mm_srli_si128( (__m128i)f4_volt[0][pos[0]][pos[1]][0].v, 4 );
+			temp.v = (__m128)_mm_srli_si128(
 			             (__m128i)f4_volt[0][pos[0]][pos[1]][0].v, 4
 			         );
 #else
 			temp.f[0] = f4_volt[0][pos[0]][pos[1]][0].f[1];
 			temp.f[1] = f4_volt[0][pos[0]][pos[1]][0].f[2];
 			temp.f[2] = f4_volt[0][pos[0]][pos[1]][0].f[3];
 			temp.f[3] = 0;
 #endif
-			f4_curr[1][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[1][index].v;
+			f4_curr[1][pos[0]][pos[1]][numVectors-1].v *=
-			f4_curr[1][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[1][index].v * ( f4_volt[0][pos[0]][pos[1]][numVectors-1].v - temp.v - f4_volt[2][pos[0]][pos[1]][numVectors-1].v + f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v);
+			    Op->f4_ii_Compressed[1][index].v;
 			f4_curr[1][pos[0]][pos[1]][numVectors-1].v +=
 			    Op->f4_iv_Compressed[1][index].v * (
 			        f4_volt[0][pos[0]  ][pos[1]][numVectors-1].v -
 			        temp.v -
 			        f4_volt[2][pos[0]  ][pos[1]][numVectors-1].v +
 			        f4_volt[2][pos[0]+1][pos[1]][numVectors-1].v
 			    );
 			// z-pol
-			f4_curr[2][pos[0]][pos[1]][numVectors-1].v *= Op->f4_ii_Compressed[2][index].v;
+			f4_curr[2][pos[0]][pos[1]][numVectors-1].v *=
-			f4_curr[2][pos[0]][pos[1]][numVectors-1].v += Op->f4_iv_Compressed[2][index].v * ( f4_volt[1][pos[0]][pos[1]][numVectors-1].v - f4_volt[1][pos[0]+1][pos[1]][numVectors-1].v - f4_volt[0][pos[0]][pos[1]][numVectors-1].v + f4_volt[0][pos[0]][pos[1]+1][numVectors-1].v);
+			    Op->f4_ii_Compressed[2][index].v;
 			f4_curr[2][pos[0]][pos[1]][numVectors-1].v +=
 			    Op->f4_iv_Compressed[2][index].v * (
 			        f4_volt[1][pos[0]  ][pos[1]  ][numVectors-1].v -
 			        f4_volt[1][pos[0]+1][pos[1]  ][numVectors-1].v -
 			        f4_volt[0][pos[0]  ][pos[1]  ][numVectors-1].v +
 			        f4_volt[0][pos[0]  ][pos[1]+1][numVectors-1].v
 			    );
 		}
 		++pos[0];
 	}