diff --git a/ice40/arch.cc b/ice40/arch.cc index d44d8c19..3b9a6992 100644 --- a/ice40/arch.cc +++ b/ice40/arch.cc @@ -651,6 +651,7 @@ delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const return 250; } +#if 1 int xd = sink_loc.x - driver_loc.x, yd = sink_loc.y - driver_loc.y; int xscale = 120, yscale = 120, offset = 0; @@ -665,6 +666,35 @@ delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const offset += 260; return xscale * abs(xd) + yscale * abs(yd) + offset; +#else + float model1_param_offset = 902.1066988; + float model1_param_norm1 = 169.80428447; + float model1_param_norm2 = -503.28635487; + float model1_param_norm3 = 402.96583807; + + float model2_param_offset = -1.09578873e+03; + float model2_param_linear = 5.01094876e-01; + float model2_param_sqrt = 4.71761281e+01; + + float dx = fabsf(sink_loc.x - driver_loc.x); + float dy = fabsf(sink_loc.y - driver_loc.y); + float norm1 = dx + dy; + + float dx2 = dx * dx; + float dy2 = dy * dy; + float norm2 = sqrtf(dx2 + dy2); + + float dx3 = dx2 * dx; + float dy3 = dy2 * dy; + float norm3 = powf(dx3 + dy3, 1.0/3.0); + + float v = model1_param_offset; + v += model1_param_norm1 * norm1; + v += model1_param_norm2 * norm2; + v += model1_param_norm3 * norm3; + + return model2_param_offset + model2_param_linear * v + model2_param_sqrt * sqrtf(v); +#endif } delay_t Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay_t budget) const diff --git a/ice40/delay.cc b/ice40/delay.cc index 8bf8211c..d63af5d1 100644 --- a/ice40/delay.cc +++ b/ice40/delay.cc @@ -23,6 +23,8 @@ NEXTPNR_NAMESPACE_BEGIN +#define NUM_FUZZ_ROUTES 100000 + void ice40DelayFuzzerMain(Context *ctx) { std::vector srcWires, dstWires; @@ -53,17 +55,25 @@ void ice40DelayFuzzerMain(Context *ctx) int index = 0; int cnt = 0; - while (cnt < 1000) + while (cnt < NUM_FUZZ_ROUTES) { - NPNR_ASSERT(index < int(srcWires.size())); - NPNR_ASSERT(index < int(dstWires.size())); + if (index >= 
int(srcWires.size()) || index >= int(dstWires.size())) { + index = 0; + ctx->shuffle(srcWires); + ctx->shuffle(dstWires); + } WireId src = srcWires[index]; WireId dst = dstWires[index++]; std::unordered_map route; +#if NUM_FUZZ_ROUTES <= 1000 if (!ctx->getActualRouteDelay(src, dst, nullptr, &route, false)) continue; +#else + if (!ctx->getActualRouteDelay(src, dst, nullptr, &route, true)) + continue; +#endif WireId cursor = dst; delay_t delay = 0; @@ -85,6 +95,9 @@ void ice40DelayFuzzerMain(Context *ctx) } cnt++; + + if (cnt % 100 == 0) + fprintf(stderr, "Fuzzed %d arcs.\n", cnt); } } diff --git a/ice40/tmfuzz.py b/ice40/tmfuzz.py index 0f725932..caf3bc80 100644 --- a/ice40/tmfuzz.py +++ b/ice40/tmfuzz.py @@ -10,6 +10,8 @@ device = "hx8k" sel_src_type = "LUTFF_OUT" sel_dst_type = "LUTFF_IN_LUT" +#%% Read fuzz data + src_dst_pairs = defaultdict(lambda: 0) delay_data = list() @@ -47,23 +49,153 @@ with open("tmfuzz_%s.txt" % device, "r") as f: delay_data = np.array(delay_data) -#%% +#%% Apply simple low-weight blurring to fill gaps + +for i in range(1): + neigh_sum = np.zeros((41, 41)) + neigh_sum2 = np.zeros((41, 41)) + neigh_count = np.zeros((41, 41)) + + for x in range(41): + for y in range(41): + for p in range(-1, 2): + for q in range(-1, 2): + if p == 0 and q == 0: + continue + if 0 <= (x+p) <= 40: + if 0 <= (y+q) <= 40: + neigh_sum[x, y] += delay_map_sum[x+p, y+q] + neigh_sum2[x, y] += delay_map_sum2[x+p, y+q] + neigh_count[x, y] += delay_map_count[x+p, y+q] + + delay_map_sum += 0.1 * neigh_sum + delay_map_sum2 += 0.1 * neigh_sum2 + delay_map_count += 0.1 * neigh_count + +delay_map = delay_map_sum / delay_map_count +delay_map_std = np.sqrt(delay_map_count*delay_map_sum2 - delay_map_sum**2) / delay_map_count + +#%% Print src-dst-pair summary print("Src-Dst-Type pair summary:") for cnt, src, dst in sorted([(v, k[0], k[1]) for k, v in src_dst_pairs.items()]): print("%20s %20s %5d%s" % (src, dst, cnt, " *" if src == sel_src_type and dst == sel_dst_type else ""))
print() -#%% - -plt.figure() -plt.imshow(delay_map_sum / delay_map_count) -plt.colorbar() -plt.show() - -#%% +#%% Plot estimate vs actual delay plt.figure() plt.plot(delay_data[:,0], delay_data[:,1], ".") plt.show() +#%% Plot delay heatmap and std dev heatmap + +plt.figure(figsize=(9, 3)) +plt.subplot(121) +plt.title("Actual Delay Map") +plt.imshow(delay_map) +plt.colorbar() +plt.subplot(122) +plt.title("Standard Deviation") +plt.imshow(delay_map_std) +plt.colorbar() +plt.show() + +#%% Linear least-squares fits of delayEstimate models + +def nonlinearPreprocessor1(dx, dy): + dx, dy = abs(dx), abs(dy) + values = [1.0] + values.append(dx + dy) # 1-norm + values.append((dx**2 + dy**2)**(1/2)) # 2-norm + values.append((dx**3 + dy**3)**(1/3)) # 3-norm + return np.array(values) + +A = np.zeros((41*41, len(nonlinearPreprocessor1(0, 0)))) +b = np.zeros(41*41) + +index = 0 +for x in range(41): + for y in range(41): + A[index, :] = nonlinearPreprocessor1(x-20, y-20) + b[index] = delay_map[x, y] + index += 1 + +model1_params, _, _, _ = np.linalg.lstsq(A, b) +print("Model #1 parameters:", model1_params) + +model1_map = np.zeros((41, 41)) +for x in range(41): + for y in range(41): + v = np.dot(model1_params, nonlinearPreprocessor1(x-20, y-20)) + model1_map[x, y] = v + +plt.figure(figsize=(9, 3)) +plt.subplot(121) +plt.title("Model #1 Delay Map") +plt.imshow(model1_map) +plt.colorbar() +plt.subplot(122) +plt.title("Model #1 Error Map") +plt.imshow(model1_map - delay_map) +plt.colorbar() +plt.show() + +plt.figure(figsize=(8, 3)) +plt.title("Model #1 vs Actual Delay") +plt.plot(delay_map.flat, model1_map.flat, ".") +plt.plot([0, 4000], [0, 4000], "k") +plt.ylabel("Model #1 Delay") +plt.xlabel("Actual Delay") +plt.grid() +plt.show() + +print("Total RMS error: %f" % np.sqrt(np.mean((delay_map - model1_map)**2))) +print() + +if True: + def nonlinearPreprocessor2(v): + return np.array([1, v, np.sqrt(v)]) + + A = np.zeros((41*41, len(nonlinearPreprocessor2(0)))) + b = np.zeros(41*41) + 
+ index = 0 + for x in range(41): + for y in range(41): + A[index, :] = nonlinearPreprocessor2(model1_map[x, y]) + b[index] = delay_map[x, y] + index += 1 + + model2_params, _, _, _ = np.linalg.lstsq(A, b) + print("Model #2 parameters:", model2_params) + + model2_map = np.zeros((41, 41)) + for x in range(41): + for y in range(41): + v = np.dot(model1_params, nonlinearPreprocessor1(x-20, y-20)) + v = np.dot(model2_params, nonlinearPreprocessor2(v)) + model2_map[x, y] = v + + plt.figure(figsize=(9, 3)) + plt.subplot(121) + plt.title("Model #2 Delay Map") + plt.imshow(model2_map) + plt.colorbar() + plt.subplot(122) + plt.title("Model #2 Error Map") + plt.imshow(model2_map - delay_map) + plt.colorbar() + plt.show() + + plt.figure(figsize=(8, 3)) + plt.title("Model #2 vs Actual Delay") + plt.plot(delay_map.flat, model2_map.flat, ".") + plt.plot([0, 4000], [0, 4000], "k") + plt.ylabel("Model #2 Delay") + plt.xlabel("Actual Delay") + plt.grid() + plt.show() + + print("Total RMS error: %f" % np.sqrt(np.mean((delay_map - model2_map)**2))) + print()