From 9a42b64a6853a3802a6d934a1ca251e84ddb7e07 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sat, 1 Dec 2018 11:54:26 +0000 Subject: [PATCH 01/17] timing: Add criticality calculation to timing analysis Signed-off-by: David Shah --- common/timing.cc | 150 ++++++++++++++++++++++++++++++++++++++++++- common/timing_opt.cc | 42 ++++++++++++ common/timing_opt.h | 30 +++++++++ 3 files changed, 220 insertions(+), 2 deletions(-) create mode 100644 common/timing_opt.cc create mode 100644 common/timing_opt.h diff --git a/common/timing.cc b/common/timing.cc index 88ab14c2..55d3a46f 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -85,7 +85,16 @@ struct CriticalPath delay_t path_period; }; +// Data for the timing optimisation algorithm +struct NetCriticalityInfo +{ + // One each per user + std::vector slack; + std::vector criticality; +}; + typedef std::unordered_map CriticalPathMap; +typedef std::unordered_map NetCriticalityMap; struct Timing { @@ -96,6 +105,7 @@ struct Timing CriticalPathMap *crit_path; DelayFrequency *slack_histogram; IdString async_clock; + NetCriticalityMap *net_crit; struct TimingData { @@ -105,13 +115,15 @@ struct Timing unsigned max_path_length = 0; delay_t min_remaining_budget; bool false_startpoint = false; + std::vector min_required; std::unordered_map arrival_time; }; Timing(Context *ctx, bool net_delays, bool update, CriticalPathMap *crit_path = nullptr, - DelayFrequency *slack_histogram = nullptr) + DelayFrequency *slack_histogram = nullptr, NetCriticalityMap *net_crit = nullptr) : ctx(ctx), net_delays(net_delays), update(update), min_slack(1.0e12 / ctx->target_freq), - crit_path(crit_path), slack_histogram(slack_histogram), async_clock(ctx->id("$async$")) + crit_path(crit_path), slack_histogram(slack_histogram), net_crit(net_crit), + async_clock(ctx->id("$async$")) { } @@ -454,6 +466,140 @@ struct Timing std::reverse(cp_ports.begin(), cp_ports.end()); } } + + if (net_crit) { + NPNR_ASSERT(crit_path); + // Go through in reverse topographical order to set required times + for (auto net : boost::adaptors::reverse(topographical_order)) { + if (!net_data.count(net)) + continue; + auto &nd_map = net_data.at(net); + for (auto &startdomain : nd_map) { + auto &nd = startdomain.second; + if (nd.false_startpoint) + continue; + const delay_t net_length_plus_one = nd.max_path_length + 1; + auto &net_min_remaining_budget = nd.min_remaining_budget; + if (nd.min_required.empty()) + nd.min_required.resize(net->users.size(), std::numeric_limits::max()); + delay_t net_min_required = std::numeric_limits::max(); + for (size_t i = 0; i < net->users.size(); i++) { + auto &usr = net->users.at(i); + auto net_delay = ctx->getNetinfoRouteDelay(net, usr); + int port_clocks; + TimingPortClass portClass = ctx->getPortTimingClass(usr.cell, usr.port, port_clocks); + if (portClass == TMG_REGISTER_INPUT || portClass == TMG_ENDPOINT) { + auto process_endpoint = [&](IdString clksig, ClockEdge edge, delay_t setup) { + delay_t period; + // Set default period + if (edge == startdomain.first.edge) { + period = clk_period; + } else { + period = clk_period / 2; + } + if (clksig != async_clock) { + if (ctx->nets.at(clksig)->clkconstr) { + if (edge == startdomain.first.edge) { + // same edge + period = ctx->nets.at(clksig)->clkconstr->period.minDelay(); + } else if (edge == RISING_EDGE) { + // falling -> rising + period = ctx->nets.at(clksig)->clkconstr->low.minDelay(); + } else if (edge == FALLING_EDGE) { + // rising -> falling + period = ctx->nets.at(clksig)->clkconstr->high.minDelay(); + } + } + } + nd.min_required.at(i) = std::min(period - setup, nd.min_required.at(i)); + }; + if (portClass == TMG_REGISTER_INPUT) { + for (int j = 0; j < port_clocks; j++) { + TimingClockingInfo clkInfo = ctx->getPortClockingInfo(usr.cell, usr.port, j); + const NetInfo *clknet = get_net_or_empty(usr.cell, clkInfo.clock_port); + IdString clksig = clknet ? clknet->name : async_clock; + process_endpoint(clksig, clknet ? clkInfo.edge : RISING_EDGE, + clkInfo.setup.maxDelay()); + } + } else { + process_endpoint(async_clock, RISING_EDGE, 0); + } + } + net_min_required = std::min(net_min_required, nd.min_required.at(i) - net_delay); + } + PortRef &drv = net->driver; + if (drv.cell == nullptr) + continue; + for (const auto &port : drv.cell->ports) { + if (port.second.type != PORT_IN || !port.second.net) + continue; + DelayInfo comb_delay; + bool is_path = ctx->getCellDelay(drv.cell, port.first, drv.port, comb_delay); + if (!is_path) + continue; + NetInfo *sink_net = port.second.net; + if (net_data.count(sink_net) && net_data.at(sink_net).count(startdomain.first)) { + auto &sink_nd = net_data.at(sink_net).at(startdomain.first); + if (sink_nd.min_required.empty()) + sink_nd.min_required.resize(sink_net->users.size(), + std::numeric_limits::max()); + for (size_t i = 0; i < sink_net->users.size(); i++) { + auto &user = sink_net->users.at(i); + if (user.cell == drv.cell && user.port == port.first) { + sink_nd.min_required.at(i) = net_min_required - comb_delay.maxDelay(); + break; + } + } + } + } + } + } + std::unordered_map worst_slack; + + // Assign slack values + for (auto &net_entry : net_data) { + const NetInfo *net = net_entry.first; + for (auto &startdomain : net_entry.second) { + auto &nd = startdomain.second; + if (nd.min_required.empty()) + continue; + auto &nc = (*net_crit)[net->name]; + if (nc.slack.empty()) + nc.slack.resize(net->users.size(), std::numeric_limits::max()); + for (size_t i = 0; i < net->users.size(); i++) { + delay_t slack = nd.min_required.at(i) - + (nd.max_arrival + ctx->getNetinfoRouteDelay(net, net->users.at(i))); + if (worst_slack.count(startdomain.first)) + worst_slack.at(startdomain.first) = std::min(worst_slack.at(startdomain.first), slack); + else + worst_slack[startdomain.first] = slack; + nc.slack.at(i) = std::min(nc.slack.at(i), slack); + } + } + } + // Assign criticality values + for (auto &net_entry : net_data) { + const NetInfo *net = net_entry.first; + for (auto &startdomain : net_entry.second) { + auto &nd = startdomain.second; + if (nd.min_required.empty()) + continue; + auto &nc = (*net_crit)[net->name]; + if (nc.slack.empty()) + continue; + if (nc.criticality.empty()) + nc.criticality.resize(net->users.size(), 0); + // Only consider intra-clock paths for criticality + if (!crit_path->count(ClockPair{startdomain.first, startdomain.first})) + continue; + delay_t dmax = crit_path->at(ClockPair{startdomain.first, startdomain.first}).path_delay; + for (size_t i = 0; i < net->users.size(); i++) { + float criticality = 1.0 - ((nc.slack.at(i) - worst_slack.at(startdomain.first)) / dmax); + nc.criticality.at(i) = std::max(nc.criticality.at(i), criticality); + } + } + } + } return min_slack; } diff --git a/common/timing_opt.cc b/common/timing_opt.cc new file mode 100644 index 00000000..b33c2db0 --- /dev/null +++ b/common/timing_opt.cc @@ -0,0 +1,42 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2018 David Shah + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +/* + * Timing-optimised detailed placement algorithm + * Based on "An Effective Timing-Driven Detailed Placement Algorithm for FPGAs" + * https://www.cerc.utexas.edu/utda/publications/C205.pdf + */ + +#include "timing_opt.h" +#include "nextpnr.h" +NEXTPNR_NAMESPACE_BEGIN + +class TimingOptimiser +{ + public: + TimingOptimiser(Context *ctx) : ctx(ctx){}; + bool optimise() {} + + private: + Context *ctx; +}; + +bool timing_opt(Context *ctx, TimingOptCfg cfg) { return TimingOptimiser(ctx).optimise(); } + +NEXTPNR_NAMESPACE_END diff --git a/common/timing_opt.h b/common/timing_opt.h new file mode 100644 index 00000000..60df7df9 --- /dev/null +++ b/common/timing_opt.h @@ -0,0 +1,30 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2018 David Shah + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "nextpnr.h" + +NEXTPNR_NAMESPACE_BEGIN + +struct TimingOptCfg : public Settings +{ +}; + +extern bool timing_opt(Context *ctx, TimingOptCfg cfg); + +NEXTPNR_NAMESPACE_END From 83e32775775cc06d0f70a18e2a18089c38ff3c35 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sat, 1 Dec 2018 13:22:57 +0000 Subject: [PATCH 02/17] timing_opt: Implement neighbour Bel finder Signed-off-by: David Shah --- common/timing.cc | 2 ++ common/timing_opt.cc | 77 ++++++++++++++++++++++++++++++++++++++++++++ common/timing_opt.h | 4 +++ 3 files changed, 83 insertions(+) diff --git a/common/timing.cc b/common/timing.cc index 55d3a46f..ebe3a177 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -91,6 +91,7 @@ struct NetCriticalityInfo // One each per user std::vector slack; std::vector criticality; + unsigned max_path_length = 0; }; typedef std::unordered_map CriticalPathMap; @@ -597,6 +598,7 @@ struct Timing float criticality = 1.0 - ((nc.slack.at(i) - worst_slack.at(startdomain.first)) / dmax); nc.criticality.at(i) = std::max(nc.criticality.at(i), criticality); } + nc.max_path_length = std::max(nc.max_path_length, nd.max_path_length); } } } diff --git a/common/timing_opt.cc b/common/timing_opt.cc index b33c2db0..de8e00a5 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -34,6 +34,83 @@ class TimingOptimiser bool optimise() {} private: + // Ratio of available to already-candidates to begin borrowing + const float borrow_thresh = 0.2; + + bool check_cell_delay_limits(CellInfo *cell) { + + } + + bool acceptable_bel_candidate(CellInfo *cell, BelId newBel) { + bool result = true; + // At the moment we have to actually do the swap to get an accurate legality result + // Switching to macro swaps might help with this + BelId oldBel = cell->bel; + CellInfo *other_cell = ctx->getBoundBelCell(newBel); + if (other_cell != nullptr && other_cell->belStrength > STRENGTH_WEAK) { + return false; + } + + ctx->bindBel(newBel, cell, STRENGTH_WEAK); + if (other_cell != nullptr) { + ctx->bindBel(oldBel, other_cell, STRENGTH_WEAK); + } + if (!ctx->isBelLocationValid(newBel) || ((other_cell != nullptr && !ctx->isBelLocationValid(oldBel)))) { + result = false; + goto unbind; + } + + + +unbind: + ctx->unbindBel(newBel); + if (other_cell != nullptr) + ctx->unbindBel(oldBel); + // Undo the swap + ctx->bindBel(oldBel, cell, STRENGTH_WEAK); + if (other_cell != nullptr) { + ctx->bindBel(newBel, other_cell, STRENGTH_WEAK); + } + return result; + } + + void find_neighbours(CellInfo *cell, int d) { + BelId curr = cell->bel; + Loc curr_loc = ctx->getBelLocation(curr); + for (int dy = -d; dy <= d; dy++) { + for (int dx = -d; dx <= d; dx++) { + if (dx == 0 && dy == 0) + continue; + // Go through all the Bels at this location + // First, find all bels of the correct type that are either unbound or bound normally + // Strongly bound bels are ignored + // FIXME: This means that we cannot touch carry chains or similar relatively constrained macros + std::vector free_bels_at_loc; + std::vector bound_bels_at_loc; + for (auto bel : ctx->getBelsByTile(curr_loc.x + dx, curr_loc.y + dy)) { + if (ctx->getBelType(bel) != cell->type) + continue; + CellInfo *bound = ctx->getBoundBelCell(bel); + if (bound == nullptr) { + free_bels_at_loc.push_back(bel); + } else if (bound->belStrength <= STRENGTH_WEAK) { + bound_bels_at_loc.push_back(bel); + } + } + bool found = false; + + if (found) + continue; + } + } + } + + // Current candidate Bels for cells (linked in both direction> + std::vector path_cells; + std::unordered_map> cell_neighbour_bels; + std::unordered_map> bel_candidate_cells; + // Map net users to net delay limit + std::unordered_map> max_net_delay; Context *ctx; }; diff --git a/common/timing_opt.h b/common/timing_opt.h index 60df7df9..746294bb 100644 --- a/common/timing_opt.h +++ b/common/timing_opt.h @@ -23,6 +23,10 @@ NEXTPNR_NAMESPACE_BEGIN struct TimingOptCfg : public Settings { + // The timing optimiser will *only* optimise cells of these types + // Normally these would only be logic cells (or tiles if applicable), the algorithm makes little sense + // for other cell types + std::unordered_set cellTypes; }; extern bool timing_opt(Context *ctx, TimingOptCfg cfg); From 2de506c071b090c18977a594efbd6effd0315bf5 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sat, 1 Dec 2018 13:43:12 +0000 Subject: [PATCH 03/17] timing_opt: Functions to calculate arc delay limits Signed-off-by: David Shah --- common/timing.cc | 17 +++++++------- common/timing.h | 13 +++++++++++ common/timing_opt.cc | 55 +++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 74 insertions(+), 11 deletions(-) diff --git a/common/timing.cc b/common/timing.cc index ebe3a177..1f48261d 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -85,14 +85,7 @@ struct CriticalPath delay_t path_period; }; -// Data for the timing optimisation algorithm -struct NetCriticalityInfo -{ - // One each per user - std::vector slack; - std::vector criticality; - unsigned max_path_length = 0; -}; + typedef std::unordered_map CriticalPathMap; typedef std::unordered_map NetCriticalityMap; @@ -599,6 +592,7 @@ struct Timing nc.criticality.at(i) = std::max(nc.criticality.at(i), criticality); } nc.max_path_length = std::max(nc.max_path_length, nd.max_path_length); + nc.cd_worst_slack = std::min(nc.cd_worst_slack, worst_slack.at(startdomain.first)); } } } @@ -914,4 +908,11 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p } } +void get_criticalities(Context *ctx, NetCriticalityMap *net_crit) { + CriticalPathMap crit_paths; + net_crit->clear(); + Timing timing(ctx, true, true, &crit_paths, nullptr, net_crit); + timing.walk_paths(); +} + NEXTPNR_NAMESPACE_END diff --git a/common/timing.h b/common/timing.h index 42f928dc..f1d18e8a 100644 --- a/common/timing.h +++ b/common/timing.h @@ -32,6 +32,19 @@ void assign_budget(Context *ctx, bool quiet = false); void timing_analysis(Context *ctx, bool slack_histogram = true, bool print_fmax = true, bool print_path = false, bool warn_on_failure = false); +// Data for the timing optimisation algorithm +struct NetCriticalityInfo +{ + // One each per user + std::vector slack; + std::vector criticality; + unsigned max_path_length = 0; + delay_t cd_worst_slack = std::numeric_limits::max(); +}; + +typedef std::unordered_map NetCriticalityMap; +void get_criticalities(Context *ctx, NetCriticalityMap *net_crit); + NEXTPNR_NAMESPACE_END #endif diff --git a/common/timing_opt.cc b/common/timing_opt.cc index de8e00a5..97860a23 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -23,8 +23,10 @@ * https://www.cerc.utexas.edu/utda/publications/C205.pdf */ +#include "timing.h" #include "timing_opt.h" #include "nextpnr.h" +#include "util.h" NEXTPNR_NAMESPACE_BEGIN class TimingOptimiser @@ -37,8 +39,52 @@ class TimingOptimiser // Ratio of available to already-candidates to begin borrowing const float borrow_thresh = 0.2; + void setup_delay_limits() { + for (auto net : sorted(ctx->nets)) { + NetInfo *ni = net.second; + max_net_delay[ni].clear(); + max_net_delay[ni].resize(ni->users.size(), std::numeric_limits::max()); + if (!net_crit.count(net.first)) + continue; + auto &nc = net_crit.at(net.first); + if (nc.slack.empty()) + continue; + for (size_t i = 0; i < ni->users.size(); i++) { + delay_t net_delay = ctx->getNetinfoRouteDelay(ni, ni->users.at(i)); + max_net_delay[ni].at(i) = net_delay + ((nc.slack.at(i) - nc.cd_worst_slack) / nc.max_path_length); + } + } + } + bool check_cell_delay_limits(CellInfo *cell) { - + for (const auto &port : cell->ports) { + int nc; + if (ctx->getPortTimingClass(cell, port.first, nc) == TMG_IGNORE) + continue; + NetInfo *net = port.second.net; + if (net == nullptr) + continue; + if (port.second.type == PORT_IN) { + if (net->driver.cell == nullptr || net->driver.cell->bel == BelId()) + continue; + BelId srcBel = net->driver.cell->bel; + if (ctx->estimateDelay(ctx->getBelPinWire(srcBel, net->driver.port), + ctx->getBelPinWire(cell->bel, port.first)) > max_net_delay.at(std::make_pair(cell->name, port.first))) + return false; + } else if (port.second.type == PORT_OUT) { + for (auto user : net->users) { + // This could get expensive for high-fanout nets?? + BelId dstBel = user.cell->bel; + if (dstBel == BelId()) + continue; + if (ctx->estimateDelay(ctx->getBelPinWire(cell->bel, port.first), + ctx->getBelPinWire(dstBel, user.port)) > max_net_delay.at(std::make_pair(user.cell->name, user.port))) + return false; + } + } + + } + return true; } bool acceptable_bel_candidate(CellInfo *cell, BelId newBel) { @@ -109,8 +155,11 @@ unbind: std::vector path_cells; std::unordered_map> cell_neighbour_bels; std::unordered_map> bel_candidate_cells; - // Map net users to net delay limit - std::unordered_map> max_net_delay; + // Map cell ports to net delay limit + std::unordered_map, delay_t> max_net_delay; + // Criticality data from timing analysis + NetCriticalityMap net_crit; + Context *ctx; }; From cd9a65a84c34bfeb6d759e3c147272e09880cb0f Mon Sep 17 00:00:00 2001 From: David Shah Date: Sat, 1 Dec 2018 14:06:51 +0000 Subject: [PATCH 04/17] timing_opt: Neigbour bel validity checking Signed-off-by: David Shah --- common/timing_opt.cc | 53 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 5 deletions(-) diff --git a/common/timing_opt.cc b/common/timing_opt.cc index 97860a23..abfe5cf1 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -106,7 +106,10 @@ class TimingOptimiser goto unbind; } - + if (!check_cell_delay_limits(cell) || (other_cell != nullptr && !check_cell_delay_limits(other_cell))) { + result = false; + goto unbind; + } unbind: ctx->unbindBel(newBel); @@ -120,9 +123,10 @@ unbind: return result; } - void find_neighbours(CellInfo *cell, int d) { + int find_neighbours(CellInfo *cell, IdString prev_cell, int d, bool allow_swap) { BelId curr = cell->bel; Loc curr_loc = ctx->getBelLocation(curr); + int found_count = 0; for (int dy = -d; dy <= d; dy++) { for (int dx = -d; dx <= d; dx++) { if (dx == 0 && dy == 0) @@ -143,12 +147,51 @@ unbind: bound_bels_at_loc.push_back(bel); } } - bool found = false; + BelId candidate; - if (found) - continue; + while (!free_bels_at_loc.empty() && !bound_bels_at_loc.empty()) { + BelId try_bel; + if (!free_bels_at_loc.empty()) { + int try_idx = ctx->rng(int(free_bels_at_loc.size())); + try_bel = free_bels_at_loc.at(try_idx); + free_bels_at_loc.erase(free_bels_at_loc.begin() + try_idx); + } else { + int try_idx = ctx->rng(int(bound_bels_at_loc.size())); + try_bel = bound_bels_at_loc.at(try_idx); + bound_bels_at_loc.erase(bound_bels_at_loc.begin() + try_idx); + } + if (bel_candidate_cells.count(try_bel) && !allow_swap) { + // Overlap is only allowed if it is with the previous cell (this is handled by removing those + // edges in the graph), or if allow_swap is true to deal with cases where overlap means few neighbours + // are identified + if (bel_candidate_cells.at(try_bel).size() > 1 || (bel_candidate_cells.at(try_bel).size() == 0 || + *(bel_candidate_cells.at(try_bel).begin()) != prev_cell)) + continue; + } + if (acceptable_bel_candidate(cell, try_bel)) { + candidate = try_bel; + break; + } + } + + if (candidate != BelId()) { + cell_neighbour_bels[cell->name].insert(candidate); + bel_candidate_cells[candidate].insert(cell->name); + // Work out if we need to delete any overlap + std::vector overlap; + for (auto other : bel_candidate_cells[candidate]) + if (other != cell->name && other != prev_cell) + overlap.push_back(other); + if (overlap.size() > 0) + NPNR_ASSERT(allow_swap); + for (auto ov : overlap) { + bel_candidate_cells[candidate].erase(ov); + cell_neighbour_bels[ov].erase(candidate); + } + } } } + return found_count; } // Current candidate Bels for cells (linked in both direction> From 51a662d37e4361fc2a39258fd1dc1b56ff6c15b0 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sat, 1 Dec 2018 15:22:32 +0000 Subject: [PATCH 05/17] timing_opt: Implement critical path finder Signed-off-by: David Shah --- common/timing_opt.cc | 114 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/common/timing_opt.cc b/common/timing_opt.cc index abfe5cf1..c7ecd814 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -194,6 +194,120 @@ unbind: return found_count; } + std::vector> find_crit_paths(float crit_thresh, int max_count) { + std::vector> crit_paths; + std::vector> crit_nets; + std::vector netnames; + std::transform(ctx->nets.begin(), ctx->nets.end(), std::back_inserter(netnames), + [](const std::pair> &kv){ + return kv.first; + }); + ctx->sorted_shuffle(netnames); + for (auto net : netnames) { + if (crit_nets.size() >= max_count) + break; + if (!net_crit.count(net)) + continue; + auto crit_user = std::max_element(net_crit[net].criticality.begin(), + net_crit[net].criticality.end()); + if (*crit_user > crit_thresh) + crit_nets.push_back(std::make_pair(ctx->nets[net].get(), crit_user - net_crit[net].criticality.begin())); + } + + auto port_user_index = [](CellInfo *cell, PortInfo &port) -> size_t { + NPNR_ASSERT(port.net != nullptr); + for (size_t i = 0; i < port.net->users.size(); i++) { + auto &usr = port.net->users.at(i); + if (usr.cell == cell && usr.port == port.name) + return i; + } + NPNR_ASSERT_FALSE("port user not found on net"); + }; + + for (auto crit_net : crit_nets) { + std::deque crit_path; + + // FIXME: This will fail badly on combinational loops + + // Iterate backwards following greatest criticality + NetInfo* back_cursor = crit_net.first; + while (back_cursor != nullptr) { + float max_crit = 0; + std::pair crit_sink{nullptr, 0}; + CellInfo *cell = back_cursor->driver.cell; + if (cell == nullptr) + break; + for (auto port : cell->ports) { + if (port.second.type != PORT_IN) + continue; + NetInfo *pn = port.second.net; + if (pn == nullptr) + continue; + if (!net_crit.count(pn->name) || net_crit.at(pn->name).criticality.empty()) + continue; + int ccount; + DelayInfo combDelay; + TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); + if (tpclass != TMG_COMB_INPUT && tpclass != TMG_REGISTER_INPUT) + continue; + bool is_path = ctx->getCellDelay(cell, port.first, back_cursor->driver.port, combDelay); + if (!is_path) + continue; + size_t user_idx = port_user_index(cell, port.second); + float usr_crit = net_crit.at(pn->name).criticality.at(user_idx); + if (usr_crit >= max_crit) { + max_crit = usr_crit; + crit_sink = std::make_pair(pn, user_idx); + } + } + + if (crit_sink.first != nullptr) { + crit_path.push_front(&(crit_sink.first->users.at(crit_sink.second))); + } + back_cursor = crit_sink.first; + } + // Iterate forwards following greatest criticiality + PortRef *fwd_cursor = &(crit_net.first->users.at(crit_net.second)); + while (fwd_cursor != nullptr) { + crit_path.push_back(fwd_cursor); + float max_crit = 0; + std::pair crit_sink{nullptr, 0}; + CellInfo *cell = fwd_cursor->cell; + for (auto port : cell->ports) { + if (port.second.type != PORT_OUT) + continue; + NetInfo *pn = port.second.net; + if (pn == nullptr) + continue; + if (!net_crit.count(pn->name) || net_crit.at(pn->name).criticality.empty()) + continue; + int ccount; + DelayInfo combDelay; + TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); + if (tpclass != TMG_COMB_OUTPUT && tpclass != TMG_REGISTER_OUTPUT) + continue; + auto &crits = net_crit.at(pn->name).criticality; + auto most_crit_usr = std::max_element(crits.begin(), crits.end()); + if (*most_crit_usr >= max_crit) { + max_crit = *most_crit_usr; + crit_sink = std::make_pair(pn, std::distance(crits.begin(), most_crit_usr)); + } + } + if (crit_sink.first != nullptr) { + fwd_cursor = &(crit_sink.first->users.at(crit_sink.second)); + } else { + fwd_cursor = nullptr; + } + } + + std::vector crit_path_vec; + std::copy(crit_path.begin(), crit_path.end(), std::back_inserter(crit_path_vec)); + crit_paths.push_back(crit_path_vec); + } + + return crit_paths; + } + // Current candidate Bels for cells (linked in both direction> std::vector path_cells; std::unordered_map> cell_neighbour_bels; From 1b7214a18ae4cf6fb62827b06e4b5f158292da4b Mon Sep 17 00:00:00 2001 From: David Shah Date: Sat, 1 Dec 2018 16:50:47 +0000 Subject: [PATCH 06/17] timing_opt: Implement the BFS-based path optimisation Signed-off-by: David Shah --- common/timing_opt.cc | 186 +++++++++++++++++++++++++++++++++++-------- common/timing_opt.h | 3 +- 2 files changed, 154 insertions(+), 35 deletions(-) diff --git a/common/timing_opt.cc b/common/timing_opt.cc index c7ecd814..42c2242a 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -18,15 +18,22 @@ */ /* - * Timing-optimised detailed placement algorithm + * Timing-optimised detailed placement algorithm using BFS of the neighbour graph created from cells + * on a critical path + * * Based on "An Effective Timing-Driven Detailed Placement Algorithm for FPGAs" * https://www.cerc.utexas.edu/utda/publications/C205.pdf + * + * Modifications made to deal with the smaller Bels that nextpnr uses instead of swapping whole tiles, + * and deal with the fact that not every cell on the crit path may be swappable. */ #include "timing.h" #include "timing_opt.h" #include "nextpnr.h" #include "util.h" +#include +#include NEXTPNR_NAMESPACE_BEGIN class TimingOptimiser @@ -87,40 +94,38 @@ class TimingOptimiser return true; } - bool acceptable_bel_candidate(CellInfo *cell, BelId newBel) { - bool result = true; - // At the moment we have to actually do the swap to get an accurate legality result - // Switching to macro swaps might help with this + BelId cell_swap_bel(CellInfo *cell, BelId newBel) { BelId oldBel = cell->bel; CellInfo *other_cell = ctx->getBoundBelCell(newBel); - if (other_cell != nullptr && other_cell->belStrength > STRENGTH_WEAK) { - return false; - } - - ctx->bindBel(newBel, cell, STRENGTH_WEAK); + NPNR_ASSERT(other_cell == nullptr || other_cell->belStrength <= STRENGTH_WEAK); + ctx->unbindBel(oldBel); if (other_cell != nullptr) { + ctx->unbindBel(newBel); ctx->bindBel(oldBel, other_cell, STRENGTH_WEAK); } - if (!ctx->isBelLocationValid(newBel) || ((other_cell != nullptr && !ctx->isBelLocationValid(oldBel)))) { - result = false; - goto unbind; - } + ctx->bindBel(newBel, cell, STRENGTH_WEAK); + return oldBel; + } - if (!check_cell_delay_limits(cell) || (other_cell != nullptr && !check_cell_delay_limits(other_cell))) { - result = false; - goto unbind; + // Check that a series of moves are both legal and remain within maximum delay bounds + // Moves are specified as a vector of pairs + bool acceptable_move(std::vector> &move, bool check_delays = true) { + for (auto &entry : move) { + if (!ctx->isBelLocationValid(entry.first->bel)) + return false; + if (!ctx->isBelLocationValid(entry.second)) + return false; + if (!check_delays) + continue; + if (!check_cell_delay_limits(entry.first)) + return false; + // We might have swapped another cell onto the original bel. Check this for max delay violations + // too + CellInfo *swapped = ctx->getBoundBelCell(entry.second); + if (swapped != nullptr && !check_cell_delay_limits(swapped)) + return false; } - -unbind: - ctx->unbindBel(newBel); - if (other_cell != nullptr) - ctx->unbindBel(oldBel); - // Undo the swap - ctx->bindBel(oldBel, cell, STRENGTH_WEAK); - if (other_cell != nullptr) { - ctx->bindBel(newBel, other_cell, STRENGTH_WEAK); - } - return result; + return true; } int find_neighbours(CellInfo *cell, IdString prev_cell, int d, bool allow_swap) { @@ -129,8 +134,6 @@ unbind: int found_count = 0; for (int dy = -d; dy <= d; dy++) { for (int dx = -d; dx <= d; dx++) { - if (dx == 0 && dy == 0) - continue; // Go through all the Bels at this location // First, find all bels of the correct type that are either unbound or bound normally // Strongly bound bels are ignored @@ -168,10 +171,9 @@ unbind: *(bel_candidate_cells.at(try_bel).begin()) != prev_cell)) continue; } - if (acceptable_bel_candidate(cell, try_bel)) { - candidate = try_bel; - break; - } + // TODO: what else to check here? + candidate = try_bel; + break; } if (candidate != BelId()) { @@ -308,6 +310,120 @@ unbind: return crit_paths; } + void optimise_path(std::vector &path) { + path_cells.clear(); + cell_neighbour_bels.clear(); + bel_candidate_cells.clear(); + for (auto port : path) { + if (std::find(path_cells.begin(), path_cells.end(), port->cell->name) != path_cells.end()) + continue; + if (port->cell->belStrength > STRENGTH_WEAK || !cfg.cellTypes.count(port->cell->type)) + continue; + path_cells.push_back(port->cell->name); + } + + if (path_cells.empty()) + return; + + IdString last_cell; + const int d = 3; // FIXME: how to best determine d + for (auto cell : path_cells) { + // FIXME: when should we allow swapping due to a lack of candidates + find_neighbours(ctx->cells[cell].get(), last_cell, d, false); + last_cell = cell; + } + // Map cells that we will actually modify to the arc we will use for cost + // calculation + // for delay calc purposes + std::unordered_map> cost_ports; + PortRef *last_port = nullptr; + auto pcell = path_cells.begin(); + for (auto port : path) { + if (port->cell->name == *pcell) { + cost_ports[*pcell] = std::make_pair(last_port, port); + pcell++; + } + last_port = port; + } + + // Actual BFS path optimisation algorithm + std::unordered_map> cumul_costs; + std::unordered_map, std::pair> backtrace; + std::queue> visit; + std::unordered_set> to_visit; + + for (auto startbel : cell_neighbour_bels[path_cells.front()]) { + auto entry = std::make_pair(0, startbel); + visit.push(entry); + cumul_costs[path_cells.front()][startbel] = 0; + } + + while(!visit.empty()) { + auto entry = visit.front(); + visit.pop(); + auto cellname = path_cells.at(entry.first); + if (entry.first == path_cells.size() - 1) + continue; + std::vector> move; + // Apply the entire backtrace for accurate legality and delay checks + // This is probably pretty expensive (but also probably pales in comparison to the number of swaps + // SA will make...) + std::vector> route_to_entry; + auto cursor = std::make_pair(cellname, entry.second); + route_to_entry.push_back(cursor); + while (backtrace.count(cursor)) { + cursor = backtrace.at(cursor); + route_to_entry.push_back(cursor); + } + for (auto rt_entry : boost::adaptors::reverse(route_to_entry)) { + CellInfo *cell = ctx->cells.at(rt_entry.first).get(); + BelId origBel = cell_swap_bel(cell, rt_entry.second); + move.push_back(std::make_pair(cell, origBel)); + } + + delay_t cdelay = cumul_costs[cellname][entry.second]; + + // Have a look at where we can travel from here + for (auto neighbour : cell_neighbour_bels.at(path_cells.at(entry.first + 1))) { + // Edges between overlapping bels are deleted + if (neighbour == entry.second) + continue; + // Experimentally swap the next path cell onto the neighbour bel we are trying + IdString ncname = path_cells.at(entry.first + 1); + CellInfo *next_cell = ctx->cells.at(ncname).get(); + BelId origBel = cell_swap_bel(next_cell, neighbour); + move.push_back(std::make_pair(next_cell, origBel)); + + // Check the new cumulative delay + auto port_pair = cost_ports.at(ncname); + delay_t edge_delay = ctx->estimateDelay(ctx->getBelPinWire(port_pair.first->cell->bel, port_pair.first->port), + ctx->getBelPinWire(port_pair.second->cell->bel, port_pair.second->port)); + delay_t total_delay = cdelay + edge_delay; + // First, check if the move is actually worthwhile from a delay point of view before the expensive + // legality check + if (!cumul_costs.count(ncname) || !cumul_costs.at(ncname).count(neighbour) + || cumul_costs.at(ncname).at(neighbour) > total_delay) { + // Now check that the swaps we have made to get here are legal and meet max delay requirements + if (acceptable_move(move)) { + cumul_costs[ncname][neighbour] = total_delay; + backtrace[std::make_pair(ncname, neighbour)] = std::make_pair(cellname, entry.second); + if (!to_visit.count(std::make_pair(entry.first + 1, neighbour))) + visit.push(std::make_pair(entry.first + 1, neighbour)); + } + } + // Revert the experimental swap + cell_swap_bel(move.back().first, move.back().second); + move.pop_back(); + } + + // Revert move by swapping cells back to their original order + // Execute swaps in reverse order to how we made them originally + for (auto move_entry : boost::adaptors::reverse(move)) { + cell_swap_bel(move_entry.first, move_entry.second); + } + } + } + // Current candidate Bels for cells (linked in both direction> std::vector path_cells; std::unordered_map> cell_neighbour_bels; @@ -317,6 +433,8 @@ unbind: // Criticality data from timing analysis NetCriticalityMap net_crit; + TimingOptCfg cfg; + Context *ctx; }; diff --git a/common/timing_opt.h b/common/timing_opt.h index 746294bb..fda29d30 100644 --- a/common/timing_opt.h +++ b/common/timing_opt.h @@ -18,6 +18,7 @@ */ #include "nextpnr.h" +#include "settings.h" NEXTPNR_NAMESPACE_BEGIN @@ -26,7 +27,7 @@ struct TimingOptCfg : public Settings // The timing optimiser will *only* optimise cells of these types // Normally these would only be logic cells (or tiles if applicable), the algorithm makes little sense // for other cell types - std::unordered_set cellTypes; + std::unordered_set cellTypes; }; extern bool timing_opt(Context *ctx, TimingOptCfg cfg); From b51308708bf7202c097deb7f70ff83e710e0970c Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Dec 2018 12:01:43 +0000 Subject: [PATCH 07/17] timing_opt: Debugging and integration Signed-off-by: David Shah --- common/timing_opt.cc | 127 ++++++++++++++++++++++++++++++++++++++----- common/timing_opt.h | 2 + ice40/arch.cc | 9 ++- 3 files changed, 123 insertions(+), 15 deletions(-) diff --git a/common/timing_opt.cc b/common/timing_opt.cc index 42c2242a..3a289812 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -34,31 +34,92 @@ #include "util.h" #include #include + +namespace std { + + template <> struct hash> + { + std::size_t operator()(const std::pair &idp) const noexcept + { + std::size_t seed = 0; + boost::hash_combine(seed, hash()(idp.first)); + boost::hash_combine(seed, hash()(idp.second)); + return seed; + } + }; + + template <> struct hash> + { + std::size_t operator()(const std::pair &idp) const noexcept + { + std::size_t seed = 0; + boost::hash_combine(seed, hash()(idp.first)); + boost::hash_combine(seed, hash()(idp.second)); + return seed; + } + }; + + template <> struct hash> + { + std::size_t operator()(const std::pair &idp) const noexcept + { + std::size_t seed = 0; + boost::hash_combine(seed, hash()(idp.first)); + boost::hash_combine(seed, hash()(idp.second)); + return seed; + } + }; +} + NEXTPNR_NAMESPACE_BEGIN class TimingOptimiser { public: - TimingOptimiser(Context *ctx) : ctx(ctx){}; - bool optimise() {} + TimingOptimiser(Context *ctx, TimingOptCfg cfg) : ctx(ctx), cfg(cfg) {}; + bool optimise() { + log_info("Running timing-driven placement optimisation...\n"); +#if 1 + timing_analysis(ctx, false, true, ctx->debug, false); +#endif + for (int i = 0; i < 20; i++) { + log_info(" Iteration %d...\n", i); + get_criticalities(ctx, &net_crit); + setup_delay_limits(); + auto crit_paths = find_crit_paths(0.98, 1000); + for (auto &path : crit_paths) + optimise_path(path); +#if 1 + timing_analysis(ctx, false, true, ctx->debug, false); +#endif + } + return true; + } private: // Ratio of available to already-candidates to begin borrowing const float borrow_thresh = 0.2; void setup_delay_limits() { + max_net_delay.clear(); for (auto net : sorted(ctx->nets)) { NetInfo *ni = net.second; - max_net_delay[ni].clear(); - max_net_delay[ni].resize(ni->users.size(), std::numeric_limits::max()); + for (auto usr : ni->users) { + max_net_delay[std::make_pair(usr.cell->name, usr.port)] + = std::numeric_limits::max(); + } if (!net_crit.count(net.first)) continue; auto &nc = net_crit.at(net.first); if (nc.slack.empty()) continue; for (size_t i = 0; i < ni->users.size(); i++) { - delay_t net_delay = ctx->getNetinfoRouteDelay(ni, ni->users.at(i)); - max_net_delay[ni].at(i) = net_delay + ((nc.slack.at(i) - nc.cd_worst_slack) / nc.max_path_length); + auto &usr = ni->users.at(i); + delay_t net_delay = ctx->getNetinfoRouteDelay(ni, usr); + if (nc.max_path_length != 0) { + max_net_delay[std::make_pair(usr.cell->name, usr.port)] + = net_delay + ((nc.slack.at(i) - nc.cd_worst_slack) / nc.max_path_length); + } } } } @@ -196,12 +257,12 @@ class TimingOptimiser return found_count; } - std::vector> find_crit_paths(float crit_thresh, int max_count) { + std::vector> find_crit_paths(float crit_thresh, size_t max_count) { std::vector> crit_paths; std::vector> crit_nets; std::vector netnames; std::transform(ctx->nets.begin(), ctx->nets.end(), std::back_inserter(netnames), - [](const std::pair> &kv){ + [](const std::pair> &kv){ return kv.first; }); ctx->sorted_shuffle(netnames); @@ -250,7 +311,7 @@ class TimingOptimiser int ccount; DelayInfo combDelay; TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); - if (tpclass != TMG_COMB_INPUT && tpclass != TMG_REGISTER_INPUT) + if (tpclass != TMG_COMB_INPUT) continue; bool is_path = ctx->getCellDelay(cell, port.first, back_cursor->driver.port, combDelay); if (!is_path) @@ -286,7 +347,7 @@ class TimingOptimiser int ccount; DelayInfo combDelay; TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); - if (tpclass != TMG_COMB_OUTPUT && tpclass != TMG_REGISTER_OUTPUT) + if (tpclass != TMG_COMB_OUTPUT) continue; auto &crits = net_crit.at(pn->name).criticality; auto most_crit_usr = std::max_element(crits.begin(), crits.end()); @@ -314,11 +375,17 @@ class TimingOptimiser path_cells.clear(); cell_neighbour_bels.clear(); bel_candidate_cells.clear(); + if (ctx->debug) + log_info("Optimising the following path: \n"); for (auto port : path) { + if (ctx->debug) + log_info(" %s.%s at %s\n", port->cell->name.c_str(ctx), port->port.c_str(ctx), ctx->getBelName(port->cell->bel).c_str(ctx)); if (std::find(path_cells.begin(), path_cells.end(), port->cell->name) != path_cells.end()) continue; if (port->cell->belStrength > STRENGTH_WEAK || !cfg.cellTypes.count(port->cell->type)) continue; + if (ctx->debug) + log_info(" can move\n"); path_cells.push_back(port->cell->name); } @@ -362,7 +429,7 @@ class TimingOptimiser auto entry = visit.front(); visit.pop(); auto cellname = path_cells.at(entry.first); - if (entry.first == path_cells.size() - 1) + if (entry.first == int(path_cells.size()) - 1) continue; std::vector> move; // Apply the entire backtrace for accurate legality and delay checks @@ -422,6 +489,39 @@ class TimingOptimiser cell_swap_bel(move_entry.first, move_entry.second); } } + + // Did we find a solution?? + if (cumul_costs.count(path_cells.back())) { + // Find the end position with the lowest total delay + auto &end_options = cumul_costs.at(path_cells.back()); + auto lowest = std::min_element(end_options.begin(), end_options.end(), [](const std::pair &a, + const std::pair &b) { + return a.second < b.second; + }); + NPNR_ASSERT(lowest != end_options.end()); + + std::vector> route_to_solution; + auto cursor = std::make_pair(path_cells.back(), lowest->first); + route_to_solution.push_back(cursor); + while (backtrace.count(cursor)) { + cursor = backtrace.at(cursor); + route_to_solution.push_back(cursor); + } + if (ctx->debug) + log_info("Found a solution with cost %.02f ns\n", ctx->getDelayNS(lowest->second)); + for (auto rt_entry : boost::adaptors::reverse(route_to_solution)) { + CellInfo *cell = ctx->cells.at(rt_entry.first).get(); + cell_swap_bel(cell, rt_entry.second); + if(ctx->debug) + log_info(" %s at %s\n", rt_entry.first.c_str(ctx), ctx->getBelName(rt_entry.second).c_str(ctx)); + } + + } else { + if (ctx->debug) + log_info("Solution was not found\n"); + } + if (ctx->debug) + log_break(); } // Current candidate Bels for cells (linked in both direction> @@ -432,12 +532,11 @@ class TimingOptimiser std::unordered_map, delay_t> max_net_delay; // Criticality data from timing analysis NetCriticalityMap net_crit; - + Context *ctx; TimingOptCfg cfg; - Context *ctx; }; -bool timing_opt(Context *ctx, TimingOptCfg cfg) { return TimingOptimiser(ctx).optimise(); } +bool timing_opt(Context *ctx, TimingOptCfg cfg) { return TimingOptimiser(ctx, cfg).optimise(); } NEXTPNR_NAMESPACE_END diff --git a/common/timing_opt.h b/common/timing_opt.h index fda29d30..ceb35c71 100644 --- a/common/timing_opt.h +++ b/common/timing_opt.h @@ -24,6 +24,8 @@ NEXTPNR_NAMESPACE_BEGIN struct TimingOptCfg : public Settings { + TimingOptCfg(Context *ctx) : Settings(ctx) {} + // The timing optimiser will *only* optimise cells of these types // Normally these would only be logic cells (or tiles if applicable), the algorithm makes little sense // for other cell types diff --git a/ice40/arch.cc b/ice40/arch.cc index ada78020..5cd55774 100644 --- a/ice40/arch.cc +++ b/ice40/arch.cc @@ -27,6 +27,7 @@ #include "placer1.h" #include "router1.h" #include "util.h" +#include "timing_opt.h" NEXTPNR_NAMESPACE_BEGIN @@ -626,7 +627,13 @@ bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay // ----------------------------------------------------------------------- -bool Arch::place() { return placer1(getCtx(), Placer1Cfg(getCtx())); } +bool Arch::place() { + if(!placer1(getCtx(), Placer1Cfg(getCtx()))) + return false; + TimingOptCfg tocfg(getCtx()); + tocfg.cellTypes.insert(id_ICESTORM_LC); + return timing_opt(getCtx(), tocfg); +} bool Arch::route() { return router1(getCtx(), Router1Cfg(getCtx())); } From e1c74ad3db06c7279b018a93416dc3be178002d5 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Dec 2018 12:23:18 +0000 Subject: [PATCH 08/17] timing_opt: Fixes including single-move legality Signed-off-by: David Shah --- common/timing.cc | 8 ++++- common/timing_opt.cc | 69 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 60 insertions(+), 17 deletions(-) diff --git a/common/timing.cc b/common/timing.cc index 1f48261d..69ccc78f 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -472,6 +472,8 @@ struct Timing auto &nd = startdomain.second; if (nd.false_startpoint) continue; + if (startdomain.first.clock == async_clock) + continue; const delay_t net_length_plus_one = nd.max_path_length + 1; auto &net_min_remaining_budget = nd.min_remaining_budget; if (nd.min_required.empty()) @@ -555,6 +557,8 @@ struct Timing const NetInfo *net = net_entry.first; for (auto &startdomain : net_entry.second) { auto &nd = startdomain.second; + if (startdomain.first.clock == async_clock) + continue; if (nd.min_required.empty()) continue; auto &nc = (*net_crit)[net->name]; @@ -575,6 +579,8 @@ struct Timing for (auto &net_entry : net_data) { const NetInfo *net = net_entry.first; for (auto &startdomain : net_entry.second) { + if (startdomain.first.clock == async_clock) + continue; auto &nd = startdomain.second; if (nd.min_required.empty()) continue; @@ -588,7 +594,7 @@ struct Timing continue; delay_t dmax = crit_path->at(ClockPair{startdomain.first, startdomain.first}).path_delay; for (size_t i = 0; i < net->users.size(); i++) { - float criticality = 1.0 - ((nc.slack.at(i) - worst_slack.at(startdomain.first)) / dmax); + float criticality = 1.0f - (float(nc.slack.at(i) - worst_slack.at(startdomain.first)) / dmax); nc.criticality.at(i) = std::max(nc.criticality.at(i), criticality); } nc.max_path_length = std::max(nc.max_path_length, nd.max_path_length); diff --git a/common/timing_opt.cc b/common/timing_opt.cc index 3a289812..d1194876 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -80,7 +80,7 @@ class TimingOptimiser bool optimise() { log_info("Running timing-driven placement optimisation...\n"); #if 1 - timing_analysis(ctx, false, true, ctx->debug, false); + timing_analysis(ctx, false, true, false, false); #endif for (int i = 0; i < 20; i++) { log_info(" Iteration %d...\n", i); @@ -90,7 +90,7 @@ class TimingOptimiser for (auto &path : crit_paths) optimise_path(path); #if 1 - timing_analysis(ctx, false, true, ctx->debug, false); + timing_analysis(ctx, false, true, false, false); #endif } return true; @@ -146,8 +146,17 @@ class TimingOptimiser if (dstBel == BelId()) continue; if (ctx->estimateDelay(ctx->getBelPinWire(cell->bel, port.first), - ctx->getBelPinWire(dstBel, user.port)) > max_net_delay.at(std::make_pair(user.cell->name, user.port))) + ctx->getBelPinWire(dstBel, user.port)) > max_net_delay.at(std::make_pair(user.cell->name, user.port))) { +#if 0 + if (ctx->debug) { + log_info(" est delay %.02fns exceeded maximum %.02fns\n", ctx->getDelayNS(ctx->estimateDelay(ctx->getBelPinWire(cell->bel, port.first), + ctx->getBelPinWire(dstBel, user.port))), + ctx->getDelayNS(max_net_delay.at(std::make_pair(user.cell->name, user.port)))); + } +#endif return false; + + } } } @@ -193,6 +202,7 @@ class TimingOptimiser BelId curr = cell->bel; Loc curr_loc = ctx->getBelLocation(curr); int found_count = 0; + cell_neighbour_bels[cell->name] = std::unordered_set{}; for (int dy = -d; dy <= d; dy++) { for (int dx = -d; dx <= d; dx++) { // Go through all the Bels at this location @@ -207,7 +217,7 @@ class TimingOptimiser CellInfo *bound = ctx->getBoundBelCell(bel); if (bound == nullptr) { free_bels_at_loc.push_back(bel); - } else if (bound->belStrength <= STRENGTH_WEAK) { + } else if (bound->belStrength <= STRENGTH_WEAK || bound->constr_parent != nullptr || !bound->constr_children.empty()) { bound_bels_at_loc.push_back(bel); } } @@ -286,6 +296,7 @@ class TimingOptimiser } NPNR_ASSERT_FALSE("port user not found on net"); }; + std::unordered_set used_ports; for (auto crit_net : crit_nets) { std::deque crit_path; @@ -318,6 +329,8 @@ class TimingOptimiser continue; size_t user_idx = port_user_index(cell, port.second); float usr_crit = net_crit.at(pn->name).criticality.at(user_idx); + if (used_ports.count(&(pn->users.at(user_idx)))) + continue; if (usr_crit >= max_crit) { max_crit = usr_crit; crit_sink = std::make_pair(pn, user_idx); @@ -326,6 +339,7 @@ class TimingOptimiser if (crit_sink.first != nullptr) { crit_path.push_front(&(crit_sink.first->users.at(crit_sink.second))); + used_ports.insert(&(crit_sink.first->users.at(crit_sink.second))); } back_cursor = crit_sink.first; } @@ -350,14 +364,19 @@ class TimingOptimiser if (tpclass != TMG_COMB_OUTPUT) continue; auto &crits = net_crit.at(pn->name).criticality; - auto most_crit_usr = std::max_element(crits.begin(), crits.end()); - if (*most_crit_usr >= max_crit) { - max_crit = *most_crit_usr; - crit_sink = std::make_pair(pn, std::distance(crits.begin(), most_crit_usr)); + for (size_t i = 0; i < crits.size(); i++) { + if (used_ports.count(&(pn->users.at(i)))) + continue; + if (crits.at(i) >= max_crit) { + max_crit = crits.at(i); + crit_sink = std::make_pair(pn, i); + } } + } if (crit_sink.first != nullptr) { fwd_cursor = &(crit_sink.first->users.at(crit_sink.second)); + used_ports.insert(&(crit_sink.first->users.at(crit_sink.second))); } else { fwd_cursor = nullptr; } @@ -378,20 +397,30 @@ class TimingOptimiser if (ctx->debug) log_info("Optimising the following path: \n"); for (auto port : path) { - if (ctx->debug) - log_info(" %s.%s at %s\n", port->cell->name.c_str(ctx), port->port.c_str(ctx), ctx->getBelName(port->cell->bel).c_str(ctx)); + if (ctx->debug) { + float crit = 0; + NetInfo *pn = port->cell->ports.at(port->port).net; + if (net_crit.count(pn->name) && !net_crit.at(pn->name).criticality.empty()) + for (size_t i = 0; i < pn->users.size(); i++) + if (pn->users.at(i).cell == port->cell && pn->users.at(i).port == port->port) + crit = net_crit.at(pn->name).criticality.at(i); + log_info(" %s.%s at %s crit %0.02f\n", port->cell->name.c_str(ctx), port->port.c_str(ctx), ctx->getBelName(port->cell->bel).c_str(ctx), crit); + + } if (std::find(path_cells.begin(), path_cells.end(), port->cell->name) != path_cells.end()) continue; - if (port->cell->belStrength > STRENGTH_WEAK || !cfg.cellTypes.count(port->cell->type)) + if (port->cell->belStrength > STRENGTH_WEAK || !cfg.cellTypes.count(port->cell->type) || port->cell->constr_parent != nullptr || !port->cell->constr_children.empty()) continue; if (ctx->debug) log_info(" can move\n"); path_cells.push_back(port->cell->name); } - if (path_cells.empty()) + if (path_cells.size() < 3) { + log_info("Too few moveable cells; skipping path\n"); + log_break(); return; - + } IdString last_cell; const int d = 3; // FIXME: how to best determine d for (auto cell : path_cells) { @@ -420,9 +449,17 @@ class TimingOptimiser std::unordered_set> to_visit; for (auto startbel : cell_neighbour_bels[path_cells.front()]) { - auto entry = std::make_pair(0, startbel); - visit.push(entry); - cumul_costs[path_cells.front()][startbel] = 0; + // Swap for legality check + CellInfo *cell = ctx->cells.at(path_cells.front()).get(); + BelId origBel = cell_swap_bel(cell, startbel); + std::vector> move{std::make_pair(cell, origBel)}; + if (acceptable_move(move)) { + auto entry = std::make_pair(0, startbel); + visit.push(entry); + cumul_costs[path_cells.front()][startbel] = 0; + } + // Swap back + cell_swap_bel(cell, origBel); } while(!visit.empty()) { From 254c5ea3599bb78051642030c410bcb79c17699a Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Dec 2018 13:15:39 +0000 Subject: [PATCH 09/17] clangformat Signed-off-by: David Shah --- common/timing.cc | 5 +- common/timing_opt.cc | 170 +++++++++++++++++++++++-------------------- ice40/arch.cc | 7 +- 3 files changed, 96 insertions(+), 86 deletions(-) diff --git a/common/timing.cc b/common/timing.cc index 69ccc78f..e90718d8 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -85,8 +85,6 @@ struct CriticalPath delay_t path_period; }; - - typedef std::unordered_map CriticalPathMap; typedef std::unordered_map NetCriticalityMap; @@ -914,7 +912,8 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p } } -void get_criticalities(Context *ctx, NetCriticalityMap *net_crit) { +void get_criticalities(Context *ctx, NetCriticalityMap *net_crit) +{ CriticalPathMap crit_paths; net_crit->clear(); Timing timing(ctx, true, true, &crit_paths, nullptr, net_crit); diff --git a/common/timing_opt.cc b/common/timing_opt.cc index d1194876..950cbbbd 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -28,56 +28,60 @@ * and deal with the fact that not every cell on the crit path may be swappable. */ -#include "timing.h" #include "timing_opt.h" -#include "nextpnr.h" -#include "util.h" #include #include +#include "nextpnr.h" +#include "timing.h" +#include "util.h" namespace std { - template <> struct hash> +template <> struct hash> +{ + std::size_t + operator()(const std::pair &idp) const + noexcept { - std::size_t operator()(const std::pair &idp) const noexcept - { - std::size_t seed = 0; - boost::hash_combine(seed, hash()(idp.first)); - boost::hash_combine(seed, hash()(idp.second)); - return seed; - } - }; + std::size_t seed = 0; + boost::hash_combine(seed, hash()(idp.first)); + boost::hash_combine(seed, hash()(idp.second)); + return seed; + } +}; - template <> struct hash> +template <> struct hash> +{ + std::size_t operator()(const std::pair &idp) const noexcept { - std::size_t operator()(const std::pair &idp) const noexcept - { - std::size_t seed = 0; - boost::hash_combine(seed, hash()(idp.first)); - boost::hash_combine(seed, hash()(idp.second)); - return seed; - } - }; + std::size_t seed = 0; + boost::hash_combine(seed, hash()(idp.first)); + boost::hash_combine(seed, hash()(idp.second)); + return seed; + } +}; - template <> struct hash> +template <> struct hash> +{ + std::size_t + operator()(const std::pair &idp) const noexcept { - std::size_t operator()(const std::pair &idp) const noexcept - { - std::size_t seed = 0; - boost::hash_combine(seed, hash()(idp.first)); - boost::hash_combine(seed, hash()(idp.second)); - return seed; - } - }; -} + std::size_t seed = 0; + boost::hash_combine(seed, hash()(idp.first)); + boost::hash_combine(seed, hash()(idp.second)); + return seed; + } +}; +} // namespace std NEXTPNR_NAMESPACE_BEGIN class TimingOptimiser { public: - TimingOptimiser(Context *ctx, TimingOptCfg cfg) : ctx(ctx), cfg(cfg) {}; - bool optimise() { + TimingOptimiser(Context *ctx, TimingOptCfg cfg) : ctx(ctx), cfg(cfg){}; + bool optimise() + { log_info("Running timing-driven placement optimisation...\n"); #if 1 timing_analysis(ctx, false, true, false, false); @@ -100,13 +104,13 @@ class TimingOptimiser // Ratio of available to already-candidates to begin borrowing const float borrow_thresh = 0.2; - void setup_delay_limits() { + void setup_delay_limits() + { max_net_delay.clear(); for (auto net : sorted(ctx->nets)) { NetInfo *ni = net.second; for (auto usr : ni->users) { - max_net_delay[std::make_pair(usr.cell->name, usr.port)] - = std::numeric_limits::max(); + max_net_delay[std::make_pair(usr.cell->name, usr.port)] = std::numeric_limits::max(); } if (!net_crit.count(net.first)) continue; @@ -117,14 +121,15 @@ class TimingOptimiser auto &usr = ni->users.at(i); delay_t net_delay = ctx->getNetinfoRouteDelay(ni, usr); if (nc.max_path_length != 0) { - max_net_delay[std::make_pair(usr.cell->name, usr.port)] - = net_delay + ((nc.slack.at(i) - nc.cd_worst_slack) / nc.max_path_length); + max_net_delay[std::make_pair(usr.cell->name, usr.port)] = + net_delay + ((nc.slack.at(i) - nc.cd_worst_slack) / nc.max_path_length); } } } } - bool check_cell_delay_limits(CellInfo *cell) { + bool check_cell_delay_limits(CellInfo *cell) + { for (const auto &port : cell->ports) { int nc; if (ctx->getPortTimingClass(cell, port.first, nc) == TMG_IGNORE) @@ -137,7 +142,8 @@ class TimingOptimiser continue; BelId srcBel = net->driver.cell->bel; if (ctx->estimateDelay(ctx->getBelPinWire(srcBel, net->driver.port), - ctx->getBelPinWire(cell->bel, port.first)) > max_net_delay.at(std::make_pair(cell->name, port.first))) + ctx->getBelPinWire(cell->bel, port.first)) > + max_net_delay.at(std::make_pair(cell->name, port.first))) return false; } else if (port.second.type == PORT_OUT) { for (auto user : net->users) { @@ -146,7 +152,8 @@ class TimingOptimiser if (dstBel == BelId()) continue; if (ctx->estimateDelay(ctx->getBelPinWire(cell->bel, port.first), - ctx->getBelPinWire(dstBel, user.port)) > max_net_delay.at(std::make_pair(user.cell->name, user.port))) { + ctx->getBelPinWire(dstBel, user.port)) > + max_net_delay.at(std::make_pair(user.cell->name, user.port))) { #if 0 if (ctx->debug) { log_info(" est delay %.02fns exceeded maximum %.02fns\n", ctx->getDelayNS(ctx->estimateDelay(ctx->getBelPinWire(cell->bel, port.first), @@ -155,16 +162,15 @@ class TimingOptimiser } #endif return false; - } } } - } return true; } - BelId cell_swap_bel(CellInfo *cell, BelId newBel) { + BelId cell_swap_bel(CellInfo *cell, BelId newBel) + { BelId oldBel = cell->bel; CellInfo *other_cell = ctx->getBoundBelCell(newBel); NPNR_ASSERT(other_cell == nullptr || other_cell->belStrength <= STRENGTH_WEAK); @@ -179,7 +185,8 @@ class TimingOptimiser // Check that a series of moves are both legal and remain within maximum delay bounds // Moves are specified as a vector of pairs - bool acceptable_move(std::vector> &move, bool check_delays = true) { + bool acceptable_move(std::vector> &move, bool check_delays = true) + { for (auto &entry : move) { if (!ctx->isBelLocationValid(entry.first->bel)) return false; @@ -198,7 +205,8 @@ class TimingOptimiser return true; } - int find_neighbours(CellInfo *cell, IdString prev_cell, int d, bool allow_swap) { + int find_neighbours(CellInfo *cell, IdString prev_cell, int d, bool allow_swap) + { BelId curr = cell->bel; Loc curr_loc = ctx->getBelLocation(curr); int found_count = 0; @@ -217,7 +225,8 @@ class TimingOptimiser CellInfo *bound = ctx->getBoundBelCell(bel); if (bound == nullptr) { free_bels_at_loc.push_back(bel); - } else if (bound->belStrength <= STRENGTH_WEAK || bound->constr_parent != nullptr || !bound->constr_children.empty()) { + } else if (bound->belStrength <= STRENGTH_WEAK || bound->constr_parent != nullptr || + !bound->constr_children.empty()) { bound_bels_at_loc.push_back(bel); } } @@ -236,10 +245,11 @@ class TimingOptimiser } if (bel_candidate_cells.count(try_bel) && !allow_swap) { // Overlap is only allowed if it is with the previous cell (this is handled by removing those - // edges in the graph), or if allow_swap is true to deal with cases where overlap means few neighbours - // are identified - if (bel_candidate_cells.at(try_bel).size() > 1 || (bel_candidate_cells.at(try_bel).size() == 0 || - *(bel_candidate_cells.at(try_bel).begin()) != prev_cell)) + // edges in the graph), or if allow_swap is true to deal with cases where overlap means few + // neighbours are identified + if (bel_candidate_cells.at(try_bel).size() > 1 || + (bel_candidate_cells.at(try_bel).size() == 0 || + *(bel_candidate_cells.at(try_bel).begin()) != prev_cell)) continue; } // TODO: what else to check here? @@ -267,24 +277,23 @@ class TimingOptimiser return found_count; } - std::vector> find_crit_paths(float crit_thresh, size_t max_count) { - std::vector> crit_paths; + std::vector> find_crit_paths(float crit_thresh, size_t max_count) + { + std::vector> crit_paths; std::vector> crit_nets; std::vector netnames; std::transform(ctx->nets.begin(), ctx->nets.end(), std::back_inserter(netnames), - [](const std::pair> &kv){ - return kv.first; - }); + [](const std::pair> &kv) { return kv.first; }); ctx->sorted_shuffle(netnames); for (auto net : netnames) { if (crit_nets.size() >= max_count) break; if (!net_crit.count(net)) continue; - auto crit_user = std::max_element(net_crit[net].criticality.begin(), - net_crit[net].criticality.end()); + auto crit_user = std::max_element(net_crit[net].criticality.begin(), net_crit[net].criticality.end()); if (*crit_user > crit_thresh) - crit_nets.push_back(std::make_pair(ctx->nets[net].get(), crit_user - net_crit[net].criticality.begin())); + crit_nets.push_back( + std::make_pair(ctx->nets[net].get(), crit_user - net_crit[net].criticality.begin())); } auto port_user_index = [](CellInfo *cell, PortInfo &port) -> size_t { @@ -296,15 +305,15 @@ class TimingOptimiser } NPNR_ASSERT_FALSE("port user not found on net"); }; - std::unordered_set used_ports; + std::unordered_set used_ports; for (auto crit_net : crit_nets) { - std::deque crit_path; + std::deque crit_path; // FIXME: This will fail badly on combinational loops // Iterate backwards following greatest criticality - NetInfo* back_cursor = crit_net.first; + NetInfo *back_cursor = crit_net.first; while (back_cursor != nullptr) { float max_crit = 0; std::pair crit_sink{nullptr, 0}; @@ -372,7 +381,6 @@ class TimingOptimiser crit_sink = std::make_pair(pn, i); } } - } if (crit_sink.first != nullptr) { fwd_cursor = &(crit_sink.first->users.at(crit_sink.second)); @@ -382,7 +390,7 @@ class TimingOptimiser } } - std::vector crit_path_vec; + std::vector crit_path_vec; std::copy(crit_path.begin(), crit_path.end(), std::back_inserter(crit_path_vec)); crit_paths.push_back(crit_path_vec); } @@ -390,7 +398,8 @@ class TimingOptimiser return crit_paths; } - void optimise_path(std::vector &path) { + void optimise_path(std::vector &path) + { path_cells.clear(); cell_neighbour_bels.clear(); bel_candidate_cells.clear(); @@ -404,12 +413,13 @@ class TimingOptimiser for (size_t i = 0; i < pn->users.size(); i++) if (pn->users.at(i).cell == port->cell && pn->users.at(i).port == port->port) crit = net_crit.at(pn->name).criticality.at(i); - log_info(" %s.%s at %s crit %0.02f\n", port->cell->name.c_str(ctx), port->port.c_str(ctx), ctx->getBelName(port->cell->bel).c_str(ctx), crit); - + log_info(" %s.%s at %s crit %0.02f\n", port->cell->name.c_str(ctx), port->port.c_str(ctx), + ctx->getBelName(port->cell->bel).c_str(ctx), crit); } if (std::find(path_cells.begin(), path_cells.end(), port->cell->name) != path_cells.end()) continue; - if (port->cell->belStrength > STRENGTH_WEAK || !cfg.cellTypes.count(port->cell->type) || port->cell->constr_parent != nullptr || !port->cell->constr_children.empty()) + if (port->cell->belStrength > STRENGTH_WEAK || !cfg.cellTypes.count(port->cell->type) || + port->cell->constr_parent != nullptr || !port->cell->constr_children.empty()) continue; if (ctx->debug) log_info(" can move\n"); @@ -452,7 +462,7 @@ class TimingOptimiser // Swap for legality check CellInfo *cell = ctx->cells.at(path_cells.front()).get(); BelId origBel = cell_swap_bel(cell, startbel); - std::vector> move{std::make_pair(cell, origBel)}; + std::vector> move{std::make_pair(cell, origBel)}; if (acceptable_move(move)) { auto entry = std::make_pair(0, startbel); visit.push(entry); @@ -462,7 +472,7 @@ class TimingOptimiser cell_swap_bel(cell, origBel); } - while(!visit.empty()) { + while (!visit.empty()) { auto entry = visit.front(); visit.pop(); auto cellname = path_cells.at(entry.first); @@ -500,13 +510,14 @@ class TimingOptimiser // Check the new cumulative delay auto port_pair = cost_ports.at(ncname); - delay_t edge_delay = ctx->estimateDelay(ctx->getBelPinWire(port_pair.first->cell->bel, port_pair.first->port), - ctx->getBelPinWire(port_pair.second->cell->bel, port_pair.second->port)); + delay_t edge_delay = + ctx->estimateDelay(ctx->getBelPinWire(port_pair.first->cell->bel, port_pair.first->port), + ctx->getBelPinWire(port_pair.second->cell->bel, port_pair.second->port)); delay_t total_delay = cdelay + edge_delay; // First, check if the move is actually worthwhile from a delay point of view before the expensive // legality check - if (!cumul_costs.count(ncname) || !cumul_costs.at(ncname).count(neighbour) - || cumul_costs.at(ncname).at(neighbour) > total_delay) { + if (!cumul_costs.count(ncname) || !cumul_costs.at(ncname).count(neighbour) || + cumul_costs.at(ncname).at(neighbour) > total_delay) { // Now check that the swaps we have made to get here are legal and meet max delay requirements if (acceptable_move(move)) { cumul_costs[ncname][neighbour] = total_delay; @@ -531,10 +542,10 @@ class TimingOptimiser if (cumul_costs.count(path_cells.back())) { // Find the end position with the lowest total delay auto &end_options = cumul_costs.at(path_cells.back()); - auto lowest = std::min_element(end_options.begin(), end_options.end(), [](const std::pair &a, - const std::pair &b) { - return a.second < b.second; - }); + auto lowest = std::min_element(end_options.begin(), end_options.end(), + [](const std::pair &a, const std::pair &b) { + return a.second < b.second; + }); NPNR_ASSERT(lowest != end_options.end()); std::vector> route_to_solution; @@ -549,7 +560,7 @@ class TimingOptimiser for (auto rt_entry : boost::adaptors::reverse(route_to_solution)) { CellInfo *cell = ctx->cells.at(rt_entry.first).get(); cell_swap_bel(cell, rt_entry.second); - if(ctx->debug) + if (ctx->debug) log_info(" %s at %s\n", rt_entry.first.c_str(ctx), ctx->getBelName(rt_entry.second).c_str(ctx)); } @@ -571,7 +582,6 @@ class TimingOptimiser NetCriticalityMap net_crit; Context *ctx; TimingOptCfg cfg; - }; bool timing_opt(Context *ctx, TimingOptCfg cfg) { return TimingOptimiser(ctx, cfg).optimise(); } diff --git a/ice40/arch.cc b/ice40/arch.cc index 5cd55774..98e6d4c7 100644 --- a/ice40/arch.cc +++ b/ice40/arch.cc @@ -26,8 +26,8 @@ #include "nextpnr.h" #include "placer1.h" #include "router1.h" -#include "util.h" #include "timing_opt.h" +#include "util.h" NEXTPNR_NAMESPACE_BEGIN @@ -627,8 +627,9 @@ bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay // ----------------------------------------------------------------------- -bool Arch::place() { - if(!placer1(getCtx(), Placer1Cfg(getCtx()))) +bool Arch::place() +{ + if (!placer1(getCtx(), Placer1Cfg(getCtx()))) return false; TimingOptCfg tocfg(getCtx()); tocfg.cellTypes.insert(id_ICESTORM_LC); From 0f40e5fe8ce29bf55a943f7f0ff288a5e78dde6b Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Dec 2018 13:47:56 +0000 Subject: [PATCH 10/17] timing: Fixes to criticality calculation Signed-off-by: David Shah --- common/timing.cc | 16 ++++++++++++++++ common/timing_opt.cc | 12 +++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/common/timing.cc b/common/timing.cc index e90718d8..000a36b7 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -599,6 +599,22 @@ struct Timing nc.cd_worst_slack = std::min(nc.cd_worst_slack, worst_slack.at(startdomain.first)); } } + + if (ctx->debug) { + for (auto &nc : *net_crit) { + NetInfo *net = ctx->nets.at(nc.first).get(); + log_info("Net %s maxlen %d worst_slack %.02fns: \n", nc.first.c_str(ctx), nc.second.max_path_length, + ctx->getDelayNS(nc.second.cd_worst_slack)); + if (!nc.second.criticality.empty() && !nc.second.slack.empty()) { + for (size_t i = 0; i < net->users.size(); i++) { + log_info(" user %s.%s slack %.02fns crit %.03f\n", net->users.at(i).cell->name.c_str(ctx), + net->users.at(i).port.c_str(ctx), ctx->getDelayNS(nc.second.slack.at(i)), + nc.second.criticality.at(i)); + } + } + log_break(); + } + } } return min_slack; } diff --git a/common/timing_opt.cc b/common/timing_opt.cc index 950cbbbd..300ca06f 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -90,7 +90,7 @@ class TimingOptimiser log_info(" Iteration %d...\n", i); get_criticalities(ctx, &net_crit); setup_delay_limits(); - auto crit_paths = find_crit_paths(0.98, 1000); + auto crit_paths = find_crit_paths(0.92, 1000); for (auto &path : crit_paths) optimise_path(path); #if 1 @@ -372,6 +372,9 @@ class TimingOptimiser TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); if (tpclass != TMG_COMB_OUTPUT) continue; + bool is_path = ctx->getCellDelay(cell, fwd_cursor->port, port.first, combDelay); + if (!is_path) + continue; auto &crits = net_crit.at(pn->name).criticality; for (size_t i = 0; i < crits.size(); i++) { if (used_ports.count(&(pn->users.at(i)))) @@ -427,8 +430,11 @@ class TimingOptimiser } if (path_cells.size() < 3) { - log_info("Too few moveable cells; skipping path\n"); - log_break(); + if (ctx->debug) { + log_info("Too few moveable cells; skipping path\n"); + log_break(); + } + return; } IdString last_cell; From f3adf5a576a881e39cf78e599cbcd7ed3d3b8ec1 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Dec 2018 14:08:11 +0000 Subject: [PATCH 11/17] timing_opt: Make an optional pass controlled by command line Signed-off-by: David Shah --- ice40/arch.cc | 11 ++++++++--- ice40/main.cc | 3 +++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ice40/arch.cc b/ice40/arch.cc index 98e6d4c7..9dbc78bb 100644 --- a/ice40/arch.cc +++ b/ice40/arch.cc @@ -631,9 +631,14 @@ bool Arch::place() { if (!placer1(getCtx(), Placer1Cfg(getCtx()))) return false; - TimingOptCfg tocfg(getCtx()); - tocfg.cellTypes.insert(id_ICESTORM_LC); - return timing_opt(getCtx(), tocfg); + if(bool_or_default(settings, id("opt_timing"), false)) { + TimingOptCfg tocfg(getCtx()); + tocfg.cellTypes.insert(id_ICESTORM_LC); + return timing_opt(getCtx(), tocfg); + } else { + return true; + } + } bool Arch::route() { return router1(getCtx(), Router1Cfg(getCtx())); } diff --git a/ice40/main.cc b/ice40/main.cc index 4b6a9e42..543bd229 100644 --- a/ice40/main.cc +++ b/ice40/main.cc @@ -68,6 +68,7 @@ po::options_description Ice40CommandHandler::getArchOptions() specific.add_options()("promote-logic", "enable promotion of 'logic' globals (in addition to clk/ce/sr by default)"); specific.add_options()("no-promote-globals", "disable all global promotion"); + specific.add_options()("opt-timing", "run post-placement timing optimisation pass (experimental)"); specific.add_options()("tmfuzz", "run path delay estimate fuzzer"); return specific; } @@ -161,6 +162,8 @@ std::unique_ptr Ice40CommandHandler::createContext() ctx->settings[ctx->id("promote_logic")] = "1"; if (vm.count("no-promote-globals")) ctx->settings[ctx->id("no_promote_globals")] = "1"; + if (vm.count("opt-timing")) + ctx->settings[ctx->id("opt_timing")] = "1"; return ctx; } From a990a1576cc3b932ec784a0d9863f0ba9c337b0f Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Dec 2018 14:14:44 +0000 Subject: [PATCH 12/17] timing_opt: Fix criticality and cost calculations Signed-off-by: David Shah --- common/timing.cc | 13 +++++++++++++ common/timing_opt.cc | 40 +++++++++++++++++----------------------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/common/timing.cc b/common/timing.cc index 000a36b7..18caa989 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -531,6 +531,10 @@ struct Timing bool is_path = ctx->getCellDelay(drv.cell, port.first, drv.port, comb_delay); if (!is_path) continue; + int cc; + auto pclass = ctx->getPortTimingClass(drv.cell, port.first, cc); + if (pclass != TMG_COMB_INPUT) + continue; NetInfo *sink_net = port.second.net; if (net_data.count(sink_net) && net_data.at(sink_net).count(startdomain.first)) { auto &sink_nd = net_data.at(sink_net).at(startdomain.first); @@ -562,15 +566,24 @@ struct Timing auto &nc = (*net_crit)[net->name]; if (nc.slack.empty()) nc.slack.resize(net->users.size(), std::numeric_limits::max()); + if (ctx->debug) + log_info("Net %s cd %s\n", net->name.c_str(ctx), startdomain.first.clock.c_str(ctx)); for (size_t i = 0; i < net->users.size(); i++) { delay_t slack = nd.min_required.at(i) - (nd.max_arrival + ctx->getNetinfoRouteDelay(net, net->users.at(i))); + if (ctx->debug) + log_info(" user %s.%s required %.02fns arrival %.02f route %.02f slack %.02f\n", + net->users.at(i).cell->name.c_str(ctx), net->users.at(i).port.c_str(ctx), + ctx->getDelayNS(nd.min_required.at(i)), ctx->getDelayNS(nd.max_arrival), + ctx->getDelayNS(ctx->getNetinfoRouteDelay(net, net->users.at(i))), ctx->getDelayNS(slack)); if (worst_slack.count(startdomain.first)) worst_slack.at(startdomain.first) = std::min(worst_slack.at(startdomain.first), slack); else worst_slack[startdomain.first] = slack; nc.slack.at(i) = std::min(nc.slack.at(i), slack); } + if (ctx->debug) + log_break(); } } // Assign criticality values diff --git a/common/timing_opt.cc b/common/timing_opt.cc index 300ca06f..ed1618da 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -90,7 +90,7 @@ class TimingOptimiser log_info(" Iteration %d...\n", i); get_criticalities(ctx, &net_crit); setup_delay_limits(); - auto crit_paths = find_crit_paths(0.92, 1000); + auto crit_paths = find_crit_paths(0.95, 1000); for (auto &path : crit_paths) optimise_path(path); #if 1 @@ -438,25 +438,12 @@ class TimingOptimiser return; } IdString last_cell; - const int d = 3; // FIXME: how to best determine d + const int d = 4; // FIXME: how to best determine d for (auto cell : path_cells) { // FIXME: when should we allow swapping due to a lack of candidates find_neighbours(ctx->cells[cell].get(), last_cell, d, false); last_cell = cell; } - // Map cells that we will actually modify to the arc we will use for cost - // calculation - // for delay calc purposes - std::unordered_map> cost_ports; - PortRef *last_port = nullptr; - auto pcell = path_cells.begin(); - for (auto port : path) { - if (port->cell->name == *pcell) { - cost_ports[*pcell] = std::make_pair(last_port, port); - pcell++; - } - last_port = port; - } // Actual BFS path optimisation algorithm std::unordered_map> cumul_costs; @@ -501,8 +488,6 @@ class TimingOptimiser move.push_back(std::make_pair(cell, origBel)); } - delay_t cdelay = cumul_costs[cellname][entry.second]; - // Have a look at where we can travel from here for (auto neighbour : cell_neighbour_bels.at(path_cells.at(entry.first + 1))) { // Edges between overlapping bels are deleted @@ -514,12 +499,21 @@ class TimingOptimiser BelId origBel = cell_swap_bel(next_cell, neighbour); move.push_back(std::make_pair(next_cell, origBel)); - // Check the new cumulative delay - auto port_pair = cost_ports.at(ncname); - delay_t edge_delay = - ctx->estimateDelay(ctx->getBelPinWire(port_pair.first->cell->bel, port_pair.first->port), - ctx->getBelPinWire(port_pair.second->cell->bel, port_pair.second->port)); - delay_t total_delay = cdelay + edge_delay; + delay_t total_delay = 0; + + for (size_t i = 0; i < path.size(); i++) { + NetInfo *pn = path.at(i)->cell->ports.at(path.at(i)->port).net; + for (size_t j = 0; j < pn->users.size(); j++) { + auto & usr = pn->users.at(j); + if (usr.cell == path.at(i)->cell && usr.port == path.at(i)->port) { + total_delay += ctx->predictDelay(pn, usr); + break; + } + } + if (path.at(i)->cell == next_cell) + break; + } + // First, check if the move is actually worthwhile from a delay point of view before the expensive // legality check if (!cumul_costs.count(ncname) || !cumul_costs.at(ncname).count(neighbour) || From f53dc8d3c9735b4d9c50db1848de9dd3fefbe7ef Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Dec 2018 15:49:24 +0000 Subject: [PATCH 13/17] timing_opt: Improve heuristics Signed-off-by: David Shah --- common/timing.cc | 7 ++++- common/timing_opt.cc | 63 +++++++++++++++++++++++++------------ ice40/picorv32_benchmark.py | 2 +- 3 files changed, 50 insertions(+), 22 deletions(-) diff --git a/common/timing.cc b/common/timing.cc index 18caa989..6965307d 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -566,16 +566,20 @@ struct Timing auto &nc = (*net_crit)[net->name]; if (nc.slack.empty()) nc.slack.resize(net->users.size(), std::numeric_limits::max()); +#if 0 if (ctx->debug) log_info("Net %s cd %s\n", net->name.c_str(ctx), startdomain.first.clock.c_str(ctx)); +#endif for (size_t i = 0; i < net->users.size(); i++) { delay_t slack = nd.min_required.at(i) - (nd.max_arrival + ctx->getNetinfoRouteDelay(net, net->users.at(i))); +#if 0 if (ctx->debug) log_info(" user %s.%s required %.02fns arrival %.02f route %.02f slack %.02f\n", net->users.at(i).cell->name.c_str(ctx), net->users.at(i).port.c_str(ctx), ctx->getDelayNS(nd.min_required.at(i)), ctx->getDelayNS(nd.max_arrival), ctx->getDelayNS(ctx->getNetinfoRouteDelay(net, net->users.at(i))), ctx->getDelayNS(slack)); +#endif if (worst_slack.count(startdomain.first)) worst_slack.at(startdomain.first) = std::min(worst_slack.at(startdomain.first), slack); else @@ -612,7 +616,7 @@ struct Timing nc.cd_worst_slack = std::min(nc.cd_worst_slack, worst_slack.at(startdomain.first)); } } - +#if 0 if (ctx->debug) { for (auto &nc : *net_crit) { NetInfo *net = ctx->nets.at(nc.first).get(); @@ -628,6 +632,7 @@ struct Timing log_break(); } } +#endif } return min_slack; } diff --git a/common/timing_opt.cc b/common/timing_opt.cc index ed1618da..e8bb7d4f 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -90,7 +90,7 @@ class TimingOptimiser log_info(" Iteration %d...\n", i); get_criticalities(ctx, &net_crit); setup_delay_limits(); - auto crit_paths = find_crit_paths(0.95, 1000); + auto crit_paths = find_crit_paths(0.98, 1000); for (auto &path : crit_paths) optimise_path(path); #if 1 @@ -140,27 +140,23 @@ class TimingOptimiser if (port.second.type == PORT_IN) { if (net->driver.cell == nullptr || net->driver.cell->bel == BelId()) continue; - BelId srcBel = net->driver.cell->bel; - if (ctx->estimateDelay(ctx->getBelPinWire(srcBel, net->driver.port), - ctx->getBelPinWire(cell->bel, port.first)) > - max_net_delay.at(std::make_pair(cell->name, port.first))) - return false; + for (auto user : net->users) { + if (user.cell == cell && user.port == port.first) { + if (ctx->predictDelay(net, user) > + 1.1 * max_net_delay.at(std::make_pair(cell->name, port.first))) + return false; + } + } + } else if (port.second.type == PORT_OUT) { for (auto user : net->users) { // This could get expensive for high-fanout nets?? BelId dstBel = user.cell->bel; if (dstBel == BelId()) continue; - if (ctx->estimateDelay(ctx->getBelPinWire(cell->bel, port.first), - ctx->getBelPinWire(dstBel, user.port)) > - max_net_delay.at(std::make_pair(user.cell->name, user.port))) { -#if 0 - if (ctx->debug) { - log_info(" est delay %.02fns exceeded maximum %.02fns\n", ctx->getDelayNS(ctx->estimateDelay(ctx->getBelPinWire(cell->bel, port.first), - ctx->getBelPinWire(dstBel, user.port))), - ctx->getDelayNS(max_net_delay.at(std::make_pair(user.cell->name, user.port)))); - } -#endif + if (ctx->predictDelay(net, user) > + 1.1 * max_net_delay.at(std::make_pair(user.cell->name, user.port))) { + return false; } } @@ -370,7 +366,7 @@ class TimingOptimiser int ccount; DelayInfo combDelay; TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); - if (tpclass != TMG_COMB_OUTPUT) + if (tpclass != TMG_COMB_OUTPUT && tpclass != TMG_REGISTER_OUTPUT) continue; bool is_path = ctx->getCellDelay(cell, fwd_cursor->port, port.first, combDelay); if (!is_path) @@ -408,6 +404,17 @@ class TimingOptimiser bel_candidate_cells.clear(); if (ctx->debug) log_info("Optimising the following path: \n"); + + auto front_port = path.front(); + NetInfo *front_net = front_port->cell->ports.at(front_port->port).net; + if (front_net != nullptr && front_net->driver.cell != nullptr) { + auto front_cell = front_net->driver.cell; + if (front_cell->belStrength <= STRENGTH_WEAK && cfg.cellTypes.count(front_cell->type) && + front_cell->constr_parent == nullptr && front_cell->constr_children.empty()) { + path_cells.push_back(front_cell->name); + } + } + for (auto port : path) { if (ctx->debug) { float crit = 0; @@ -429,7 +436,7 @@ class TimingOptimiser path_cells.push_back(port->cell->name); } - if (path_cells.size() < 3) { + if (path_cells.size() < 2) { if (ctx->debug) { log_info("Too few moveable cells; skipping path\n"); log_break(); @@ -437,8 +444,23 @@ class TimingOptimiser return; } + + // Calculate original delay before touching anything + delay_t original_delay = 0; + + for (size_t i = 0; i < path.size(); i++) { + NetInfo *pn = path.at(i)->cell->ports.at(path.at(i)->port).net; + for (size_t j = 0; j < pn->users.size(); j++) { + auto & usr = pn->users.at(j); + if (usr.cell == path.at(i)->cell && usr.port == path.at(i)->port) { + original_delay += ctx->predictDelay(pn, usr); + break; + } + } + } + IdString last_cell; - const int d = 4; // FIXME: how to best determine d + const int d = 3; // FIXME: how to best determine d for (auto cell : path_cells) { // FIXME: when should we allow swapping due to a lack of candidates find_neighbours(ctx->cells[cell].get(), last_cell, d, false); @@ -556,7 +578,8 @@ class TimingOptimiser route_to_solution.push_back(cursor); } if (ctx->debug) - log_info("Found a solution with cost %.02f ns\n", ctx->getDelayNS(lowest->second)); + log_info("Found a solution with cost %.02f ns (existing path %.02f ns)\n", ctx->getDelayNS(lowest->second), + ctx->getDelayNS(original_delay)); for (auto rt_entry : boost::adaptors::reverse(route_to_solution)) { CellInfo *cell = ctx->cells.at(rt_entry.first).get(); cell_swap_bel(cell, rt_entry.second); diff --git a/ice40/picorv32_benchmark.py b/ice40/picorv32_benchmark.py index a4ec581e..5e4fc2e1 100755 --- a/ice40/picorv32_benchmark.py +++ b/ice40/picorv32_benchmark.py @@ -22,7 +22,7 @@ for i in range(num_runs): ascfile = "picorv32_work/picorv32_s{}.asc".format(run) if path.exists(ascfile): os.remove(ascfile) - result = subprocess.run(["../nextpnr-ice40", "--hx8k", "--seed", str(run), "--json", "picorv32.json", "--asc", ascfile, "--freq", "70"], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL) + result = subprocess.run(["../nextpnr-ice40", "--hx8k", "--seed", str(run), "--json", "picorv32.json", "--asc", ascfile, "--freq", "40", "--opt-timing"], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL) if result.returncode != 0: print("Run {} failed!".format(run)) else: From 745960fa858b91dc27371971770a0abd8ca244dc Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Dec 2018 16:34:58 +0000 Subject: [PATCH 14/17] timing_opt: Neighbour related fixes Signed-off-by: David Shah --- common/timing_opt.cc | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/common/timing_opt.cc b/common/timing_opt.cc index e8bb7d4f..851a20d7 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -86,11 +86,11 @@ class TimingOptimiser #if 1 timing_analysis(ctx, false, true, false, false); #endif - for (int i = 0; i < 20; i++) { + for (int i = 0; i < 100; i++) { log_info(" Iteration %d...\n", i); get_criticalities(ctx, &net_crit); setup_delay_limits(); - auto crit_paths = find_crit_paths(0.98, 1000); + auto crit_paths = find_crit_paths(0.98, 50000); for (auto &path : crit_paths) optimise_path(path); #if 1 @@ -122,7 +122,7 @@ class TimingOptimiser delay_t net_delay = ctx->getNetinfoRouteDelay(ni, usr); if (nc.max_path_length != 0) { max_net_delay[std::make_pair(usr.cell->name, usr.port)] = - net_delay + ((nc.slack.at(i) - nc.cd_worst_slack) / nc.max_path_length); + net_delay + ((nc.slack.at(i) - nc.cd_worst_slack) / 10); } } } @@ -168,6 +168,8 @@ class TimingOptimiser BelId cell_swap_bel(CellInfo *cell, BelId newBel) { BelId oldBel = cell->bel; + if (oldBel == newBel) + return oldBel; CellInfo *other_cell = ctx->getBoundBelCell(newBel); NPNR_ASSERT(other_cell == nullptr || other_cell->belStrength <= STRENGTH_WEAK); ctx->unbindBel(oldBel); @@ -221,14 +223,14 @@ class TimingOptimiser CellInfo *bound = ctx->getBoundBelCell(bel); if (bound == nullptr) { free_bels_at_loc.push_back(bel); - } else if (bound->belStrength <= STRENGTH_WEAK || bound->constr_parent != nullptr || - !bound->constr_children.empty()) { + } else if (bound->belStrength <= STRENGTH_WEAK && bound->constr_parent == nullptr && + bound->constr_children.empty()) { bound_bels_at_loc.push_back(bel); } } BelId candidate; - while (!free_bels_at_loc.empty() && !bound_bels_at_loc.empty()) { + while (!free_bels_at_loc.empty() || !bound_bels_at_loc.empty()) { BelId try_bel; if (!free_bels_at_loc.empty()) { int try_idx = ctx->rng(int(free_bels_at_loc.size())); @@ -244,7 +246,7 @@ class TimingOptimiser // edges in the graph), or if allow_swap is true to deal with cases where overlap means few // neighbours are identified if (bel_candidate_cells.at(try_bel).size() > 1 || - (bel_candidate_cells.at(try_bel).size() == 0 || + (bel_candidate_cells.at(try_bel).size() == 1 && *(bel_candidate_cells.at(try_bel).begin()) != prev_cell)) continue; } @@ -304,6 +306,10 @@ class TimingOptimiser std::unordered_set used_ports; for (auto crit_net : crit_nets) { + + if (used_ports.count(&(crit_net.first->users.at(crit_net.second)))) + continue; + std::deque crit_path; // FIXME: This will fail badly on combinational loops @@ -460,13 +466,22 @@ class TimingOptimiser } IdString last_cell; - const int d = 3; // FIXME: how to best determine d + const int d = 5; // FIXME: how to best determine d for (auto cell : path_cells) { // FIXME: when should we allow swapping due to a lack of candidates find_neighbours(ctx->cells[cell].get(), last_cell, d, false); last_cell = cell; } + if (ctx->debug) { + for (auto cell : path_cells) { + log_info("Candidate neighbours for %s (%s):\n", cell.c_str(ctx), ctx->getBelName(ctx->cells[cell]->bel).c_str(ctx)); + for (auto neigh : cell_neighbour_bels.at(cell)) { + log_info(" %s\n", ctx->getBelName(neigh).c_str(ctx)); + } + } + } + // Actual BFS path optimisation algorithm std::unordered_map> cumul_costs; std::unordered_map, std::pair> backtrace; From 2b84b33cd697a1dfa46ed4bc231644177add2b83 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Dec 2018 16:43:11 +0000 Subject: [PATCH 15/17] timing_opt: Reduce search diameter to 2 Signed-off-by: David Shah --- common/timing_opt.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/timing_opt.cc b/common/timing_opt.cc index 851a20d7..6aa120ae 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -466,7 +466,7 @@ class TimingOptimiser } IdString last_cell; - const int d = 5; // FIXME: how to best determine d + const int d = 2; // FIXME: how to best determine d for (auto cell : path_cells) { // FIXME: when should we allow swapping due to a lack of candidates find_neighbours(ctx->cells[cell].get(), last_cell, d, false); From 56dfd5564a2581bcb04a927cfc3161acae662064 Mon Sep 17 00:00:00 2001 From: David Shah Date: Wed, 5 Dec 2018 12:31:35 +0000 Subject: [PATCH 16/17] timing: Fix xclock crit calc and compiler warnings Signed-off-by: David Shah --- common/timing.cc | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/common/timing.cc b/common/timing.cc index 6965307d..b15327fb 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -96,8 +96,8 @@ struct Timing delay_t min_slack; CriticalPathMap *crit_path; DelayFrequency *slack_histogram; - IdString async_clock; NetCriticalityMap *net_crit; + IdString async_clock; struct TimingData { @@ -472,8 +472,6 @@ struct Timing continue; if (startdomain.first.clock == async_clock) continue; - const delay_t net_length_plus_one = nd.max_path_length + 1; - auto &net_min_remaining_budget = nd.min_remaining_budget; if (nd.min_required.empty()) nd.min_required.resize(net->users.size(), std::numeric_limits::max()); delay_t net_min_required = std::numeric_limits::max(); @@ -584,7 +582,7 @@ struct Timing worst_slack.at(startdomain.first) = std::min(worst_slack.at(startdomain.first), slack); else worst_slack[startdomain.first] = slack; - nc.slack.at(i) = std::min(nc.slack.at(i), slack); + nc.slack.at(i) = slack; } if (ctx->debug) log_break(); @@ -610,10 +608,10 @@ struct Timing delay_t dmax = crit_path->at(ClockPair{startdomain.first, startdomain.first}).path_delay; for (size_t i = 0; i < net->users.size(); i++) { float criticality = 1.0f - (float(nc.slack.at(i) - worst_slack.at(startdomain.first)) / dmax); - nc.criticality.at(i) = std::max(nc.criticality.at(i), criticality); + nc.criticality.at(i) = criticality; } - nc.max_path_length = std::max(nc.max_path_length, nd.max_path_length); - nc.cd_worst_slack = std::min(nc.cd_worst_slack, worst_slack.at(startdomain.first)); + nc.max_path_length = nd.max_path_length; + nc.cd_worst_slack = worst_slack.at(startdomain.first); } } #if 0 From b732e42fa312b83bee6c122d69e0a171afca779c Mon Sep 17 00:00:00 2001 From: David Shah Date: Thu, 6 Dec 2018 11:00:16 +0000 Subject: [PATCH 17/17] timing_opt: Reduce iterations to 30, tidy up logging Signed-off-by: David Shah --- common/timing_opt.cc | 28 ++++++++++++---------------- ice40/arch.cc | 3 +-- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/common/timing_opt.cc b/common/timing_opt.cc index 6aa120ae..a451bfa0 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -83,27 +83,22 @@ class TimingOptimiser bool optimise() { log_info("Running timing-driven placement optimisation...\n"); -#if 1 - timing_analysis(ctx, false, true, false, false); -#endif - for (int i = 0; i < 100; i++) { + if (ctx->verbose) + timing_analysis(ctx, false, true, false, false); + for (int i = 0; i < 30; i++) { log_info(" Iteration %d...\n", i); get_criticalities(ctx, &net_crit); setup_delay_limits(); auto crit_paths = find_crit_paths(0.98, 50000); for (auto &path : crit_paths) optimise_path(path); -#if 1 - timing_analysis(ctx, false, true, false, false); -#endif + if (ctx->verbose) + timing_analysis(ctx, false, true, false, false); } return true; } private: - // Ratio of available to already-candidates to begin borrowing - const float borrow_thresh = 0.2; - void setup_delay_limits() { max_net_delay.clear(); @@ -416,7 +411,7 @@ class TimingOptimiser if (front_net != nullptr && front_net->driver.cell != nullptr) { auto front_cell = front_net->driver.cell; if (front_cell->belStrength <= STRENGTH_WEAK && cfg.cellTypes.count(front_cell->type) && - front_cell->constr_parent == nullptr && front_cell->constr_children.empty()) { + front_cell->constr_parent == nullptr && front_cell->constr_children.empty()) { path_cells.push_back(front_cell->name); } } @@ -457,7 +452,7 @@ class TimingOptimiser for (size_t i = 0; i < path.size(); i++) { NetInfo *pn = path.at(i)->cell->ports.at(path.at(i)->port).net; for (size_t j = 0; j < pn->users.size(); j++) { - auto & usr = pn->users.at(j); + auto &usr = pn->users.at(j); if (usr.cell == path.at(i)->cell && usr.port == path.at(i)->port) { original_delay += ctx->predictDelay(pn, usr); break; @@ -475,7 +470,8 @@ class TimingOptimiser if (ctx->debug) { for (auto cell : path_cells) { - log_info("Candidate neighbours for %s (%s):\n", cell.c_str(ctx), ctx->getBelName(ctx->cells[cell]->bel).c_str(ctx)); + log_info("Candidate neighbours for %s (%s):\n", cell.c_str(ctx), + ctx->getBelName(ctx->cells[cell]->bel).c_str(ctx)); for (auto neigh : cell_neighbour_bels.at(cell)) { log_info(" %s\n", ctx->getBelName(neigh).c_str(ctx)); } @@ -541,7 +537,7 @@ class TimingOptimiser for (size_t i = 0; i < path.size(); i++) { NetInfo *pn = path.at(i)->cell->ports.at(path.at(i)->port).net; for (size_t j = 0; j < pn->users.size(); j++) { - auto & usr = pn->users.at(j); + auto &usr = pn->users.at(j); if (usr.cell == path.at(i)->cell && usr.port == path.at(i)->port) { total_delay += ctx->predictDelay(pn, usr); break; @@ -593,8 +589,8 @@ class TimingOptimiser route_to_solution.push_back(cursor); } if (ctx->debug) - log_info("Found a solution with cost %.02f ns (existing path %.02f ns)\n", ctx->getDelayNS(lowest->second), - ctx->getDelayNS(original_delay)); + log_info("Found a solution with cost %.02f ns (existing path %.02f ns)\n", + ctx->getDelayNS(lowest->second), ctx->getDelayNS(original_delay)); for (auto rt_entry : boost::adaptors::reverse(route_to_solution)) { CellInfo *cell = ctx->cells.at(rt_entry.first).get(); cell_swap_bel(cell, rt_entry.second); diff --git a/ice40/arch.cc b/ice40/arch.cc index 9dbc78bb..8f52987c 100644 --- a/ice40/arch.cc +++ b/ice40/arch.cc @@ -631,14 +631,13 @@ bool Arch::place() { if (!placer1(getCtx(), Placer1Cfg(getCtx()))) return false; - if(bool_or_default(settings, id("opt_timing"), false)) { + if (bool_or_default(settings, id("opt_timing"), false)) { TimingOptCfg tocfg(getCtx()); tocfg.cellTypes.insert(id_ICESTORM_LC); return timing_opt(getCtx(), tocfg); } else { return true; } - } bool Arch::route() { return router1(getCtx(), Router1Cfg(getCtx())); }