diff --git a/common/timing.cc b/common/timing.cc index 88ab14c2..b15327fb 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -86,6 +86,7 @@ struct CriticalPath }; typedef std::unordered_map CriticalPathMap; +typedef std::unordered_map NetCriticalityMap; struct Timing { @@ -95,6 +96,7 @@ struct Timing delay_t min_slack; CriticalPathMap *crit_path; DelayFrequency *slack_histogram; + NetCriticalityMap *net_crit; IdString async_clock; struct TimingData @@ -105,13 +107,15 @@ struct Timing unsigned max_path_length = 0; delay_t min_remaining_budget; bool false_startpoint = false; + std::vector min_required; std::unordered_map arrival_time; }; Timing(Context *ctx, bool net_delays, bool update, CriticalPathMap *crit_path = nullptr, - DelayFrequency *slack_histogram = nullptr) + DelayFrequency *slack_histogram = nullptr, NetCriticalityMap *net_crit = nullptr) : ctx(ctx), net_delays(net_delays), update(update), min_slack(1.0e12 / ctx->target_freq), - crit_path(crit_path), slack_histogram(slack_histogram), async_clock(ctx->id("$async$")) + crit_path(crit_path), slack_histogram(slack_histogram), net_crit(net_crit), + async_clock(ctx->id("$async$")) { } @@ -454,6 +458,180 @@ struct Timing std::reverse(cp_ports.begin(), cp_ports.end()); } } + + if (net_crit) { + NPNR_ASSERT(crit_path); + // Go through in reverse topographical order to set required times + for (auto net : boost::adaptors::reverse(topographical_order)) { + if (!net_data.count(net)) + continue; + auto &nd_map = net_data.at(net); + for (auto &startdomain : nd_map) { + auto &nd = startdomain.second; + if (nd.false_startpoint) + continue; + if (startdomain.first.clock == async_clock) + continue; + if (nd.min_required.empty()) + nd.min_required.resize(net->users.size(), std::numeric_limits::max()); + delay_t net_min_required = std::numeric_limits::max(); + for (size_t i = 0; i < net->users.size(); i++) { + auto &usr = net->users.at(i); + auto net_delay = ctx->getNetinfoRouteDelay(net, usr); + int port_clocks; + TimingPortClass portClass = ctx->getPortTimingClass(usr.cell, usr.port, port_clocks); + if (portClass == TMG_REGISTER_INPUT || portClass == TMG_ENDPOINT) { + auto process_endpoint = [&](IdString clksig, ClockEdge edge, delay_t setup) { + delay_t period; + // Set default period + if (edge == startdomain.first.edge) { + period = clk_period; + } else { + period = clk_period / 2; + } + if (clksig != async_clock) { + if (ctx->nets.at(clksig)->clkconstr) { + if (edge == startdomain.first.edge) { + // same edge + period = ctx->nets.at(clksig)->clkconstr->period.minDelay(); + } else if (edge == RISING_EDGE) { + // falling -> rising + period = ctx->nets.at(clksig)->clkconstr->low.minDelay(); + } else if (edge == FALLING_EDGE) { + // rising -> falling + period = ctx->nets.at(clksig)->clkconstr->high.minDelay(); + } + } + } + nd.min_required.at(i) = std::min(period - setup, nd.min_required.at(i)); + }; + if (portClass == TMG_REGISTER_INPUT) { + for (int j = 0; j < port_clocks; j++) { + TimingClockingInfo clkInfo = ctx->getPortClockingInfo(usr.cell, usr.port, j); + const NetInfo *clknet = get_net_or_empty(usr.cell, clkInfo.clock_port); + IdString clksig = clknet ? clknet->name : async_clock; + process_endpoint(clksig, clknet ? clkInfo.edge : RISING_EDGE, + clkInfo.setup.maxDelay()); + } + } else { + process_endpoint(async_clock, RISING_EDGE, 0); + } + } + net_min_required = std::min(net_min_required, nd.min_required.at(i) - net_delay); + } + PortRef &drv = net->driver; + if (drv.cell == nullptr) + continue; + for (const auto &port : drv.cell->ports) { + if (port.second.type != PORT_IN || !port.second.net) + continue; + DelayInfo comb_delay; + bool is_path = ctx->getCellDelay(drv.cell, port.first, drv.port, comb_delay); + if (!is_path) + continue; + int cc; + auto pclass = ctx->getPortTimingClass(drv.cell, port.first, cc); + if (pclass != TMG_COMB_INPUT) + continue; + NetInfo *sink_net = port.second.net; + if (net_data.count(sink_net) && net_data.at(sink_net).count(startdomain.first)) { + auto &sink_nd = net_data.at(sink_net).at(startdomain.first); + if (sink_nd.min_required.empty()) + sink_nd.min_required.resize(sink_net->users.size(), + std::numeric_limits::max()); + for (size_t i = 0; i < sink_net->users.size(); i++) { + auto &user = sink_net->users.at(i); + if (user.cell == drv.cell && user.port == port.first) { + sink_nd.min_required.at(i) = net_min_required - comb_delay.maxDelay(); + break; + } + } + } + } + } + } + std::unordered_map worst_slack; + + // Assign slack values + for (auto &net_entry : net_data) { + const NetInfo *net = net_entry.first; + for (auto &startdomain : net_entry.second) { + auto &nd = startdomain.second; + if (startdomain.first.clock == async_clock) + continue; + if (nd.min_required.empty()) + continue; + auto &nc = (*net_crit)[net->name]; + if (nc.slack.empty()) + nc.slack.resize(net->users.size(), std::numeric_limits::max()); +#if 0 + if (ctx->debug) + log_info("Net %s cd %s\n", net->name.c_str(ctx), startdomain.first.clock.c_str(ctx)); +#endif + for (size_t i = 0; i < net->users.size(); i++) { + delay_t slack = nd.min_required.at(i) - + (nd.max_arrival + ctx->getNetinfoRouteDelay(net, net->users.at(i))); +#if 0 + if (ctx->debug) + log_info(" user %s.%s required %.02fns arrival %.02f route %.02f slack %.02f\n", + net->users.at(i).cell->name.c_str(ctx), net->users.at(i).port.c_str(ctx), + ctx->getDelayNS(nd.min_required.at(i)), ctx->getDelayNS(nd.max_arrival), + ctx->getDelayNS(ctx->getNetinfoRouteDelay(net, net->users.at(i))), ctx->getDelayNS(slack)); +#endif + if (worst_slack.count(startdomain.first)) + worst_slack.at(startdomain.first) = std::min(worst_slack.at(startdomain.first), slack); + else + worst_slack[startdomain.first] = slack; + nc.slack.at(i) = slack; + } + if (ctx->debug) + log_break(); + } + } + // Assign criticality values + for (auto &net_entry : net_data) { + const NetInfo *net = net_entry.first; + for (auto &startdomain : net_entry.second) { + if (startdomain.first.clock == async_clock) + continue; + auto &nd = startdomain.second; + if (nd.min_required.empty()) + continue; + auto &nc = (*net_crit)[net->name]; + if (nc.slack.empty()) + continue; + if (nc.criticality.empty()) + nc.criticality.resize(net->users.size(), 0); + // Only consider intra-clock paths for criticality + if (!crit_path->count(ClockPair{startdomain.first, startdomain.first})) + continue; + delay_t dmax = crit_path->at(ClockPair{startdomain.first, startdomain.first}).path_delay; + for (size_t i = 0; i < net->users.size(); i++) { + float criticality = 1.0f - (float(nc.slack.at(i) - worst_slack.at(startdomain.first)) / dmax); + nc.criticality.at(i) = criticality; + } + nc.max_path_length = nd.max_path_length; + nc.cd_worst_slack = worst_slack.at(startdomain.first); + } + } +#if 0 + if (ctx->debug) { + for (auto &nc : *net_crit) { + NetInfo *net = ctx->nets.at(nc.first).get(); + log_info("Net %s maxlen %d worst_slack %.02fns: \n", nc.first.c_str(ctx), nc.second.max_path_length, + ctx->getDelayNS(nc.second.cd_worst_slack)); + if (!nc.second.criticality.empty() && !nc.second.slack.empty()) { + for (size_t i = 0; i < net->users.size(); i++) { + log_info(" user %s.%s slack %.02fns crit %.03f\n", net->users.at(i).cell->name.c_str(ctx), + net->users.at(i).port.c_str(ctx), ctx->getDelayNS(nc.second.slack.at(i)), + nc.second.criticality.at(i)); + } + } + log_break(); + } + } +#endif + } return min_slack; } @@ -766,4 +944,12 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p } } +void get_criticalities(Context *ctx, NetCriticalityMap *net_crit) +{ + CriticalPathMap crit_paths; + net_crit->clear(); + Timing timing(ctx, true, true, &crit_paths, nullptr, net_crit); + timing.walk_paths(); +} + NEXTPNR_NAMESPACE_END diff --git a/common/timing.h b/common/timing.h index 42f928dc..f1d18e8a 100644 --- a/common/timing.h +++ b/common/timing.h @@ -32,6 +32,19 @@ void assign_budget(Context *ctx, bool quiet = false); void timing_analysis(Context *ctx, bool slack_histogram = true, bool print_fmax = true, bool print_path = false, bool warn_on_failure = false); +// Data for the timing optimisation algorithm +struct NetCriticalityInfo +{ + // One each per user + std::vector slack; + std::vector criticality; + unsigned max_path_length = 0; + delay_t cd_worst_slack = std::numeric_limits::max(); +}; + +typedef std::unordered_map NetCriticalityMap; +void get_criticalities(Context *ctx, NetCriticalityMap *net_crit); + NEXTPNR_NAMESPACE_END #endif diff --git a/common/timing_opt.cc b/common/timing_opt.cc new file mode 100644 index 00000000..a451bfa0 --- /dev/null +++ b/common/timing_opt.cc @@ -0,0 +1,623 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2018 David Shah + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +/* + * Timing-optimised detailed placement algorithm using BFS of the neighbour graph created from cells + * on a critical path + * + * Based on "An Effective Timing-Driven Detailed Placement Algorithm for FPGAs" + * https://www.cerc.utexas.edu/utda/publications/C205.pdf + * + * Modifications made to deal with the smaller Bels that nextpnr uses instead of swapping whole tiles, + * and deal with the fact that not every cell on the crit path may be swappable. + */ + +#include "timing_opt.h" +#include +#include +#include "nextpnr.h" +#include "timing.h" +#include "util.h" + +namespace std { + +template <> struct hash> +{ + std::size_t + operator()(const std::pair &idp) const + noexcept + { + std::size_t seed = 0; + boost::hash_combine(seed, hash()(idp.first)); + boost::hash_combine(seed, hash()(idp.second)); + return seed; + } +}; + +template <> struct hash> +{ + std::size_t operator()(const std::pair &idp) const noexcept + { + std::size_t seed = 0; + boost::hash_combine(seed, hash()(idp.first)); + boost::hash_combine(seed, hash()(idp.second)); + return seed; + } +}; + +template <> struct hash> +{ + std::size_t + operator()(const std::pair &idp) const noexcept + { + std::size_t seed = 0; + boost::hash_combine(seed, hash()(idp.first)); + boost::hash_combine(seed, hash()(idp.second)); + return seed; + } +}; +} // namespace std + +NEXTPNR_NAMESPACE_BEGIN + +class TimingOptimiser +{ + public: + TimingOptimiser(Context *ctx, TimingOptCfg cfg) : ctx(ctx), cfg(cfg){}; + bool optimise() + { + log_info("Running timing-driven placement optimisation...\n"); + if (ctx->verbose) + timing_analysis(ctx, false, true, false, false); + for (int i = 0; i < 30; i++) { + log_info(" Iteration %d...\n", i); + get_criticalities(ctx, &net_crit); + setup_delay_limits(); + auto crit_paths = find_crit_paths(0.98, 50000); + for (auto &path : crit_paths) + optimise_path(path); + if (ctx->verbose) + timing_analysis(ctx, false, true, false, false); + } + return true; + } + + private: + void setup_delay_limits() + { + max_net_delay.clear(); + for (auto net : sorted(ctx->nets)) { + NetInfo *ni = net.second; + for (auto usr : ni->users) { + max_net_delay[std::make_pair(usr.cell->name, usr.port)] = std::numeric_limits::max(); + } + if (!net_crit.count(net.first)) + continue; + auto &nc = net_crit.at(net.first); + if (nc.slack.empty()) + continue; + for (size_t i = 0; i < ni->users.size(); i++) { + auto &usr = ni->users.at(i); + delay_t net_delay = ctx->getNetinfoRouteDelay(ni, usr); + if (nc.max_path_length != 0) { + max_net_delay[std::make_pair(usr.cell->name, usr.port)] = + net_delay + ((nc.slack.at(i) - nc.cd_worst_slack) / 10); + } + } + } + } + + bool check_cell_delay_limits(CellInfo *cell) + { + for (const auto &port : cell->ports) { + int nc; + if (ctx->getPortTimingClass(cell, port.first, nc) == TMG_IGNORE) + continue; + NetInfo *net = port.second.net; + if (net == nullptr) + continue; + if (port.second.type == PORT_IN) { + if (net->driver.cell == nullptr || net->driver.cell->bel == BelId()) + continue; + for (auto user : net->users) { + if (user.cell == cell && user.port == port.first) { + if (ctx->predictDelay(net, user) > + 1.1 * max_net_delay.at(std::make_pair(cell->name, port.first))) + return false; + } + } + + } else if (port.second.type == PORT_OUT) { + for (auto user : net->users) { + // This could get expensive for high-fanout nets?? + BelId dstBel = user.cell->bel; + if (dstBel == BelId()) + continue; + if (ctx->predictDelay(net, user) > + 1.1 * max_net_delay.at(std::make_pair(user.cell->name, user.port))) { + + return false; + } + } + } + } + return true; + } + + BelId cell_swap_bel(CellInfo *cell, BelId newBel) + { + BelId oldBel = cell->bel; + if (oldBel == newBel) + return oldBel; + CellInfo *other_cell = ctx->getBoundBelCell(newBel); + NPNR_ASSERT(other_cell == nullptr || other_cell->belStrength <= STRENGTH_WEAK); + ctx->unbindBel(oldBel); + if (other_cell != nullptr) { + ctx->unbindBel(newBel); + ctx->bindBel(oldBel, other_cell, STRENGTH_WEAK); + } + ctx->bindBel(newBel, cell, STRENGTH_WEAK); + return oldBel; + } + + // Check that a series of moves are both legal and remain within maximum delay bounds + // Moves are specified as a vector of pairs + bool acceptable_move(std::vector> &move, bool check_delays = true) + { + for (auto &entry : move) { + if (!ctx->isBelLocationValid(entry.first->bel)) + return false; + if (!ctx->isBelLocationValid(entry.second)) + return false; + if (!check_delays) + continue; + if (!check_cell_delay_limits(entry.first)) + return false; + // We might have swapped another cell onto the original bel. Check this for max delay violations + // too + CellInfo *swapped = ctx->getBoundBelCell(entry.second); + if (swapped != nullptr && !check_cell_delay_limits(swapped)) + return false; + } + return true; + } + + int find_neighbours(CellInfo *cell, IdString prev_cell, int d, bool allow_swap) + { + BelId curr = cell->bel; + Loc curr_loc = ctx->getBelLocation(curr); + int found_count = 0; + cell_neighbour_bels[cell->name] = std::unordered_set{}; + for (int dy = -d; dy <= d; dy++) { + for (int dx = -d; dx <= d; dx++) { + // Go through all the Bels at this location + // First, find all bels of the correct type that are either unbound or bound normally + // Strongly bound bels are ignored + // FIXME: This means that we cannot touch carry chains or similar relatively constrained macros + std::vector free_bels_at_loc; + std::vector bound_bels_at_loc; + for (auto bel : ctx->getBelsByTile(curr_loc.x + dx, curr_loc.y + dy)) { + if (ctx->getBelType(bel) != cell->type) + continue; + CellInfo *bound = ctx->getBoundBelCell(bel); + if (bound == nullptr) { + free_bels_at_loc.push_back(bel); + } else if (bound->belStrength <= STRENGTH_WEAK && bound->constr_parent == nullptr && + bound->constr_children.empty()) { + bound_bels_at_loc.push_back(bel); + } + } + BelId candidate; + + while (!free_bels_at_loc.empty() || !bound_bels_at_loc.empty()) { + BelId try_bel; + if (!free_bels_at_loc.empty()) { + int try_idx = ctx->rng(int(free_bels_at_loc.size())); + try_bel = free_bels_at_loc.at(try_idx); + free_bels_at_loc.erase(free_bels_at_loc.begin() + try_idx); + } else { + int try_idx = ctx->rng(int(bound_bels_at_loc.size())); + try_bel = bound_bels_at_loc.at(try_idx); + bound_bels_at_loc.erase(bound_bels_at_loc.begin() + try_idx); + } + if (bel_candidate_cells.count(try_bel) && !allow_swap) { + // Overlap is only allowed if it is with the previous cell (this is handled by removing those + // edges in the graph), or if allow_swap is true to deal with cases where overlap means few + // neighbours are identified + if (bel_candidate_cells.at(try_bel).size() > 1 || + (bel_candidate_cells.at(try_bel).size() == 1 && + *(bel_candidate_cells.at(try_bel).begin()) != prev_cell)) + continue; + } + // TODO: what else to check here? + candidate = try_bel; + break; + } + + if (candidate != BelId()) { + cell_neighbour_bels[cell->name].insert(candidate); + bel_candidate_cells[candidate].insert(cell->name); + // Work out if we need to delete any overlap + std::vector overlap; + for (auto other : bel_candidate_cells[candidate]) + if (other != cell->name && other != prev_cell) + overlap.push_back(other); + if (overlap.size() > 0) + NPNR_ASSERT(allow_swap); + for (auto ov : overlap) { + bel_candidate_cells[candidate].erase(ov); + cell_neighbour_bels[ov].erase(candidate); + } + } + } + } + return found_count; + } + + std::vector> find_crit_paths(float crit_thresh, size_t max_count) + { + std::vector> crit_paths; + std::vector> crit_nets; + std::vector netnames; + std::transform(ctx->nets.begin(), ctx->nets.end(), std::back_inserter(netnames), + [](const std::pair> &kv) { return kv.first; }); + ctx->sorted_shuffle(netnames); + for (auto net : netnames) { + if (crit_nets.size() >= max_count) + break; + if (!net_crit.count(net)) + continue; + auto crit_user = std::max_element(net_crit[net].criticality.begin(), net_crit[net].criticality.end()); + if (*crit_user > crit_thresh) + crit_nets.push_back( + std::make_pair(ctx->nets[net].get(), crit_user - net_crit[net].criticality.begin())); + } + + auto port_user_index = [](CellInfo *cell, PortInfo &port) -> size_t { + NPNR_ASSERT(port.net != nullptr); + for (size_t i = 0; i < port.net->users.size(); i++) { + auto &usr = port.net->users.at(i); + if (usr.cell == cell && usr.port == port.name) + return i; + } + NPNR_ASSERT_FALSE("port user not found on net"); + }; + std::unordered_set used_ports; + + for (auto crit_net : crit_nets) { + + if (used_ports.count(&(crit_net.first->users.at(crit_net.second)))) + continue; + + std::deque crit_path; + + // FIXME: This will fail badly on combinational loops + + // Iterate backwards following greatest criticality + NetInfo *back_cursor = crit_net.first; + while (back_cursor != nullptr) { + float max_crit = 0; + std::pair crit_sink{nullptr, 0}; + CellInfo *cell = back_cursor->driver.cell; + if (cell == nullptr) + break; + for (auto port : cell->ports) { + if (port.second.type != PORT_IN) + continue; + NetInfo *pn = port.second.net; + if (pn == nullptr) + continue; + if (!net_crit.count(pn->name) || net_crit.at(pn->name).criticality.empty()) + continue; + int ccount; + DelayInfo combDelay; + TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); + if (tpclass != TMG_COMB_INPUT) + continue; + bool is_path = ctx->getCellDelay(cell, port.first, back_cursor->driver.port, combDelay); + if (!is_path) + continue; + size_t user_idx = port_user_index(cell, port.second); + float usr_crit = net_crit.at(pn->name).criticality.at(user_idx); + if (used_ports.count(&(pn->users.at(user_idx)))) + continue; + if (usr_crit >= max_crit) { + max_crit = usr_crit; + crit_sink = std::make_pair(pn, user_idx); + } + } + + if (crit_sink.first != nullptr) { + crit_path.push_front(&(crit_sink.first->users.at(crit_sink.second))); + used_ports.insert(&(crit_sink.first->users.at(crit_sink.second))); + } + back_cursor = crit_sink.first; + } + // Iterate forwards following greatest criticiality + PortRef *fwd_cursor = &(crit_net.first->users.at(crit_net.second)); + while (fwd_cursor != nullptr) { + crit_path.push_back(fwd_cursor); + float max_crit = 0; + std::pair crit_sink{nullptr, 0}; + CellInfo *cell = fwd_cursor->cell; + for (auto port : cell->ports) { + if (port.second.type != PORT_OUT) + continue; + NetInfo *pn = port.second.net; + if (pn == nullptr) + continue; + if (!net_crit.count(pn->name) || net_crit.at(pn->name).criticality.empty()) + continue; + int ccount; + DelayInfo combDelay; + TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); + if (tpclass != TMG_COMB_OUTPUT && tpclass != TMG_REGISTER_OUTPUT) + continue; + bool is_path = ctx->getCellDelay(cell, fwd_cursor->port, port.first, combDelay); + if (!is_path) + continue; + auto &crits = net_crit.at(pn->name).criticality; + for (size_t i = 0; i < crits.size(); i++) { + if (used_ports.count(&(pn->users.at(i)))) + continue; + if (crits.at(i) >= max_crit) { + max_crit = crits.at(i); + crit_sink = std::make_pair(pn, i); + } + } + } + if (crit_sink.first != nullptr) { + fwd_cursor = &(crit_sink.first->users.at(crit_sink.second)); + used_ports.insert(&(crit_sink.first->users.at(crit_sink.second))); + } else { + fwd_cursor = nullptr; + } + } + + std::vector crit_path_vec; + std::copy(crit_path.begin(), crit_path.end(), std::back_inserter(crit_path_vec)); + crit_paths.push_back(crit_path_vec); + } + + return crit_paths; + } + + void optimise_path(std::vector &path) + { + path_cells.clear(); + cell_neighbour_bels.clear(); + bel_candidate_cells.clear(); + if (ctx->debug) + log_info("Optimising the following path: \n"); + + auto front_port = path.front(); + NetInfo *front_net = front_port->cell->ports.at(front_port->port).net; + if (front_net != nullptr && front_net->driver.cell != nullptr) { + auto front_cell = front_net->driver.cell; + if (front_cell->belStrength <= STRENGTH_WEAK && cfg.cellTypes.count(front_cell->type) && + front_cell->constr_parent == nullptr && front_cell->constr_children.empty()) { + path_cells.push_back(front_cell->name); + } + } + + for (auto port : path) { + if (ctx->debug) { + float crit = 0; + NetInfo *pn = port->cell->ports.at(port->port).net; + if (net_crit.count(pn->name) && !net_crit.at(pn->name).criticality.empty()) + for (size_t i = 0; i < pn->users.size(); i++) + if (pn->users.at(i).cell == port->cell && pn->users.at(i).port == port->port) + crit = net_crit.at(pn->name).criticality.at(i); + log_info(" %s.%s at %s crit %0.02f\n", port->cell->name.c_str(ctx), port->port.c_str(ctx), + ctx->getBelName(port->cell->bel).c_str(ctx), crit); + } + if (std::find(path_cells.begin(), path_cells.end(), port->cell->name) != path_cells.end()) + continue; + if (port->cell->belStrength > STRENGTH_WEAK || !cfg.cellTypes.count(port->cell->type) || + port->cell->constr_parent != nullptr || !port->cell->constr_children.empty()) + continue; + if (ctx->debug) + log_info(" can move\n"); + path_cells.push_back(port->cell->name); + } + + if (path_cells.size() < 2) { + if (ctx->debug) { + log_info("Too few moveable cells; skipping path\n"); + log_break(); + } + + return; + } + + // Calculate original delay before touching anything + delay_t original_delay = 0; + + for (size_t i = 0; i < path.size(); i++) { + NetInfo *pn = path.at(i)->cell->ports.at(path.at(i)->port).net; + for (size_t j = 0; j < pn->users.size(); j++) { + auto &usr = pn->users.at(j); + if (usr.cell == path.at(i)->cell && usr.port == path.at(i)->port) { + original_delay += ctx->predictDelay(pn, usr); + break; + } + } + } + + IdString last_cell; + const int d = 2; // FIXME: how to best determine d + for (auto cell : path_cells) { + // FIXME: when should we allow swapping due to a lack of candidates + find_neighbours(ctx->cells[cell].get(), last_cell, d, false); + last_cell = cell; + } + + if (ctx->debug) { + for (auto cell : path_cells) { + log_info("Candidate neighbours for %s (%s):\n", cell.c_str(ctx), + ctx->getBelName(ctx->cells[cell]->bel).c_str(ctx)); + for (auto neigh : cell_neighbour_bels.at(cell)) { + log_info(" %s\n", ctx->getBelName(neigh).c_str(ctx)); + } + } + } + + // Actual BFS path optimisation algorithm + std::unordered_map> cumul_costs; + std::unordered_map, std::pair> backtrace; + std::queue> visit; + std::unordered_set> to_visit; + + for (auto startbel : cell_neighbour_bels[path_cells.front()]) { + // Swap for legality check + CellInfo *cell = ctx->cells.at(path_cells.front()).get(); + BelId origBel = cell_swap_bel(cell, startbel); + std::vector> move{std::make_pair(cell, origBel)}; + if (acceptable_move(move)) { + auto entry = std::make_pair(0, startbel); + visit.push(entry); + cumul_costs[path_cells.front()][startbel] = 0; + } + // Swap back + cell_swap_bel(cell, origBel); + } + + while (!visit.empty()) { + auto entry = visit.front(); + visit.pop(); + auto cellname = path_cells.at(entry.first); + if (entry.first == int(path_cells.size()) - 1) + continue; + std::vector> move; + // Apply the entire backtrace for accurate legality and delay checks + // This is probably pretty expensive (but also probably pales in comparison to the number of swaps + // SA will make...) + std::vector> route_to_entry; + auto cursor = std::make_pair(cellname, entry.second); + route_to_entry.push_back(cursor); + while (backtrace.count(cursor)) { + cursor = backtrace.at(cursor); + route_to_entry.push_back(cursor); + } + for (auto rt_entry : boost::adaptors::reverse(route_to_entry)) { + CellInfo *cell = ctx->cells.at(rt_entry.first).get(); + BelId origBel = cell_swap_bel(cell, rt_entry.second); + move.push_back(std::make_pair(cell, origBel)); + } + + // Have a look at where we can travel from here + for (auto neighbour : cell_neighbour_bels.at(path_cells.at(entry.first + 1))) { + // Edges between overlapping bels are deleted + if (neighbour == entry.second) + continue; + // Experimentally swap the next path cell onto the neighbour bel we are trying + IdString ncname = path_cells.at(entry.first + 1); + CellInfo *next_cell = ctx->cells.at(ncname).get(); + BelId origBel = cell_swap_bel(next_cell, neighbour); + move.push_back(std::make_pair(next_cell, origBel)); + + delay_t total_delay = 0; + + for (size_t i = 0; i < path.size(); i++) { + NetInfo *pn = path.at(i)->cell->ports.at(path.at(i)->port).net; + for (size_t j = 0; j < pn->users.size(); j++) { + auto &usr = pn->users.at(j); + if (usr.cell == path.at(i)->cell && usr.port == path.at(i)->port) { + total_delay += ctx->predictDelay(pn, usr); + break; + } + } + if (path.at(i)->cell == next_cell) + break; + } + + // First, check if the move is actually worthwhile from a delay point of view before the expensive + // legality check + if (!cumul_costs.count(ncname) || !cumul_costs.at(ncname).count(neighbour) || + cumul_costs.at(ncname).at(neighbour) > total_delay) { + // Now check that the swaps we have made to get here are legal and meet max delay requirements + if (acceptable_move(move)) { + cumul_costs[ncname][neighbour] = total_delay; + backtrace[std::make_pair(ncname, neighbour)] = std::make_pair(cellname, entry.second); + if (!to_visit.count(std::make_pair(entry.first + 1, neighbour))) + visit.push(std::make_pair(entry.first + 1, neighbour)); + } + } + // Revert the experimental swap + cell_swap_bel(move.back().first, move.back().second); + move.pop_back(); + } + + // Revert move by swapping cells back to their original order + // Execute swaps in reverse order to how we made them originally + for (auto move_entry : boost::adaptors::reverse(move)) { + cell_swap_bel(move_entry.first, move_entry.second); + } + } + + // Did we find a solution?? + if (cumul_costs.count(path_cells.back())) { + // Find the end position with the lowest total delay + auto &end_options = cumul_costs.at(path_cells.back()); + auto lowest = std::min_element(end_options.begin(), end_options.end(), + [](const std::pair &a, const std::pair &b) { + return a.second < b.second; + }); + NPNR_ASSERT(lowest != end_options.end()); + + std::vector> route_to_solution; + auto cursor = std::make_pair(path_cells.back(), lowest->first); + route_to_solution.push_back(cursor); + while (backtrace.count(cursor)) { + cursor = backtrace.at(cursor); + route_to_solution.push_back(cursor); + } + if (ctx->debug) + log_info("Found a solution with cost %.02f ns (existing path %.02f ns)\n", + ctx->getDelayNS(lowest->second), ctx->getDelayNS(original_delay)); + for (auto rt_entry : boost::adaptors::reverse(route_to_solution)) { + CellInfo *cell = ctx->cells.at(rt_entry.first).get(); + cell_swap_bel(cell, rt_entry.second); + if (ctx->debug) + log_info(" %s at %s\n", rt_entry.first.c_str(ctx), ctx->getBelName(rt_entry.second).c_str(ctx)); + } + + } else { + if (ctx->debug) + log_info("Solution was not found\n"); + } + if (ctx->debug) + log_break(); + } + + // Current candidate Bels for cells (linked in both direction> + std::vector path_cells; + std::unordered_map> cell_neighbour_bels; + std::unordered_map> bel_candidate_cells; + // Map cell ports to net delay limit + std::unordered_map, delay_t> max_net_delay; + // Criticality data from timing analysis + NetCriticalityMap net_crit; + Context *ctx; + TimingOptCfg cfg; +}; + +bool timing_opt(Context *ctx, TimingOptCfg cfg) { return TimingOptimiser(ctx, cfg).optimise(); } + +NEXTPNR_NAMESPACE_END diff --git a/common/timing_opt.h b/common/timing_opt.h new file mode 100644 index 00000000..ceb35c71 --- /dev/null +++ b/common/timing_opt.h @@ -0,0 +1,37 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2018 David Shah + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "nextpnr.h" +#include "settings.h" + +NEXTPNR_NAMESPACE_BEGIN + +struct TimingOptCfg : public Settings +{ + TimingOptCfg(Context *ctx) : Settings(ctx) {} + + // The timing optimiser will *only* optimise cells of these types + // Normally these would only be logic cells (or tiles if applicable), the algorithm makes little sense + // for other cell types + std::unordered_set cellTypes; +}; + +extern bool timing_opt(Context *ctx, TimingOptCfg cfg); + +NEXTPNR_NAMESPACE_END diff --git a/ice40/arch.cc b/ice40/arch.cc index ada78020..8f52987c 100644 --- a/ice40/arch.cc +++ b/ice40/arch.cc @@ -26,6 +26,7 @@ #include "nextpnr.h" #include "placer1.h" #include "router1.h" +#include "timing_opt.h" #include "util.h" NEXTPNR_NAMESPACE_BEGIN @@ -626,7 +627,18 @@ bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay // ----------------------------------------------------------------------- -bool Arch::place() { return placer1(getCtx(), Placer1Cfg(getCtx())); } +bool Arch::place() +{ + if (!placer1(getCtx(), Placer1Cfg(getCtx()))) + return false; + if (bool_or_default(settings, id("opt_timing"), false)) { + TimingOptCfg tocfg(getCtx()); + tocfg.cellTypes.insert(id_ICESTORM_LC); + return timing_opt(getCtx(), tocfg); + } else { + return true; + } +} bool Arch::route() { return router1(getCtx(), Router1Cfg(getCtx())); } diff --git a/ice40/main.cc b/ice40/main.cc index 4b6a9e42..543bd229 100644 --- a/ice40/main.cc +++ b/ice40/main.cc @@ -68,6 +68,7 @@ po::options_description Ice40CommandHandler::getArchOptions() specific.add_options()("promote-logic", "enable promotion of 'logic' globals (in addition to clk/ce/sr by default)"); specific.add_options()("no-promote-globals", "disable all global promotion"); + specific.add_options()("opt-timing", "run post-placement timing optimisation pass (experimental)"); specific.add_options()("tmfuzz", "run path delay estimate fuzzer"); return specific; } @@ -161,6 +162,8 @@ std::unique_ptr Ice40CommandHandler::createContext() ctx->settings[ctx->id("promote_logic")] = "1"; if (vm.count("no-promote-globals")) ctx->settings[ctx->id("no_promote_globals")] = "1"; + if (vm.count("opt-timing")) + ctx->settings[ctx->id("opt_timing")] = "1"; return ctx; } diff --git a/ice40/picorv32_benchmark.py b/ice40/picorv32_benchmark.py index a4ec581e..5e4fc2e1 100755 --- a/ice40/picorv32_benchmark.py +++ b/ice40/picorv32_benchmark.py @@ -22,7 +22,7 @@ for i in range(num_runs): ascfile = "picorv32_work/picorv32_s{}.asc".format(run) if path.exists(ascfile): os.remove(ascfile) - result = subprocess.run(["../nextpnr-ice40", "--hx8k", "--seed", str(run), "--json", "picorv32.json", "--asc", ascfile, "--freq", "70"], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL) + result = subprocess.run(["../nextpnr-ice40", "--hx8k", "--seed", str(run), "--json", "picorv32.json", "--asc", ascfile, "--freq", "40", "--opt-timing"], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL) if result.returncode != 0: print("Run {} failed!".format(run)) else: